diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index d69168b01ee..1df8cea4ad5 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -3,6 +3,9 @@ name: BackportPR env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + # Export system tables to ClickHouse Cloud + CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }} + CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }} on: # yamllint disable-line rule:truthy push: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 0fbcb95fc12..284d94b5c5a 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -3,6 +3,9 @@ name: MasterCI env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + # Export system tables to ClickHouse Cloud + CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }} + CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }} on: # yamllint disable-line rule:truthy push: @@ -892,6 +895,48 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinS390X: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_s390x + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -975,6 +1020,7 @@ jobs: - BuilderBinFreeBSD - BuilderBinPPC64 - BuilderBinRISCV64 + - BuilderBinS390X - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy @@ -2870,6 +2916,216 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan0: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + 
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" 
"$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -3433,7 +3689,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/unit_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-clang) + CHECK_NAME=Unit tests (release) REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports @@ -3963,6 +4219,12 @@ jobs: - IntegrationTestsAsan3 - IntegrationTestsAsan4 - IntegrationTestsAsan5 + - IntegrationTestsAnalyzerAsan0 + - IntegrationTestsAnalyzerAsan1 + - IntegrationTestsAnalyzerAsan2 + - IntegrationTestsAnalyzerAsan3 + - IntegrationTestsAnalyzerAsan4 + - IntegrationTestsAnalyzerAsan5 - IntegrationTestsRelease0 - IntegrationTestsRelease1 - IntegrationTestsRelease2 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index f898e764915..cf9f025585d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3,6 +3,9 @@ name: PullRequestCI env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + # Export system tables to ClickHouse Cloud + CLICKHOUSE_CI_LOGS_HOST: ${{ 
secrets.CLICKHOUSE_CI_LOGS_HOST }} + CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }} on: # yamllint disable-line rule:truthy pull_request: @@ -952,6 +955,47 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinS390X: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_s390x + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1034,6 +1078,7 @@ jobs: - BuilderBinFreeBSD - BuilderBinPPC64 - BuilderBinRISCV64 + - BuilderBinS390X - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy @@ -3903,6 +3948,216 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan0: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + 
CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p 
"$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -4331,7 +4586,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/unit_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-clang) + CHECK_NAME=Unit tests (release) REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports @@ -4889,6 +5144,12 @@ jobs: - IntegrationTestsAsan3 - IntegrationTestsAsan4 - IntegrationTestsAsan5 + - IntegrationTestsAnalyzerAsan0 + - IntegrationTestsAnalyzerAsan1 + - IntegrationTestsAnalyzerAsan2 + - IntegrationTestsAnalyzerAsan3 + - IntegrationTestsAnalyzerAsan4 + - IntegrationTestsAnalyzerAsan5 - IntegrationTestsRelease0 - IntegrationTestsRelease1 - IntegrationTestsRelease2 @@ -4966,3 +5227,39 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" +############################################################################################## +##################################### SQL TEST ############################################### +############################################################################################## + SQLTest: + needs: [BuilderDebRelease] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqltest + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=SQLTest + REPO_COPY=${{runner.temp}}/sqltest/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: SQLTest + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqltest.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git 
a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 21284815583..e5527177aa5 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -3,6 +3,9 @@ name: ReleaseBranchCI env: # Force the stdout and stderr streams to be unbuffered PYTHONUNBUFFERED: 1 + # Export system tables to ClickHouse Cloud + CLICKHOUSE_CI_LOGS_HOST: ${{ secrets.CLICKHOUSE_CI_LOGS_HOST }} + CLICKHOUSE_CI_LOGS_PASSWORD: ${{ secrets.CLICKHOUSE_CI_LOGS_PASSWORD }} on: # yamllint disable-line rule:truthy push: diff --git a/.gitignore b/.gitignore index a04c60d5ca3..5341f23a94f 100644 --- a/.gitignore +++ b/.gitignore @@ -69,6 +69,7 @@ cmake-build-* *.pyc __pycache__ *.pytest_cache +.mypy_cache test.cpp CPackConfig.cmake @@ -161,8 +162,10 @@ tests/queries/0_stateless/test_* tests/queries/0_stateless/*.binary tests/queries/0_stateless/*.generated-expect tests/queries/0_stateless/*.expect.history +tests/integration/**/_gen # rust /rust/**/target # It is autogenerated from *.in /rust/**/.cargo/config.toml +/rust/**/vendor diff --git a/.gitmodules b/.gitmodules index 151dc28c55b..86fd7832dd9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -258,9 +258,6 @@ [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash -[submodule "contrib/hashidsxx"] - path = contrib/hashidsxx - url = https://github.com/schoentoon/hashidsxx [submodule "contrib/nats-io"] path = contrib/nats-io url = https://github.com/ClickHouse/nats.c @@ -334,6 +331,10 @@ [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing +[submodule "contrib/libarchive"] + path = contrib/libarchive + url = https://github.com/libarchive/libarchive.git + ignore = dirty [submodule "contrib/libfiu"] path = contrib/libfiu url = https://github.com/ClickHouse/libfiu.git @@ -343,3 +344,6 @@ [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/c-ares/c-ares.git +[submodule "contrib/incbin"] + path = contrib/incbin + url = https://github.com/graphitemaster/incbin.git diff --git a/CHANGELOG.md b/CHANGELOG.md index bf6b309ef2c..ce57affb161 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.7, 2023-07-27](#237)**
**[ClickHouse release v23.6, 2023-06-30](#236)**
**[ClickHouse release v23.5, 2023-06-08](#235)**
**[ClickHouse release v23.4, 2023-04-26](#234)**
@@ -9,6 +10,181 @@ # 2023 Changelog +### ClickHouse release 23.7, 2023-07-27 + +#### Backward Incompatible Change +* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`, to be able to give these grants `named_collection_admin` is required in config (previously it was named `named_collection_control`, so will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixing a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). +* Bump version of the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_idd pass through). This will not allow to process existing entries for distributed DDL after *downgrade* (but note, that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). +* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). +* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). +* Kafka connector can fetch Avro schema from schema registry with basic authentication using url-encoded credentials. 
[#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). +* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). +* Implement support of encrypted elements in configuration file. Added possibility to use encrypted text in leaf elements of configuration file. The text is encrypted using encryption codecs from `` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)). +* Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)). +* Add `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add `input_format_csv_allow_variable_number_of_columns` options. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)). +* A system table `jemalloc_bins` to show stats for jemalloc bins. Example `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)). +* Add `RowBinaryWithDefaults` format with extra byte before each column as a flag for using the column's default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). +* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). +* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). +* Added Peak Memory Usage statistic to HTTP headers. 
[#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). ### Documentation entry for user-facing changes. [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). +* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). +* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). +* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)). +* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). +* Aliases `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). +* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Support `async_deduplication_token` for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). +* Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. 
You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). +* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). +* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). +* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). +* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)). +* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). +* Move conditions with primary key columns to the end of PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). +* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). +* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). +* Allow to have strict lower boundary for file segment size by downloading remaining data in the background. Minimum size of file segment (if actual file size is bigger) is configured as cache configuration setting `boundary_alignment`, by default `4Mi`. Number of background threads are configured as cache configuration setting `background_download_threads`, by default `2`. 
Also `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). +* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). +* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specify the amount of time replica will wait before closing interserver handler for replicated sends. Also fix inconsistency with shutdown of tables and interserver handlers: now server shutdown tables first and only after it shut down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). +* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). +* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix two issues in `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). +* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). +* Functions `date_diff` and `age` now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). +* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). 
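
A minimal, self-contained sketch of a few of the New Feature and Improvement entries above — the new array/string helpers, SQL-standard `FETCH` without `OFFSET`, and the sub-second units for `age`/`date_diff`. The queries follow the wording of the changelog entries rather than the PRs themselves, so treat them as illustrative:

```sql
-- New helper functions named in the entries above
SELECT
    arrayJaccardIndex([1, 2, 3], [2, 3, 4]) AS jaccard,      -- 2 shared out of 4 distinct values = 0.5
    initcap('hello clickhouse')             AS capitalized,  -- 'Hello Clickhouse'
    firstLine('first\nsecond\nthird')       AS first_line,   -- 'first'
    hasSubsequence('clickhouse', 'chse')    AS is_subseq;    -- 1

-- SQL-standard FETCH is now accepted without a preceding OFFSET clause
SELECT number
FROM numbers(100)
ORDER BY number DESC
FETCH FIRST 5 ROWS ONLY;

-- age()/date_diff() now understand millisecond/microsecond units
SELECT age('microsecond',
           toDateTime64('2023-07-27 00:00:00.000000', 6),
           toDateTime64('2023-07-27 00:00:00.001500', 6)) AS diff_us;  -- 1500
```
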
+* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). +* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add placeholder `%a` for rull filename in rename_files_after_processing setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). +* Add column `modification_time` into `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)). +* Add new setting `input_format_csv_use_default_on_bad_values` to CSV format that allows to insert default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). +* Added a crash log flush to the disk after the unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix behavior in dashboard page where errors unrelated to authentication are not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). +* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added support for function `range` of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Convert condition like `toyear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). +* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). +* Cache dictionary now requests only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed the case when settings were not applied for EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). +* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). 
[#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). +* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). +* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). +* Added `allow_create_index_without_type` setting that allow to ignore `ADD INDEX` queries without specified `TYPE`. Standard SQL queries will just succeed without changing table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). +* Log messages are written to the `system.text_log` from the server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). +* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown. Made session creation with handling all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). +* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). +* It was possible to use the function `hasToken` for infinite loop. Now this possibility is removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). +* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). +* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). +* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). 
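
Two more of the Improvement entries above are easiest to read as queries: the relaxed clause order in `DESCRIBE TABLE` and the new UUID-to-UInt128 conversion. A brief sketch — the `describe_compact_output` setting is used here only as something convenient to set:

```sql
-- SETTINGS may now be written before FORMAT in DESCRIBE TABLE,
-- matching the clause order already accepted by SELECT
DESCRIBE TABLE system.one
SETTINGS describe_compact_output = 1
FORMAT TSV;

-- UUID values can now be cast to UInt128
SELECT CAST(generateUUIDv4() AS UInt128) AS uuid_as_uint128;
```
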
+* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). +* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). + +#### Experimental Feature +* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). This is controlled by the setting `output_format_parquet_use_custom_encoder` which is disabled by default, because the feature is non-ideal. +* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release. +* (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). +* Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). +* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete. + +#### Build/Testing/Packaging Improvement +* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add integration test check with the enabled Analyzer. 
[#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). +* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). +* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` to work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). +* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)). +* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)). +* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)). +* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)). +* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). 
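
For context on two of the bug-fix entries above (the `hasToken*` family and `formatDateTime` with fractional negative DateTime64 values), these are the kinds of calls the fixes make behave correctly; the literals are arbitrary examples, not taken from the referenced PRs:

```sql
-- hasToken*: token search over non-alphanumeric-separated tokens
SELECT hasToken('Hello, world!', 'world') AS has_token;  -- 1

-- formatDateTime over a pre-epoch DateTime64 with a fractional part
SELECT formatDateTime(toDateTime64('1969-12-31 23:59:59.123', 3), '%F %T.%f') AS formatted;
```
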
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)). +* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix segfault when create invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)). +* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). +* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). +* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). +* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). 
+* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). +* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). +* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). +* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + + ### ClickHouse release 23.6, 2023-06-29 #### Backward Incompatible Change diff --git a/CMakeLists.txt b/CMakeLists.txt index 45c3c422d7a..55bcf5fbf3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -165,8 +165,14 @@ elseif(GLIBC_COMPATIBILITY) message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") endif () -# Make sure the final executable has symbols exported -set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") +if (OS_LINUX) + # We should not export dynamic symbols, because: + # - The main clickhouse binary does not use dlopen, + # and whatever is poisoning it by LD_PRELOAD should not link to our symbols. + # - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries + # should not expose their symbols to ODBC drivers and libraries. 
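Aside, for illustration only (not part of the patch): the comment block above motivates the `-Wl,--no-export-dynamic` flag set in the linker-flags line that follows. A minimal sketch of the difference the flag makes, using a hypothetical `demo.cpp` and whichever recent clang++/g++ is at hand — with `-rdynamic` the executable's own symbols land in its dynamic symbol table, where `dlsym()` and `LD_PRELOAD`-ed code can bind to them; without exporting they do not:

```sh
# Sketch only: file and symbol names are hypothetical.
cat > demo.cpp << 'EOF'
extern "C" void internal_helper() {}
int main() { internal_helper(); }
EOF

clang++ demo.cpp -rdynamic               -o demo_exported
clang++ demo.cpp -Wl,--no-export-dynamic -o demo_hidden

# The exported build lists the symbol in its dynamic symbol table; the other does not,
# so dlopen'ed or LD_PRELOAD-ed code cannot resolve it there.
nm --dynamic demo_exported | grep internal_helper
nm --dynamic demo_hidden   | grep internal_helper || echo "internal_helper is not exported"
```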
+ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") +endif () if (OS_DARWIN) # The `-all_load` flag forces loading of all symbols from all libraries, @@ -202,9 +208,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS "Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)" ${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT}) -if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - set(USE_DEBUG_HELPERS ON) -endif() option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF) diff --git a/README.md b/README.md index 1036e1a97e1..4ff9b9caaa1 100644 --- a/README.md +++ b/README.md @@ -23,24 +23,21 @@ curl https://clickhouse.com/ | sh ## Upcoming Events -* [**v23.6 Release Webinar**](https://clickhouse.com/company/events/v23-6-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-06) - Jun 29 - 23.6 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. -* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18 -* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19 -* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20 -* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27 +* [**v23.8 Community Call**](https://clickhouse.com/company/events/v23-8-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-08) - Aug 31 - 23.8 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. +* [**ClickHouse & AI - A Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/294472987) - Aug 8 * [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Sep 12 Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchonous Connections to Reoplicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now! +* **Recording available**: [**v23.6 Release Webinar**](https://www.youtube.com/watch?v=cuf_hYn7dqU) All the features of 23.6, one convenient video! Watch it now! * **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) - ## Interested in joining ClickHouse and making it your full time job? + ## Interested in joining ClickHouse and making it your full-time job? 
-We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker as well as a doer - we’ll definitely click! +We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting-edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker and a doer - we’ll definitely click! Check out our **current openings** here: https://clickhouse.com/company/careers diff --git a/SECURITY.md b/SECURITY.md index 4ba5f13d09c..d61533b44b9 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.7 | ✔️ | | 23.6 | ✔️ | | 23.5 | ✔️ | -| 23.4 | ✔️ | +| 23.4 | ❌ | | 23.3 | ✔️ | | 23.2 | ❌ | | 23.1 | ❌ | diff --git a/base/base/EnumReflection.h b/base/base/EnumReflection.h index 0d1f8ae0a40..e3208f16a75 100644 --- a/base/base/EnumReflection.h +++ b/base/base/EnumReflection.h @@ -3,6 +3,7 @@ #include #include + template concept is_enum = std::is_enum_v; namespace detail diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 4c6d97b4444..0b43be38149 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -7,8 +7,6 @@ #include #include -#include - #define JSON_MAX_DEPTH 100 diff --git a/base/base/StringRef.h b/base/base/StringRef.h index f428b7c747f..448bc102b41 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -274,6 +275,8 @@ struct CRC32Hash if (size == 0) return 0; + chassert(pos); + if (size < 8) { return static_cast(hashLessThan8(x.data, x.size)); diff --git a/base/base/defines.h b/base/base/defines.h index 6abf8155b95..ee29ecf6118 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -115,8 +115,15 @@ /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) #if defined(ABORT_ON_LOGICAL_ERROR) + // clang-format off + #include + namespace DB + { + void abortOnFailedAssertion(const String & description); + } #define chassert(x) static_cast(x) ? 
void(0) : ::DB::abortOnFailedAssertion(#x) #define UNREACHABLE() abort() + // clang-format off #else /// Here sizeof() trick is used to suppress unused warning for result, /// since simple "(void)x" will evaluate the expression, while diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index 83232669c04..fda94edaa88 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -448,7 +448,7 @@ inline char * find_last_not_symbols_or_null(char * begin, char * end) /// See https://github.com/boostorg/algorithm/issues/63 /// And https://bugs.llvm.org/show_bug.cgi?id=41141 template -inline void splitInto(To & to, const std::string & what, bool token_compress = false) +inline To & splitInto(To & to, std::string_view what, bool token_compress = false) { const char * pos = what.data(); const char * end = pos + what.size(); @@ -464,4 +464,6 @@ inline void splitInto(To & to, const std::string & what, bool token_compress = f else pos = delimiter_or_end; } + + return to; } diff --git a/base/base/move_extend.h b/base/base/move_extend.h new file mode 100644 index 00000000000..6e5b16e037c --- /dev/null +++ b/base/base/move_extend.h @@ -0,0 +1,9 @@ +#pragma once + +/// Extend @p to by moving elements from @p from to @p to end +/// @return @p to iterator to first of moved elements. +template +typename To::iterator moveExtend(To & to, From && from) +{ + return to.insert(to.end(), std::make_move_iterator(from.begin()), std::make_move_iterator(from.end())); +} diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 411841e6d9f..fc4e9e551ca 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -12,7 +12,6 @@ #include #include -#include #include // NOLINTBEGIN(*) @@ -22,6 +21,7 @@ #define CONSTEXPR_FROM_DOUBLE constexpr using FromDoubleIntermediateType = long double; #else +#include /// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended` #define CONSTEXPR_FROM_DOUBLE using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended; diff --git a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp index 4caf097c28a..1c5555f8cf3 100644 --- a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp +++ b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp @@ -19,7 +19,6 @@ #include "Poco/UTF16Encoding.h" #include "Poco/Buffer.h" #include "Poco/Exception.h" -#include using Poco::Buffer; diff --git a/base/poco/Data/include/Poco/Data/TypeHandler.h b/base/poco/Data/include/Poco/Data/TypeHandler.h index 34f88e986f7..e7633de7018 100644 --- a/base/poco/Data/include/Poco/Data/TypeHandler.h +++ b/base/poco/Data/include/Poco/Data/TypeHandler.h @@ -97,7 +97,7 @@ namespace Data /// /// static void extract(std::size_t pos, Person& obj, const Person& defVal, AbstractExtractor::Ptr pExt) /// { - /// // defVal is the default person we should use if we encunter NULL entries, so we take the individual fields + /// // defVal is the default person we should use if we encounter NULL entries, so we take the individual fields /// // as defaults. You can do more complex checking, ie return defVal if only one single entry of the fields is null etc... 
/// poco_assert_dbg (!pExt.isNull()); /// std::string lastName; diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index e8f04888ab4..282c7fb5fd1 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -67,6 +67,8 @@ public: Message( const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {}); + Message( + std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str); /// Creates a Message with the given source, text, priority, /// source file path and line. /// diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index 1880af4ccd2..30654504e0b 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ public: URI(); /// Creates an empty URI. - explicit URI(const std::string & uri); + explicit URI(const std::string & uri, bool enable_url_encoding = true); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. @@ -350,6 +350,10 @@ protected: static const std::string ILLEGAL; private: + void encodePath(std::string & encodedStr) const; + void decodePath(const std::string & encodedStr); + + std::string _scheme; std::string _userInfo; std::string _host; @@ -357,6 +361,8 @@ private: std::string _path; std::string _query; std::string _fragment; + + bool _enable_url_encoding = true; }; diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp index 663c96e47a2..54118cc0fc5 100644 --- a/base/poco/Foundation/src/Message.cpp +++ b/base/poco/Foundation/src/Message.cpp @@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority pr } +Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str): + _source(std::move(source)), + _text(std::move(text)), + _prio(prio), + _tid(0), + _file(file), + _line(line), + _pMap(0), + _fmt_str(fmt_str) +{ + init(); +} + Message::Message(const Message& msg): _source(msg._source), _text(msg._text), diff --git a/base/poco/Foundation/src/Task.cpp b/base/poco/Foundation/src/Task.cpp index a850ae37eff..4303d50d6e8 100644 --- a/base/poco/Foundation/src/Task.cpp +++ b/base/poco/Foundation/src/Task.cpp @@ -16,7 +16,6 @@ #include "Poco/TaskManager.h" #include "Poco/Exception.h" -#include #include diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 5543e02b279..41e331bb080 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -36,8 +36,8 @@ URI::URI(): } -URI::URI(const std::string& uri): - _port(0) +URI::URI(const std::string& uri, bool enable_url_encoding): + _port(0), _enable_url_encoding(enable_url_encoding) { parse(uri); } @@ -107,7 +107,8 @@ URI::URI(const URI& uri): _port(uri._port), _path(uri._path), _query(uri._query), - _fragment(uri._fragment) + _fragment(uri._fragment), + _enable_url_encoding(uri._enable_url_encoding) { } @@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI): _port(baseURI._port), _path(baseURI._path), _query(baseURI._query), - _fragment(baseURI._fragment) + _fragment(baseURI._fragment), + _enable_url_encoding(baseURI._enable_url_encoding) { resolve(relativeURI); } @@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri) _path = uri._path; _query = uri._query; 
_fragment = uri._fragment; + _enable_url_encoding = uri._enable_url_encoding; } return *this; } @@ -181,6 +184,7 @@ void URI::swap(URI& uri) std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); + std::swap(_enable_url_encoding, uri._enable_url_encoding); } @@ -201,7 +205,7 @@ std::string URI::toString() const std::string uri; if (isRelative()) { - encode(_path, RESERVED_PATH, uri); + encodePath(uri); } else { @@ -217,7 +221,7 @@ std::string URI::toString() const { if (!auth.empty() && _path[0] != '/') uri += '/'; - encode(_path, RESERVED_PATH, uri); + encodePath(uri); } else if (!_query.empty() || !_fragment.empty()) { @@ -313,7 +317,7 @@ void URI::setAuthority(const std::string& authority) void URI::setPath(const std::string& path) { _path.clear(); - decode(path, _path); + decodePath(path); } @@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc) std::string URI::getPathEtc() const { std::string pathEtc; - encode(_path, RESERVED_PATH, pathEtc); + encodePath(pathEtc); if (!_query.empty()) { pathEtc += '?'; @@ -436,7 +440,7 @@ std::string URI::getPathEtc() const std::string URI::getPathAndQuery() const { std::string pathAndQuery; - encode(_path, RESERVED_PATH, pathAndQuery); + encodePath(pathAndQuery); if (!_query.empty()) { pathAndQuery += '?'; @@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa } } +void URI::encodePath(std::string & encodedStr) const +{ + if (_enable_url_encoding) + encode(_path, RESERVED_PATH, encodedStr); + else + encodedStr = _path; +} + +void URI::decodePath(const std::string & encodedStr) +{ + if (_enable_url_encoding) + decode(encodedStr, _path); + else + _path = encodedStr; +} bool URI::isWellKnownPort() const { @@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it { std::string path; while (it != end && *it != '?' && *it != '#') path += *it++; - decode(path, _path); + decodePath(path); } diff --git a/base/poco/JSON/src/Object.cpp b/base/poco/JSON/src/Object.cpp index 7fca65c5b01..b041f570934 100644 --- a/base/poco/JSON/src/Object.cpp +++ b/base/poco/JSON/src/Object.cpp @@ -14,7 +14,6 @@ #include "Poco/JSON/Object.h" #include -#include using Poco::Dynamic::Var; diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index d495d662f75..167a06eb7ff 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -306,7 +306,7 @@ namespace Net DEFAULT_KEEP_ALIVE_TIMEOUT = 8 }; - void reconnect(); + virtual void reconnect(); /// Connects the underlying socket to the HTTP server. int write(const char * buffer, std::streamsize length); diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index c5697b556d1..2712c0c452e 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -26,7 +26,6 @@ #include "Poco/CountingStream.h" #include "Poco/RegularExpression.h" #include -#include using Poco::NumberFormatter; diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h index 65917ac9dd4..c19eecf5c73 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h @@ -146,7 +146,7 @@ namespace Net std::string cipherList; /// Specifies the supported ciphers in OpenSSL notation. 
- /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH". + /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH". std::string dhParamsFile; /// Specifies a file containing Diffie-Hellman parameters. @@ -172,7 +172,7 @@ namespace Net VerificationMode verificationMode = VERIFY_RELAXED, int verificationDepth = 9, bool loadDefaultCAs = false, - const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); + const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH"); /// Creates a Context. /// /// * usage specifies whether the context is used by a client or server. @@ -200,7 +200,7 @@ namespace Net VerificationMode verificationMode = VERIFY_RELAXED, int verificationDepth = 9, bool loadDefaultCAs = false, - const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); + const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH"); /// Creates a Context. /// /// * usage specifies whether the context is used by a client or server. diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h index 21a1ed685e5..e4037c87927 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h @@ -76,7 +76,7 @@ namespace Net /// none|relaxed|strict|once /// 1..9 /// true|false - /// ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH + /// ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH /// true|false /// /// KeyFileHandler diff --git a/base/poco/NetSSL_OpenSSL/src/Context.cpp b/base/poco/NetSSL_OpenSSL/src/Context.cpp index ca220c40a33..d0bab902b89 100644 --- a/base/poco/NetSSL_OpenSSL/src/Context.cpp +++ b/base/poco/NetSSL_OpenSSL/src/Context.cpp @@ -41,7 +41,7 @@ Context::Params::Params(): verificationMode(VERIFY_RELAXED), verificationDepth(9), loadDefaultCAs(false), - cipherList("ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH") + cipherList("ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH") { } diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 821b7b46855..9919d018046 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54476) +SET(VERSION_REVISION 54477) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 8) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH d1c7e13d08868cb04d3562dcced704dd577cb1df) -SET(VERSION_DESCRIBE v23.7.1.1-testing) -SET(VERSION_STRING 23.7.1.1) +SET(VERSION_GITHASH a70127baecc451f1f7073bad7b6198f6703441d8) +SET(VERSION_DESCRIBE v23.8.1.1-testing) +SET(VERSION_STRING 23.8.1.1) # end of autochange diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake deleted file mode 100644 index e5428c24939..00000000000 --- a/cmake/embed_binary.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# Embed a set of resource files into a resulting object file. -# -# Signature: `clickhouse_embed_binaries(TARGET RESOURCE_DIR RESOURCES ...) -# -# This will generate a static library target named ``, which contains the contents of -# each `` file. The files should be located in ``. defaults to -# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty. -# -# Each resource will result in three symbols in the final archive, based on the name ``. -# These are: -# 1. `_binary__start`: Points to the start of the binary data from ``. -# 2. `_binary__end`: Points to the end of the binary data from ``. -# 2. 
`_binary__size`: Points to the size of the binary data from ``. -# -# `` is a normalized name derived from ``, by replacing the characters "./-" with -# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated -# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`. -macro(clickhouse_embed_binaries) - set(one_value_args TARGET RESOURCE_DIR) - set(resources RESOURCES) - cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN}) - - if (NOT DEFINED EMBED_TARGET) - message(FATAL_ERROR "A target name must be provided for embedding binary resources into") - endif() - - if (NOT DEFINED EMBED_RESOURCE_DIR) - set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") - endif() - - list(LENGTH EMBED_RESOURCES N_RESOURCES) - if (N_RESOURCES LESS 1) - message(FATAL_ERROR "The list of binary resources to embed may not be empty") - endif() - - add_library("${EMBED_TARGET}" STATIC) - set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C) - - set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in") - - foreach(RESOURCE_FILE ${EMBED_RESOURCES}) - set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S") - set(BINARY_FILE_NAME "${RESOURCE_FILE}") - - # Normalize the name of the resource. - string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex - string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}") - - # Generate the configured assembly file in the output directory. - configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY) - - # Set the include directory for relative paths specified for `.incbin` directive. - set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}") - - target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}") - set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}") - endforeach() -endmacro() diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 3a33b3b9989..acc38b6fa2a 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -1,43 +1,38 @@ -# Usage: -# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes -# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") -# include (cmake/limit_jobs.cmake) +# Limit compiler/linker job concurrency to avoid OOMs on subtrees where compilation/linking is memory-intensive. 
+# +# Usage from CMake: +# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # megabyte +# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") # megabyte +# include (cmake/limit_jobs.cmake) +# +# (bigger values mean fewer jobs) -cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) # Not available under freebsd +cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES) -# 1 if not set -option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" "") +# Set to disable the automatic job-limiting +option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" OFF) +option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" OFF) -# 1 if not set -option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" "") - -if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY) +if (NOT PARALLEL_COMPILE_JOBS AND MAX_COMPILER_MEMORY) math(EXPR PARALLEL_COMPILE_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_COMPILER_MEMORY}) if (NOT PARALLEL_COMPILE_JOBS) set (PARALLEL_COMPILE_JOBS 1) endif () - if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) - set (PARALLEL_COMPILE_JOBS_LESS TRUE) + if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) + message(WARNING "The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") endif() endif () -if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)) - set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR}) - string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE}) - set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS}) -endif () - - -if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY) +if (NOT PARALLEL_LINK_JOBS AND MAX_LINKER_MEMORY) math(EXPR PARALLEL_LINK_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_LINKER_MEMORY}) if (NOT PARALLEL_LINK_JOBS) set (PARALLEL_LINK_JOBS 1) endif () - if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) - set (PARALLEL_LINK_JOBS_LESS TRUE) + if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) + message(WARNING "The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). 
Set PARALLEL_LINK_JOBS to override.") endif() endif () @@ -52,20 +47,16 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE set (PARALLEL_LINK_JOBS 2) endif() -if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)) +message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).") + +if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) + set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR}) + string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE}) + set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS}) +endif () + +if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR}) string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK}) set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS}) endif () - -if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) - message(STATUS - "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory. - Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") - if (PARALLEL_COMPILE_JOBS_LESS) - message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") - endif() - if (PARALLEL_LINK_JOBS_LESS) - message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). 
Set PARALLEL_LINK_JOBS to override.") - endif() -endif () diff --git a/cmake/linux/toolchain-s390x.cmake b/cmake/linux/toolchain-s390x.cmake index b85d4253b89..945eb9affa4 100644 --- a/cmake/linux/toolchain-s390x.cmake +++ b/cmake/linux/toolchain-s390x.cmake @@ -20,6 +20,9 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/s390x-linux-gnu/libc") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") +set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") +set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64") set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) diff --git a/cmake/split_debug_symbols.cmake b/cmake/split_debug_symbols.cmake index a9c2158359a..d6821eb6c48 100644 --- a/cmake/split_debug_symbols.cmake +++ b/cmake/split_debug_symbols.cmake @@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols) # Splits debug symbols into separate file, leaves the binary untouched: COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" - # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check: - COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check. + # Also, after we disabled the export of symbols for dynamic linking, we still to keep a static symbol table for good stack traces. 
+ COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" # Associate stripped binary with debug symbols: COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMENT "Stripping clickhouse binary" VERBATIM diff --git a/cmake/target.cmake b/cmake/target.cmake index ffab08f1103..204a67d4357 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -47,7 +47,7 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_RUST OFF CACHE INTERNAL "") elseif (ARCH_S390X) set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_SENTRY OFF CACHE INTERNAL "") + set (ENABLE_RUST OFF CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2af468970f1..16135351cce 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -92,6 +92,7 @@ add_contrib (google-protobuf-cmake google-protobuf) add_contrib (openldap-cmake openldap) add_contrib (grpc-cmake grpc) add_contrib (msgpack-c-cmake msgpack-c) +add_contrib (libarchive-cmake libarchive) add_contrib (corrosion-cmake corrosion) @@ -164,14 +165,13 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) -add_contrib (hashidsxx-cmake hashidsxx) +add_contrib (incbin-cmake incbin) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) add_contrib (libstemmer-c-cmake libstemmer_c) add_contrib (wordnet-blast-cmake wordnet-blast) add_contrib (lemmagen-c-cmake lemmagen-c) - add_contrib (nlp-data-cmake nlp-data) add_contrib (cld2-cmake cld2) endif() diff --git a/contrib/NuRaft b/contrib/NuRaft index 491eaf592d9..eb1572129c7 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 491eaf592d950e0e37accbe8b3f217e068c9fecf +Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96 diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 46b86cb4ddb..e3ea0381595 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -502,9 +502,10 @@ target_include_directories(_parquet SYSTEM BEFORE "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src") target_link_libraries(_parquet - PUBLIC _arrow - PRIVATE + PUBLIC + _arrow ch_contrib::thrift + PRIVATE boost::headers_only boost::regex OpenSSL::Crypto OpenSSL::SSL) diff --git a/contrib/cctz b/contrib/cctz index 5e05432420f..8529bcef5cd 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2 +Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 10070fbd949..7161f743de1 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -1,4 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz") set (SRCS @@ -23,12 +22,10 @@ if (OS_FREEBSD) endif () # Related to time_zones table: -# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build -# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX -# as the library that's built using embedded tzdata is also specific to OS_LINUX -set(SYSTEM_STORAGE_TZ_FILE 
"${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") +# TimeZones.generated.cpp is autogenerated each time during a build +set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp") # remove existing copies so that its generated fresh on each build. -file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) +file(REMOVE ${TIMEZONES_FILE}) # get the list of timezones from tzdata shipped with cctz set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo") @@ -36,28 +33,44 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION) set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}") message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") -set(TIMEZONE_RESOURCE_FILES) - # each file in that dir (except of tab and localtime) store the info about timezone execute_process(COMMAND bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE TIMEZONES) -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" ) +file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") +file(APPEND ${TIMEZONES_FILE} "#include \n") + +set (COUNTER 1) +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n") + MATH(EXPR COUNTER "${COUNTER}+1") +endforeach(TIMEZONE) + +file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" ) foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n") - list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}") + file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n") + MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n") -clickhouse_embed_binaries( - TARGET tzdata - RESOURCE_DIR "${TZDIR}" - RESOURCES ${TIMEZONE_RESOURCE_FILES} -) -add_dependencies(_cctz tzdata) -target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") + +file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n") + +file(APPEND ${TIMEZONES_FILE} "#include \n\n") +file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" ) + +set (COUNTER 1) +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n") + MATH(EXPR COUNTER "${COUNTER}+1") +endforeach(TIMEZONE) + +file(APPEND ${TIMEZONES_FILE} " return {};\n") +file(APPEND ${TIMEZONES_FILE} "}\n") + +add_library (tzdata ${TIMEZONES_FILE}) +target_link_libraries(tzdata ch_contrib::incbin) +target_link_libraries(_cctz tzdata) add_library(ch_contrib::cctz ALIAS _cctz) diff --git a/contrib/cityhash102/include/city.h b/contrib/cityhash102/include/city.h index 87363d16444..c98eb7e3585 100644 --- a/contrib/cityhash102/include/city.h +++ b/contrib/cityhash102/include/city.h @@ -73,8 +73,8 @@ struct uint128 uint128() = default; uint128(uint64 low64_, uint64 high64_) : low64(low64_), high64(high64_) {} - friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); } - friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); } + + friend auto operator<=>(const uint128 &, const uint128 &) = default; }; inline uint64 
Uint128Low64(const uint128 & x) { return x.low64; } diff --git a/contrib/curl b/contrib/curl index b0edf0b7dae..eb3b049df52 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d +Subproject commit eb3b049df526bf125eda23218e680ce7fa9ec46c diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 70d9c2816dc..733865d5101 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -8,125 +8,122 @@ endif() set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") set (SRCS - "${LIBRARY_DIR}/lib/fopen.c" - "${LIBRARY_DIR}/lib/noproxy.c" - "${LIBRARY_DIR}/lib/idn.c" - "${LIBRARY_DIR}/lib/cfilters.c" - "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/altsvc.c" + "${LIBRARY_DIR}/lib/amigaos.c" + "${LIBRARY_DIR}/lib/asyn-thread.c" + "${LIBRARY_DIR}/lib/base64.c" + "${LIBRARY_DIR}/lib/bufq.c" + "${LIBRARY_DIR}/lib/bufref.c" + "${LIBRARY_DIR}/lib/cf-h1-proxy.c" "${LIBRARY_DIR}/lib/cf-haproxy.c" "${LIBRARY_DIR}/lib/cf-https-connect.c" - "${LIBRARY_DIR}/lib/file.c" - "${LIBRARY_DIR}/lib/timeval.c" - "${LIBRARY_DIR}/lib/base64.c" - "${LIBRARY_DIR}/lib/hostip.c" - "${LIBRARY_DIR}/lib/progress.c" - "${LIBRARY_DIR}/lib/formdata.c" - "${LIBRARY_DIR}/lib/cookie.c" - "${LIBRARY_DIR}/lib/http.c" - "${LIBRARY_DIR}/lib/sendf.c" - "${LIBRARY_DIR}/lib/url.c" - "${LIBRARY_DIR}/lib/dict.c" - "${LIBRARY_DIR}/lib/if2ip.c" - "${LIBRARY_DIR}/lib/speedcheck.c" - "${LIBRARY_DIR}/lib/ldap.c" - "${LIBRARY_DIR}/lib/version.c" - "${LIBRARY_DIR}/lib/getenv.c" - "${LIBRARY_DIR}/lib/escape.c" - "${LIBRARY_DIR}/lib/mprintf.c" - "${LIBRARY_DIR}/lib/telnet.c" - "${LIBRARY_DIR}/lib/netrc.c" - "${LIBRARY_DIR}/lib/getinfo.c" - "${LIBRARY_DIR}/lib/transfer.c" - "${LIBRARY_DIR}/lib/strcase.c" - "${LIBRARY_DIR}/lib/easy.c" - "${LIBRARY_DIR}/lib/curl_fnmatch.c" - "${LIBRARY_DIR}/lib/curl_log.c" - "${LIBRARY_DIR}/lib/fileinfo.c" - "${LIBRARY_DIR}/lib/krb5.c" - "${LIBRARY_DIR}/lib/memdebug.c" - "${LIBRARY_DIR}/lib/http_chunks.c" - "${LIBRARY_DIR}/lib/strtok.c" + "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/cfilters.c" + "${LIBRARY_DIR}/lib/conncache.c" "${LIBRARY_DIR}/lib/connect.c" - "${LIBRARY_DIR}/lib/llist.c" - "${LIBRARY_DIR}/lib/hash.c" - "${LIBRARY_DIR}/lib/multi.c" "${LIBRARY_DIR}/lib/content_encoding.c" - "${LIBRARY_DIR}/lib/share.c" - "${LIBRARY_DIR}/lib/http_digest.c" - "${LIBRARY_DIR}/lib/md4.c" - "${LIBRARY_DIR}/lib/md5.c" - "${LIBRARY_DIR}/lib/http_negotiate.c" - "${LIBRARY_DIR}/lib/inet_pton.c" - "${LIBRARY_DIR}/lib/strtoofft.c" - "${LIBRARY_DIR}/lib/strerror.c" - "${LIBRARY_DIR}/lib/amigaos.c" + "${LIBRARY_DIR}/lib/cookie.c" + "${LIBRARY_DIR}/lib/curl_addrinfo.c" + "${LIBRARY_DIR}/lib/curl_des.c" + "${LIBRARY_DIR}/lib/curl_endian.c" + "${LIBRARY_DIR}/lib/curl_fnmatch.c" + "${LIBRARY_DIR}/lib/curl_get_line.c" + "${LIBRARY_DIR}/lib/curl_gethostname.c" + "${LIBRARY_DIR}/lib/curl_gssapi.c" + "${LIBRARY_DIR}/lib/curl_memrchr.c" + "${LIBRARY_DIR}/lib/curl_multibyte.c" + "${LIBRARY_DIR}/lib/curl_ntlm_core.c" + "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" + "${LIBRARY_DIR}/lib/curl_path.c" + "${LIBRARY_DIR}/lib/curl_range.c" + "${LIBRARY_DIR}/lib/curl_rtmp.c" + "${LIBRARY_DIR}/lib/curl_sasl.c" + "${LIBRARY_DIR}/lib/curl_sspi.c" + "${LIBRARY_DIR}/lib/curl_threads.c" + "${LIBRARY_DIR}/lib/curl_trc.c" + "${LIBRARY_DIR}/lib/dict.c" + "${LIBRARY_DIR}/lib/doh.c" + "${LIBRARY_DIR}/lib/dynbuf.c" + "${LIBRARY_DIR}/lib/dynhds.c" + "${LIBRARY_DIR}/lib/easy.c" + "${LIBRARY_DIR}/lib/escape.c" + 
"${LIBRARY_DIR}/lib/file.c" + "${LIBRARY_DIR}/lib/fileinfo.c" + "${LIBRARY_DIR}/lib/fopen.c" + "${LIBRARY_DIR}/lib/formdata.c" + "${LIBRARY_DIR}/lib/getenv.c" + "${LIBRARY_DIR}/lib/getinfo.c" + "${LIBRARY_DIR}/lib/gopher.c" + "${LIBRARY_DIR}/lib/hash.c" + "${LIBRARY_DIR}/lib/headers.c" + "${LIBRARY_DIR}/lib/hmac.c" "${LIBRARY_DIR}/lib/hostasyn.c" + "${LIBRARY_DIR}/lib/hostip.c" "${LIBRARY_DIR}/lib/hostip4.c" "${LIBRARY_DIR}/lib/hostip6.c" "${LIBRARY_DIR}/lib/hostsyn.c" + "${LIBRARY_DIR}/lib/hsts.c" + "${LIBRARY_DIR}/lib/http.c" + "${LIBRARY_DIR}/lib/http2.c" + "${LIBRARY_DIR}/lib/http_aws_sigv4.c" + "${LIBRARY_DIR}/lib/http_chunks.c" + "${LIBRARY_DIR}/lib/http_digest.c" + "${LIBRARY_DIR}/lib/http_negotiate.c" + "${LIBRARY_DIR}/lib/http_ntlm.c" + "${LIBRARY_DIR}/lib/http_proxy.c" + "${LIBRARY_DIR}/lib/idn.c" + "${LIBRARY_DIR}/lib/if2ip.c" + "${LIBRARY_DIR}/lib/imap.c" "${LIBRARY_DIR}/lib/inet_ntop.c" + "${LIBRARY_DIR}/lib/inet_pton.c" + "${LIBRARY_DIR}/lib/krb5.c" + "${LIBRARY_DIR}/lib/ldap.c" + "${LIBRARY_DIR}/lib/llist.c" + "${LIBRARY_DIR}/lib/md4.c" + "${LIBRARY_DIR}/lib/md5.c" + "${LIBRARY_DIR}/lib/memdebug.c" + "${LIBRARY_DIR}/lib/mime.c" + "${LIBRARY_DIR}/lib/mprintf.c" + "${LIBRARY_DIR}/lib/mqtt.c" + "${LIBRARY_DIR}/lib/multi.c" + "${LIBRARY_DIR}/lib/netrc.c" + "${LIBRARY_DIR}/lib/nonblock.c" + "${LIBRARY_DIR}/lib/noproxy.c" + "${LIBRARY_DIR}/lib/openldap.c" "${LIBRARY_DIR}/lib/parsedate.c" + "${LIBRARY_DIR}/lib/pingpong.c" + "${LIBRARY_DIR}/lib/pop3.c" + "${LIBRARY_DIR}/lib/progress.c" + "${LIBRARY_DIR}/lib/psl.c" + "${LIBRARY_DIR}/lib/rand.c" + "${LIBRARY_DIR}/lib/rename.c" + "${LIBRARY_DIR}/lib/rtsp.c" "${LIBRARY_DIR}/lib/select.c" - "${LIBRARY_DIR}/lib/splay.c" - "${LIBRARY_DIR}/lib/strdup.c" + "${LIBRARY_DIR}/lib/sendf.c" + "${LIBRARY_DIR}/lib/setopt.c" + "${LIBRARY_DIR}/lib/sha256.c" + "${LIBRARY_DIR}/lib/share.c" + "${LIBRARY_DIR}/lib/slist.c" + "${LIBRARY_DIR}/lib/smb.c" + "${LIBRARY_DIR}/lib/smtp.c" + "${LIBRARY_DIR}/lib/socketpair.c" "${LIBRARY_DIR}/lib/socks.c" - "${LIBRARY_DIR}/lib/curl_addrinfo.c" "${LIBRARY_DIR}/lib/socks_gssapi.c" "${LIBRARY_DIR}/lib/socks_sspi.c" - "${LIBRARY_DIR}/lib/curl_sspi.c" - "${LIBRARY_DIR}/lib/slist.c" - "${LIBRARY_DIR}/lib/nonblock.c" - "${LIBRARY_DIR}/lib/curl_memrchr.c" - "${LIBRARY_DIR}/lib/imap.c" - "${LIBRARY_DIR}/lib/pop3.c" - "${LIBRARY_DIR}/lib/smtp.c" - "${LIBRARY_DIR}/lib/pingpong.c" - "${LIBRARY_DIR}/lib/rtsp.c" - "${LIBRARY_DIR}/lib/curl_threads.c" - "${LIBRARY_DIR}/lib/warnless.c" - "${LIBRARY_DIR}/lib/hmac.c" - "${LIBRARY_DIR}/lib/curl_rtmp.c" - "${LIBRARY_DIR}/lib/openldap.c" - "${LIBRARY_DIR}/lib/curl_gethostname.c" - "${LIBRARY_DIR}/lib/gopher.c" - "${LIBRARY_DIR}/lib/http_proxy.c" - "${LIBRARY_DIR}/lib/asyn-thread.c" - "${LIBRARY_DIR}/lib/curl_gssapi.c" - "${LIBRARY_DIR}/lib/http_ntlm.c" - "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" - "${LIBRARY_DIR}/lib/curl_ntlm_core.c" - "${LIBRARY_DIR}/lib/curl_sasl.c" - "${LIBRARY_DIR}/lib/rand.c" - "${LIBRARY_DIR}/lib/curl_multibyte.c" - "${LIBRARY_DIR}/lib/conncache.c" - "${LIBRARY_DIR}/lib/cf-h1-proxy.c" - "${LIBRARY_DIR}/lib/http2.c" - "${LIBRARY_DIR}/lib/smb.c" - "${LIBRARY_DIR}/lib/curl_endian.c" - "${LIBRARY_DIR}/lib/curl_des.c" + "${LIBRARY_DIR}/lib/speedcheck.c" + "${LIBRARY_DIR}/lib/splay.c" + "${LIBRARY_DIR}/lib/strcase.c" + "${LIBRARY_DIR}/lib/strdup.c" + "${LIBRARY_DIR}/lib/strerror.c" + "${LIBRARY_DIR}/lib/strtok.c" + "${LIBRARY_DIR}/lib/strtoofft.c" "${LIBRARY_DIR}/lib/system_win32.c" - "${LIBRARY_DIR}/lib/mime.c" - "${LIBRARY_DIR}/lib/sha256.c" - "${LIBRARY_DIR}/lib/setopt.c" - 
"${LIBRARY_DIR}/lib/curl_path.c" - "${LIBRARY_DIR}/lib/curl_range.c" - "${LIBRARY_DIR}/lib/psl.c" - "${LIBRARY_DIR}/lib/doh.c" - "${LIBRARY_DIR}/lib/urlapi.c" - "${LIBRARY_DIR}/lib/curl_get_line.c" - "${LIBRARY_DIR}/lib/altsvc.c" - "${LIBRARY_DIR}/lib/socketpair.c" - "${LIBRARY_DIR}/lib/bufref.c" - "${LIBRARY_DIR}/lib/bufq.c" - "${LIBRARY_DIR}/lib/dynbuf.c" - "${LIBRARY_DIR}/lib/dynhds.c" - "${LIBRARY_DIR}/lib/hsts.c" - "${LIBRARY_DIR}/lib/http_aws_sigv4.c" - "${LIBRARY_DIR}/lib/mqtt.c" - "${LIBRARY_DIR}/lib/rename.c" - "${LIBRARY_DIR}/lib/headers.c" + "${LIBRARY_DIR}/lib/telnet.c" "${LIBRARY_DIR}/lib/timediff.c" - "${LIBRARY_DIR}/lib/vauth/vauth.c" + "${LIBRARY_DIR}/lib/timeval.c" + "${LIBRARY_DIR}/lib/transfer.c" + "${LIBRARY_DIR}/lib/url.c" + "${LIBRARY_DIR}/lib/urlapi.c" "${LIBRARY_DIR}/lib/vauth/cleartext.c" "${LIBRARY_DIR}/lib/vauth/cram.c" "${LIBRARY_DIR}/lib/vauth/digest.c" @@ -138,23 +135,24 @@ set (SRCS "${LIBRARY_DIR}/lib/vauth/oauth2.c" "${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c" "${LIBRARY_DIR}/lib/vauth/spnego_sspi.c" + "${LIBRARY_DIR}/lib/vauth/vauth.c" + "${LIBRARY_DIR}/lib/version.c" "${LIBRARY_DIR}/lib/vquic/vquic.c" - "${LIBRARY_DIR}/lib/vtls/openssl.c" + "${LIBRARY_DIR}/lib/vssh/libssh.c" + "${LIBRARY_DIR}/lib/vssh/libssh2.c" + "${LIBRARY_DIR}/lib/vtls/bearssl.c" "${LIBRARY_DIR}/lib/vtls/gtls.c" - "${LIBRARY_DIR}/lib/vtls/vtls.c" - "${LIBRARY_DIR}/lib/vtls/nss.c" - "${LIBRARY_DIR}/lib/vtls/wolfssl.c" + "${LIBRARY_DIR}/lib/vtls/hostcheck.c" + "${LIBRARY_DIR}/lib/vtls/keylog.c" + "${LIBRARY_DIR}/lib/vtls/mbedtls.c" + "${LIBRARY_DIR}/lib/vtls/openssl.c" "${LIBRARY_DIR}/lib/vtls/schannel.c" "${LIBRARY_DIR}/lib/vtls/schannel_verify.c" "${LIBRARY_DIR}/lib/vtls/sectransp.c" - "${LIBRARY_DIR}/lib/vtls/gskit.c" - "${LIBRARY_DIR}/lib/vtls/mbedtls.c" - "${LIBRARY_DIR}/lib/vtls/bearssl.c" - "${LIBRARY_DIR}/lib/vtls/keylog.c" + "${LIBRARY_DIR}/lib/vtls/vtls.c" + "${LIBRARY_DIR}/lib/vtls/wolfssl.c" "${LIBRARY_DIR}/lib/vtls/x509asn1.c" - "${LIBRARY_DIR}/lib/vtls/hostcheck.c" - "${LIBRARY_DIR}/lib/vssh/libssh2.c" - "${LIBRARY_DIR}/lib/vssh/libssh.c" + "${LIBRARY_DIR}/lib/warnless.c" ) add_library (_curl ${SRCS}) diff --git a/contrib/hashidsxx b/contrib/hashidsxx deleted file mode 160000 index 783f6911ccf..00000000000 --- a/contrib/hashidsxx +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt deleted file mode 100644 index 17f3888bd94..00000000000 --- a/contrib/hashidsxx-cmake/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx") - -set (SRCS - "${LIBRARY_DIR}/hashids.cpp" -) - -set (HDRS - "${LIBRARY_DIR}/hashids.h" -) - -add_library(_hashidsxx ${SRCS} ${HDRS}) -target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}") - -add_library(ch_contrib::hashidsxx ALIAS _hashidsxx) diff --git a/contrib/idxd-config b/contrib/idxd-config index f6605c41a73..a836ce0e420 160000 --- a/contrib/idxd-config +++ b/contrib/idxd-config @@ -1 +1 @@ -Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99 +Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1 diff --git a/contrib/incbin b/contrib/incbin new file mode 160000 index 00000000000..6e576cae5ab --- /dev/null +++ b/contrib/incbin @@ -0,0 +1 @@ +Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt new file mode 100644 index 00000000000..5778cf83c22 --- 
/dev/null +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -0,0 +1,8 @@ +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin") +add_library(_incbin INTERFACE) +target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR}) +add_library(ch_contrib::incbin ALIAS _incbin) + +# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. +# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." +target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 20025dfc63e..15e965ed841 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ if (SANITIZE OR NOT ( - ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64)) OR + ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64 OR ARCH_S390X)) OR (OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) )) if (ENABLE_JEMALLOC) @@ -17,17 +17,17 @@ if (NOT ENABLE_JEMALLOC) endif () if (NOT OS_LINUX) - message (WARNING "jemalloc support on non-linux is EXPERIMENTAL") + message (WARNING "jemalloc support on non-Linux is EXPERIMENTAL") endif() if (OS_LINUX) - # ThreadPool select job randomly, and there can be some threads that had been - # performed some memory heavy task before and will be inactive for some time, - # but until it will became active again, the memory will not be freed since by - # default each thread has it's own arena, but there should be not more then + # ThreadPool select job randomly, and there can be some threads that have been + # performed some memory-heavy tasks before and will be inactive for some time, + # but until it becomes active again, the memory will not be freed since, by + # default, each thread has its arena, but there should be no more than # 4*CPU arenas (see opt.nareans description). # - # By enabling percpu_arena number of arenas limited to number of CPUs and hence + # By enabling percpu_arena number of arenas is limited to the number of CPUs, and hence # this problem should go away. # # muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to @@ -38,7 +38,7 @@ if (OS_LINUX) else() set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000") endif() -# CACHE variable is empty, to allow changing defaults without necessity +# CACHE variable is empty to allow changing defaults without the necessity # to purge cache set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" ) if (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE) @@ -148,6 +148,8 @@ elseif (ARCH_PPC64LE) set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_ppc64le") elseif (ARCH_RISCV64) set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_riscv64") +elseif (ARCH_S390X) + set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_s390x") else () message (FATAL_ERROR "internal jemalloc: This arch is not supported") endif () @@ -172,7 +174,7 @@ target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) # jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. # The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. 
-# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. +# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracking. # # ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) diff --git a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in new file mode 100644 index 00000000000..531f2bca0c2 --- /dev/null +++ b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in @@ -0,0 +1,435 @@ +/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ +#ifndef JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ +/* + * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all + * public APIs to be prefixed. This makes it possible, with some care, to use + * multiple allocators simultaneously. + */ +/* #undef JEMALLOC_PREFIX */ +/* #undef JEMALLOC_CPREFIX */ + +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#define JEMALLOC_OVERRIDE___LIBC_CALLOC +#define JEMALLOC_OVERRIDE___LIBC_FREE +#define JEMALLOC_OVERRIDE___LIBC_MALLOC +#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN +#define JEMALLOC_OVERRIDE___LIBC_REALLOC +#define JEMALLOC_OVERRIDE___LIBC_VALLOC +#define JEMALLOC_OVERRIDE___LIBC_PVALLOC +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 64 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +#define JEMALLOC_HAVE_SECURE_GETENV + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. 
*/ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. + */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* JEMALLOC_PAGEID enabled page id */ +/* #undef JEMALLOC_PAGEID */ + +/* JEMALLOC_HAVE_PRCTL checks prctl */ +#define JEMALLOC_HAVE_PRCTL + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. 
Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 20 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. + */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). 
*/ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +#define JEMALLOC_BIG_ENDIAN + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +#define JEMALLOC_ENABLE_CXX + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". 
*/ +#define JEMALLOC_ZERO_REALLOC_DEFAULT_FREE + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/libarchive b/contrib/libarchive new file mode 160000 index 00000000000..ee457961713 --- /dev/null +++ b/contrib/libarchive @@ -0,0 +1 @@ +Subproject commit ee45796171324519f0c0bfd012018dd099296336 diff --git a/contrib/libarchive-cmake/CMakeLists.txt b/contrib/libarchive-cmake/CMakeLists.txt new file mode 100644 index 00000000000..fb64266185e --- /dev/null +++ b/contrib/libarchive-cmake/CMakeLists.txt @@ -0,0 +1,172 @@ +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive") + +set(SRCS + "${LIBRARY_DIR}/libarchive/archive_acl.c" + "${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c" + "${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c" + "${LIBRARY_DIR}/libarchive/archive_check_magic.c" + "${LIBRARY_DIR}/libarchive/archive_cmdline.c" + "${LIBRARY_DIR}/libarchive/archive_cryptor.c" + "${LIBRARY_DIR}/libarchive/archive_digest.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_darwin.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_freebsd.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_linux.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_sunos.c" + "${LIBRARY_DIR}/libarchive/archive_entry.c" + "${LIBRARY_DIR}/libarchive/archive_entry_copy_bhfi.c" + "${LIBRARY_DIR}/libarchive/archive_entry_copy_stat.c" + "${LIBRARY_DIR}/libarchive/archive_entry_link_resolver.c" + "${LIBRARY_DIR}/libarchive/archive_entry_sparse.c" + "${LIBRARY_DIR}/libarchive/archive_entry_stat.c" + "${LIBRARY_DIR}/libarchive/archive_entry_strmode.c" + "${LIBRARY_DIR}/libarchive/archive_entry_xattr.c" + "${LIBRARY_DIR}/libarchive/archive_getdate.c" + "${LIBRARY_DIR}/libarchive/archive_hmac.c" + "${LIBRARY_DIR}/libarchive/archive_match.c" + "${LIBRARY_DIR}/libarchive/archive_options.c" + "${LIBRARY_DIR}/libarchive/archive_pack_dev.c" + "${LIBRARY_DIR}/libarchive/archive_pathmatch.c" + "${LIBRARY_DIR}/libarchive/archive_ppmd7.c" + "${LIBRARY_DIR}/libarchive/archive_ppmd8.c" + "${LIBRARY_DIR}/libarchive/archive_random.c" + "${LIBRARY_DIR}/libarchive/archive_rb.c" + "${LIBRARY_DIR}/libarchive/archive_read_add_passphrase.c" + "${LIBRARY_DIR}/libarchive/archive_read_append_filter.c" + "${LIBRARY_DIR}/libarchive/archive_read.c" + "${LIBRARY_DIR}/libarchive/archive_read_data_into_fd.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c" + "${LIBRARY_DIR}/libarchive/archive_read_extract2.c" + "${LIBRARY_DIR}/libarchive/archive_read_extract.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_fd.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_file.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_filename.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_memory.c" + "${LIBRARY_DIR}/libarchive/archive_read_set_format.c" + "${LIBRARY_DIR}/libarchive/archive_read_set_options.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_all.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_by_code.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_bzip2.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_compress.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_grzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_gzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lrzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lz4.c" + 
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_lzop.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_none.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_program.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_rpm.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_uu.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_xz.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_zstd.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_7zip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_all.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_ar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_by_code.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_cab.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_cpio.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_empty.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_iso9660.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_lha.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_mtree.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar5.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_raw.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_tar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_warc.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_xar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_zip.c" + "${LIBRARY_DIR}/libarchive/archive_string.c" + "${LIBRARY_DIR}/libarchive/archive_string_sprintf.c" + "${LIBRARY_DIR}/libarchive/archive_util.c" + "${LIBRARY_DIR}/libarchive/archive_version_details.c" + "${LIBRARY_DIR}/libarchive/archive_virtual.c" + "${LIBRARY_DIR}/libarchive/archive_windows.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_b64encode.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_by_name.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_bzip2.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_compress.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_grzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_gzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lrzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lz4.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lzop.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_none.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_program.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_uuencode.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_xz.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_zstd.c" + "${LIBRARY_DIR}/libarchive/archive_write.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_posix.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_set_standard_lookup.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_windows.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_fd.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_file.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_filename.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_memory.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_7zip.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_ar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_by_name.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format.c" + 
"${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_binary.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_newc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_odc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_filter_by_ext.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_gnutar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_iso9660.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_mtree.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_pax.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_raw.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_shar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_ustar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_v7tar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_warc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_xar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_zip.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_options.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_passphrase.c" + "${LIBRARY_DIR}/libarchive/filter_fork_posix.c" + "${LIBRARY_DIR}/libarchive/filter_fork_windows.c" + "${LIBRARY_DIR}/libarchive/xxhash.c" +) + +add_library(_libarchive ${SRCS}) +target_include_directories(_libarchive PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + "${LIBRARY_DIR}/libarchive" +) + +target_compile_definitions(_libarchive PUBLIC + HAVE_CONFIG_H +) + +target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier") + +if (TARGET ch_contrib::xz) + target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::xz) +endif() + +if (TARGET ch_contrib::zlib) + target_compile_definitions(_libarchive PUBLIC HAVE_ZLIB_H=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::zlib) +endif() + +if (OS_LINUX) + target_compile_definitions( + _libarchive PUBLIC + MAJOR_IN_SYSMACROS=1 + HAVE_LINUX_FS_H=1 + HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1 + HAVE_LINUX_TYPES_H=1 + HAVE_SYS_STATFS_H=1 + HAVE_FUTIMESAT=1 + HAVE_ICONV=1 + ) +endif() + +add_library(ch_contrib::libarchive ALIAS _libarchive) \ No newline at end of file diff --git a/contrib/libarchive-cmake/config.h b/contrib/libarchive-cmake/config.h new file mode 100644 index 00000000000..0b0cab47a52 --- /dev/null +++ b/contrib/libarchive-cmake/config.h @@ -0,0 +1,1391 @@ +/* config.h. Generated from build/cmake/config.h.in by cmake configure */ +#define __LIBARCHIVE_CONFIG_H_INCLUDED 1 + +/* + * Ensure we have C99-style int64_t, etc, all defined. + */ + +/* First, we need to know if the system has already defined them. */ +#define HAVE_INT16_T +#define HAVE_INT32_T +#define HAVE_INT64_T +#define HAVE_INTMAX_T + +#define HAVE_UINT8_T +#define HAVE_UINT16_T +#define HAVE_UINT32_T +#define HAVE_UINT64_T +#define HAVE_UINTMAX_T + +/* We might have the types we want under other spellings. */ +/* #undef HAVE___INT64 */ +/* #undef HAVE_U_INT64_T */ +/* #undef HAVE_UNSIGNED___INT64 */ + +/* The sizes of various standard integer types. */ +#define SIZEOF_SHORT 2 +#define SIZEOF_INT 4 +#define SIZEOF_LONG 8 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF_UNSIGNED_SHORT 2 +#define SIZEOF_UNSIGNED 4 +#define SIZEOF_UNSIGNED_LONG 8 +#define SIZEOF_UNSIGNED_LONG_LONG 8 + +/* + * If we lack int64_t, define it to the first of __int64, int, long, and long long + * that exists and is the right size. 
+ */ +#if !defined(HAVE_INT64_T) && defined(HAVE___INT64) +typedef __int64 int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_INT == 8 +typedef int int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_LONG == 8 +typedef long int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_LONG_LONG == 8 +typedef long long int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) +#error No 64-bit integer type was found. +#endif + +/* + * Similarly for int32_t + */ +#if !defined(HAVE_INT32_T) && SIZEOF_INT == 4 +typedef int int32_t; +#define HAVE_INT32_T +#endif + +#if !defined(HAVE_INT32_T) && SIZEOF_LONG == 4 +typedef long int32_t; +#define HAVE_INT32_T +#endif + +#if !defined(HAVE_INT32_T) +#error No 32-bit integer type was found. +#endif + +/* + * Similarly for int16_t + */ +#if !defined(HAVE_INT16_T) && SIZEOF_INT == 2 +typedef int int16_t; +#define HAVE_INT16_T +#endif + +#if !defined(HAVE_INT16_T) && SIZEOF_SHORT == 2 +typedef short int16_t; +#define HAVE_INT16_T +#endif + +#if !defined(HAVE_INT16_T) +#error No 16-bit integer type was found. +#endif + +/* + * Similarly for uint64_t + */ +#if !defined(HAVE_UINT64_T) && defined(HAVE_UNSIGNED___INT64) +typedef unsigned __int64 uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED == 8 +typedef unsigned uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG == 8 +typedef unsigned long uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG_LONG == 8 +typedef unsigned long long uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) +#error No 64-bit unsigned integer type was found. +#endif + + +/* + * Similarly for uint32_t + */ +#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED == 4 +typedef unsigned uint32_t; +#define HAVE_UINT32_T +#endif + +#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED_LONG == 4 +typedef unsigned long uint32_t; +#define HAVE_UINT32_T +#endif + +#if !defined(HAVE_UINT32_T) +#error No 32-bit unsigned integer type was found. +#endif + +/* + * Similarly for uint16_t + */ +#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED == 2 +typedef unsigned uint16_t; +#define HAVE_UINT16_T +#endif + +#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED_SHORT == 2 +typedef unsigned short uint16_t; +#define HAVE_UINT16_T +#endif + +#if !defined(HAVE_UINT16_T) +#error No 16-bit unsigned integer type was found. +#endif + +/* + * Similarly for uint8_t + */ +#if !defined(HAVE_UINT8_T) +typedef unsigned char uint8_t; +#define HAVE_UINT8_T +#endif + +#if !defined(HAVE_UINT8_T) +#error No 8-bit unsigned integer type was found. +#endif + +/* Define intmax_t and uintmax_t if they are not already defined. */ +#if !defined(HAVE_INTMAX_T) +typedef int64_t intmax_t; +#endif + +#if !defined(HAVE_UINTMAX_T) +typedef uint64_t uintmax_t; +#endif + +/* Define ZLIB_WINAPI if zlib was built on Visual Studio. 
*/ +/* #undef ZLIB_WINAPI */ + +/* Darwin ACL support */ +/* #undef ARCHIVE_ACL_DARWIN */ + +/* FreeBSD ACL support */ +/* #undef ARCHIVE_ACL_FREEBSD */ + +/* FreeBSD NFSv4 ACL support */ +/* #undef ARCHIVE_ACL_FREEBSD_NFS4 */ + +/* Linux POSIX.1e ACL support via libacl */ +/* #undef ARCHIVE_ACL_LIBACL */ + +/* Linux NFSv4 ACL support via librichacl */ +/* #undef ARCHIVE_ACL_LIBRICHACL */ + +/* Solaris ACL support */ +/* #undef ARCHIVE_ACL_SUNOS */ + +/* Solaris NFSv4 ACL support */ +/* #undef ARCHIVE_ACL_SUNOS_NFS4 */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_LIBC */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_LIBSYSTEM */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_MBEDTLS */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_NETTLE */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_OPENSSL */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_WIN */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_LIBC */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_NETTLE */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_MBEDTLS */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_OPENSSL */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_LIBC */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_LIBSYSTEM */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_MBEDTLS */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_NETTLE */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_OPENSSL */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_WIN */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC2 */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC3 */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBSYSTEM */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_MBEDTLS */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_NETTLE */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_OPENSSL */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_WIN */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC2 */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC3 */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBSYSTEM */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_MBEDTLS */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_NETTLE supported. 
*/ +/* #undef ARCHIVE_CRYPTO_SHA384_NETTLE */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_OPENSSL */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_WIN */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC2 */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC3 */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBSYSTEM */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_MBEDTLS */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_NETTLE */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_OPENSSL */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_WIN */ + +/* AIX xattr support */ +/* #undef ARCHIVE_XATTR_AIX */ + +/* Darwin xattr support */ +/* #undef ARCHIVE_XATTR_DARWIN */ + +/* FreeBSD xattr support */ +/* #undef ARCHIVE_XATTR_FREEBSD */ + +/* Linux xattr support */ +/* #undef ARCHIVE_XATTR_LINUX */ + +/* Version number of bsdcpio */ +#define BSDCPIO_VERSION_STRING "3.7.0" + +/* Version number of bsdtar */ +#define BSDTAR_VERSION_STRING "3.7.0" + +/* Version number of bsdcat */ +#define BSDCAT_VERSION_STRING "3.7.0" + +/* Define to 1 if you have the `acl_create_entry' function. */ +/* #undef HAVE_ACL_CREATE_ENTRY */ + +/* Define to 1 if you have the `acl_get_fd_np' function. */ +/* #undef HAVE_ACL_GET_FD_NP */ + +/* Define to 1 if you have the `acl_get_link' function. */ +/* #undef HAVE_ACL_GET_LINK */ + +/* Define to 1 if you have the `acl_get_link_np' function. */ +/* #undef HAVE_ACL_GET_LINK_NP */ + +/* Define to 1 if you have the `acl_get_perm' function. */ +/* #undef HAVE_ACL_GET_PERM */ + +/* Define to 1 if you have the `acl_get_perm_np' function. */ +/* #undef HAVE_ACL_GET_PERM_NP */ + +/* Define to 1 if you have the `acl_init' function. */ +/* #undef HAVE_ACL_INIT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ACL_LIBACL_H */ + +/* Define to 1 if the system has the type `acl_permset_t'. */ +/* #undef HAVE_ACL_PERMSET_T */ + +/* Define to 1 if you have the `acl_set_fd' function. */ +/* #undef HAVE_ACL_SET_FD */ + +/* Define to 1 if you have the `acl_set_fd_np' function. */ +/* #undef HAVE_ACL_SET_FD_NP */ + +/* Define to 1 if you have the `acl_set_file' function. */ +/* #undef HAVE_ACL_SET_FILE */ + +/* Define to 1 if you have the `arc4random_buf' function. */ +/* #undef HAVE_ARC4RANDOM_BUF */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ATTR_XATTR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BCRYPT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BSDXML_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BZLIB_H */ + +/* Define to 1 if you have the `chflags' function. */ +/* #undef HAVE_CHFLAGS */ + +/* Define to 1 if you have the `chown' function. */ +#define HAVE_CHOWN 1 + +/* Define to 1 if you have the `chroot' function. */ +#define HAVE_CHROOT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_COPYFILE_H */ + +/* Define to 1 if you have the `ctime_r' function. */ +#define HAVE_CTIME_R 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_CTYPE_H 1 + +/* Define to 1 if you have the `cygwin_conv_path' function. */ +/* #undef HAVE_CYGWIN_CONV_PATH */ + +/* Define to 1 if you have the declaration of `ACE_GETACL', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_GETACL */ + +/* Define to 1 if you have the declaration of `ACE_GETACLCNT', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_GETACLCNT */ + +/* Define to 1 if you have the declaration of `ACE_SETACL', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_SETACL */ + +/* Define to 1 if you have the declaration of `ACL_SYNCHRONIZE', and to 0 if + you don't. */ +/* #undef HAVE_DECL_ACL_SYNCHRONIZE */ + +/* Define to 1 if you have the declaration of `ACL_TYPE_EXTENDED', and to 0 if + you don't. */ +/* #undef HAVE_DECL_ACL_TYPE_EXTENDED */ + +/* Define to 1 if you have the declaration of `ACL_TYPE_NFS4', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACL_TYPE_NFS4 */ + +/* Define to 1 if you have the declaration of `ACL_USER', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACL_USER */ + +/* Define to 1 if you have the declaration of `INT32_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INT32_MAX 1 + +/* Define to 1 if you have the declaration of `INT32_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INT32_MIN 1 + +/* Define to 1 if you have the declaration of `INT64_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INT64_MAX 1 + +/* Define to 1 if you have the declaration of `INT64_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INT64_MIN 1 + +/* Define to 1 if you have the declaration of `INTMAX_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INTMAX_MAX 1 + +/* Define to 1 if you have the declaration of `INTMAX_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INTMAX_MIN 1 + +/* Define to 1 if you have the declaration of `SETACL', and to 0 if you don't. + */ +/* #undef HAVE_DECL_SETACL */ + +/* Define to 1 if you have the declaration of `SIZE_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_SIZE_MAX 1 + +/* Define to 1 if you have the declaration of `SSIZE_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_SSIZE_MAX 1 + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#define HAVE_DECL_STRERROR_R 1 + +/* Define to 1 if you have the declaration of `UINT32_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINT32_MAX 1 + +/* Define to 1 if you have the declaration of `UINT64_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINT64_MAX 1 + +/* Define to 1 if you have the declaration of `UINTMAX_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINTMAX_MAX 1 + +/* Define to 1 if you have the declaration of `XATTR_NOFOLLOW', and to 0 if + you don't. */ +/* #undef HAVE_DECL_XATTR_NOFOLLOW */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DIRECT_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the `dirfd' function. */ +#define HAVE_DIRFD 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */ +/* #undef HAVE_DOPRNT */ + +/* Define to 1 if nl_langinfo supports D_MD_ORDER */ +/* #undef HAVE_D_MD_ORDER */ + +/* A possible errno value for invalid file format errors */ +/* #undef HAVE_EFTYPE */ + +/* A possible errno value for invalid file format errors */ +#define HAVE_EILSEQ 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_ERRNO_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EXPAT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EXT2FS_EXT2_FS_H */ + +/* Define to 1 if you have the `extattr_get_file' function. */ +/* #undef HAVE_EXTATTR_GET_FILE */ + +/* Define to 1 if you have the `extattr_list_file' function. */ +/* #undef HAVE_EXTATTR_LIST_FILE */ + +/* Define to 1 if you have the `extattr_set_fd' function. */ +/* #undef HAVE_EXTATTR_SET_FD */ + +/* Define to 1 if you have the `extattr_set_file' function. */ +/* #undef HAVE_EXTATTR_SET_FILE */ + +/* Define to 1 if EXTATTR_NAMESPACE_USER is defined in sys/extattr.h. */ +/* #undef HAVE_DECL_EXTATTR_NAMESPACE_USER */ + +/* Define to 1 if you have the declaration of `GETACL', and to 0 if you don't. + */ +/* #undef HAVE_DECL_GETACL */ + +/* Define to 1 if you have the declaration of `GETACLCNT', and to 0 if you + don't. */ +/* #undef HAVE_DECL_GETACLCNT */ + +/* Define to 1 if you have the `fchdir' function. */ +#define HAVE_FCHDIR 1 + +/* Define to 1 if you have the `fchflags' function. */ +/* #undef HAVE_FCHFLAGS */ + +/* Define to 1 if you have the `fchmod' function. */ +#define HAVE_FCHMOD 1 + +/* Define to 1 if you have the `fchown' function. */ +#define HAVE_FCHOWN 1 + +/* Define to 1 if you have the `fcntl' function. */ +#define HAVE_FCNTL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `fdopendir' function. */ +#define HAVE_FDOPENDIR 1 + +/* Define to 1 if you have the `fgetea' function. */ +/* #undef HAVE_FGETEA */ + +/* Define to 1 if you have the `fgetxattr' function. */ +/* #undef HAVE_FGETXATTR */ + +/* Define to 1 if you have the `flistea' function. */ +/* #undef HAVE_FLISTEA */ + +/* Define to 1 if you have the `flistxattr' function. */ +#define HAVE_FLISTXATTR 1 + +/* Define to 1 if you have the `fnmatch' function. */ +#define HAVE_FNMATCH 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FNMATCH_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ +#define HAVE_FSEEKO 1 + +/* Define to 1 if you have the `fsetea' function. */ +/* #undef HAVE_FSETEA */ + +/* Define to 1 if you have the `fsetxattr' function. */ +/* #undef HAVE_FSETXATTR */ + +/* Define to 1 if you have the `fstat' function. */ +#define HAVE_FSTAT 1 + +/* Define to 1 if you have the `fstatat' function. */ +#define HAVE_FSTATAT 1 + +/* Define to 1 if you have the `fstatfs' function. */ +#define HAVE_FSTATFS 1 + +/* Define to 1 if you have the `fstatvfs' function. */ +#define HAVE_FSTATVFS 1 + +/* Define to 1 if you have the `ftruncate' function. */ +#define HAVE_FTRUNCATE 1 + +/* Define to 1 if you have the `futimens' function. */ +#define HAVE_FUTIMENS 1 + +/* Define to 1 if you have the `futimes' function. */ +#define HAVE_FUTIMES 1 + +/* Define to 1 if you have the `futimesat' function. */ +/* #undef HAVE_FUTIMESAT */ + +/* Define to 1 if you have the `getea' function. */ +/* #undef HAVE_GETEA */ + +/* Define to 1 if you have the `geteuid' function. */ +#define HAVE_GETEUID 1 + +/* Define to 1 if you have the `getgrgid_r' function. */ +#define HAVE_GETGRGID_R 1 + +/* Define to 1 if you have the `getgrnam_r' function. */ +#define HAVE_GETGRNAM_R 1 + +/* Define to 1 if platform uses `optreset` to reset `getopt` */ +#define HAVE_GETOPT_OPTRESET 1 + +/* Define to 1 if you have the `getpid' function. 
*/ +#define HAVE_GETPID 1 + +/* Define to 1 if you have the `getpwnam_r' function. */ +#define HAVE_GETPWNAM_R 1 + +/* Define to 1 if you have the `getpwuid_r' function. */ +#define HAVE_GETPWUID_R 1 + +/* Define to 1 if you have the `getvfsbyname' function. */ +/* #undef HAVE_GETVFSBYNAME */ + +/* Define to 1 if you have the `getxattr' function. */ +#define HAVE_GETXATTR 1 + +/* Define to 1 if you have the `gmtime_r' function. */ +#define HAVE_GMTIME_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the `iconv' function. */ +/* #undef HAVE_ICONV */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ICONV_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_IO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LANGINFO_H 1 + +/* Define to 1 if you have the `lchflags' function. */ +/* #undef HAVE_LCHFLAGS */ + +/* Define to 1 if you have the `lchmod' function. */ +#define HAVE_LCHMOD 1 + +/* Define to 1 if you have the `lchown' function. */ +#define HAVE_LCHOWN 1 + +/* Define to 1 if you have the `lgetea' function. */ +/* #undef HAVE_LGETEA */ + +/* Define to 1 if you have the `lgetxattr' function. */ +#define HAVE_LGETXATTR 1 + +/* Define to 1 if you have the `acl' library (-lacl). */ +/* #undef HAVE_LIBACL */ + +/* Define to 1 if you have the `attr' library (-lattr). */ +/* #undef HAVE_LIBATTR */ + +/* Define to 1 if you have the `bsdxml' library (-lbsdxml). */ +/* #undef HAVE_LIBBSDXML */ + +/* Define to 1 if you have the `bz2' library (-lbz2). */ +/* #undef HAVE_LIBBZ2 */ + +/* Define to 1 if you have the `b2' library (-lb2). */ +/* #undef HAVE_LIBB2 */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BLAKE2_H */ + +/* Define to 1 if you have the `charset' library (-lcharset). */ +/* #undef HAVE_LIBCHARSET */ + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +/* #undef HAVE_LIBCRYPTO */ + +/* Define to 1 if you have the `expat' library (-lexpat). */ +/* #undef HAVE_LIBEXPAT */ + +/* Define to 1 if you have the `gcc' library (-lgcc). */ +/* #undef HAVE_LIBGCC */ + +/* Define to 1 if you have the `lz4' library (-llz4). */ +/* #undef HAVE_LIBLZ4 */ + +/* Define to 1 if you have the `lzma' library (-llzma). */ +/* #undef HAVE_LIBLZMA */ + +/* Define to 1 if you have the `lzmadec' library (-llzmadec). */ +/* #undef HAVE_LIBLZMADEC */ + +/* Define to 1 if you have the `lzo2' library (-llzo2). */ +/* #undef HAVE_LIBLZO2 */ + +/* Define to 1 if you have the `mbedcrypto' library (-lmbedcrypto). */ +/* #undef HAVE_LIBMBEDCRYPTO */ + +/* Define to 1 if you have the `nettle' library (-lnettle). */ +/* #undef HAVE_LIBNETTLE */ + +/* Define to 1 if you have the `pcre' library (-lpcre). */ +/* #undef HAVE_LIBPCRE */ + +/* Define to 1 if you have the `pcreposix' library (-lpcreposix). */ +/* #undef HAVE_LIBPCREPOSIX */ + +/* Define to 1 if you have the `xml2' library (-lxml2). */ +#define HAVE_LIBXML2 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBXML_XMLREADER_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBXML_XMLWRITER_H */ + +/* Define to 1 if you have the `z' library (-lz). */ +/* #undef HAVE_LIBZ */ + +/* Define to 1 if you have the `zstd' library (-lzstd). */ +/* #undef HAVE_LIBZSTD */ + +/* Define to 1 if you have the `zstd' library (-lzstd) with compression + support. 
*/ +/* #undef HAVE_LIBZSTD_COMPRESSOR */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if you have the `link' function. */ +#define HAVE_LINK 1 + +/* Define to 1 if you have the `linkat' function. */ +#define HAVE_LINKAT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_FIEMAP_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_FS_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_MAGIC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_TYPES_H */ + +/* Define to 1 if you have the `listea' function. */ +/* #undef HAVE_LISTEA */ + +/* Define to 1 if you have the `listxattr' function. */ +#define HAVE_LISTXATTR 1 + +/* Define to 1 if you have the `llistea' function. */ +/* #undef HAVE_LLISTEA */ + +/* Define to 1 if you have the `llistxattr' function. */ +#define HAVE_LLISTXATTR 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LOCALCHARSET_H */ + +/* Define to 1 if you have the `locale_charset' function. */ +/* #undef HAVE_LOCALE_CHARSET */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LOCALE_H 1 + +/* Define to 1 if you have the `localtime_r' function. */ +#define HAVE_LOCALTIME_R 1 + +/* Define to 1 if the system has the type `long long int'. */ +/* #undef HAVE_LONG_LONG_INT */ + +/* Define to 1 if you have the `lsetea' function. */ +/* #undef HAVE_LSETEA */ + +/* Define to 1 if you have the `lsetxattr' function. */ +#define HAVE_LSETXATTR 1 + +/* Define to 1 if you have the `lstat' function. */ +#define HAVE_LSTAT 1 + +/* Define to 1 if `lstat' has the bug that it succeeds when given the + zero-length file name argument. */ +/* #undef HAVE_LSTAT_EMPTY_STRING_BUG */ + +/* Define to 1 if you have the `lutimes' function. */ +#define HAVE_LUTIMES 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZ4HC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZ4_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMADEC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMA_H */ + +/* Define to 1 if you have a working `lzma_stream_encoder_mt' function. */ +/* #undef HAVE_LZMA_STREAM_ENCODER_MT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZO_LZO1X_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZO_LZOCONF_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_AES_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_MD_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_PKCS5_H */ + +/* Define to 1 if you have the `mbrtowc' function. */ +/* #undef HAVE_MBRTOWC */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MEMBERSHIP_H */ + +/* Define to 1 if you have the `memmove' function. */ +#define HAVE_MEMMOVE 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mkdir' function. */ +#define HAVE_MKDIR 1 + +/* Define to 1 if you have the `mkfifo' function. */ +#define HAVE_MKFIFO 1 + +/* Define to 1 if you have the `mknod' function. */ +#define HAVE_MKNOD 1 + +/* Define to 1 if you have the `mkstemp' function. */ +#define HAVE_MKSTEMP 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. */ +/* #undef HAVE_NDIR_H */ + +/* Define to 1 if you have the header file. 
*/ +/* #undef HAVE_NETTLE_AES_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_HMAC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_MD5_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_PBKDF2_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_RIPEMD160_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_SHA_H */ + +/* Define to 1 if you have the `nl_langinfo' function. */ +/* #undef HAVE_NL_LANGINFO */ + +/* Define to 1 if you have the `openat' function. */ +#define HAVE_OPENAT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_OPENSSL_EVP_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PATHS_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PCREPOSIX_H */ + +/* Define to 1 if you have the `pipe' function. */ +#define HAVE_PIPE 1 + +/* Define to 1 if you have the `PKCS5_PBKDF2_HMAC_SHA1' function. */ +/* #undef HAVE_PKCS5_PBKDF2_HMAC_SHA1 */ + +/* Define to 1 if you have the `poll' function. */ +#define HAVE_POLL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_POLL_H 1 + +/* Define to 1 if you have the `posix_spawnp' function. */ +#define HAVE_POSIX_SPAWNP 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PROCESS_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_PWD_H 1 + +/* Define to 1 if you have the `readdir_r' function. */ +#define HAVE_READDIR_R 1 + +/* Define to 1 if you have the `readlink' function. */ +#define HAVE_READLINK 1 + +/* Define to 1 if you have the `readlinkat' function. */ +#define HAVE_READLINKAT 1 + +/* Define to 1 if you have the `readpassphrase' function. */ +/* #undef HAVE_READPASSPHRASE */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_READPASSPHRASE_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_REGEX_H 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `setenv' function. */ +#define HAVE_SETENV 1 + +/* Define to 1 if you have the `setlocale' function. */ +#define HAVE_SETLOCALE 1 + +/* Define to 1 if you have the `sigaction' function. */ +#define HAVE_SIGACTION 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SPAWN_H 1 + +/* Define to 1 if you have the `statfs' function. */ +#define HAVE_STATFS 1 + +/* Define to 1 if you have the `statvfs' function. */ +#define HAVE_STATVFS 1 + +/* Define to 1 if `stat' has the bug that it succeeds when given the + zero-length file name argument. */ +/* #undef HAVE_STAT_EMPTY_STRING_BUG */ + +/* Define to 1 if you have the header file. */ +#define HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strchr' function. */ +#define HAVE_STRCHR 1 + +/* Define to 1 if you have the `strnlen' function. */ +#define HAVE_STRNLEN 1 + +/* Define to 1 if you have the `strdup' function. */ +#define HAVE_STRDUP 1 + +/* Define to 1 if you have the `strerror' function. */ +#define HAVE_STRERROR 1 + +/* Define to 1 if you have the `strerror_r' function. */ +#define HAVE_STRERROR_R 1 + +/* Define to 1 if you have the `strftime' function. 
*/ +#define HAVE_STRFTIME 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strrchr' function. */ +#define HAVE_STRRCHR 1 + +/* Define to 1 if `f_namemax' is a member of `struct statfs'. */ +/* #undef HAVE_STRUCT_STATFS_F_NAMEMAX */ + +/* Define to 1 if `f_iosize' is a member of `struct statvfs'. */ +/* #undef HAVE_STRUCT_STATVFS_F_IOSIZE */ + +/* Define to 1 if `st_birthtime' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIME */ + +/* Define to 1 if `st_birthtimespec.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC */ + +/* Define to 1 if `st_blksize' is a member of `struct stat'. */ +#define HAVE_STRUCT_STAT_ST_BLKSIZE 1 + +/* Define to 1 if `st_flags' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_FLAGS */ + +/* Define to 1 if `st_mtimespec.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC */ + +/* Define to 1 if `st_mtime_n' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIME_N */ + +/* Define to 1 if `st_mtime_usec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIME_USEC */ + +/* Define to 1 if `st_mtim.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC */ + +/* Define to 1 if `st_umtime' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_UMTIME */ + +/* Define to 1 if `tm_gmtoff' is a member of `struct tm'. */ +#define HAVE_STRUCT_TM_TM_GMTOFF 1 + +/* Define to 1 if `__tm_gmtoff' is a member of `struct tm'. */ +/* #undef HAVE_STRUCT_TM___TM_GMTOFF */ + +/* Define to 1 if you have `struct vfsconf'. */ +/* #undef HAVE_STRUCT_VFSCONF */ + +/* Define to 1 if you have `struct xvfsconf'. */ +/* #undef HAVE_STRUCT_XVFSCONF */ + +/* Define to 1 if you have the `symlink' function. */ +#define HAVE_SYMLINK 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_ACL_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_CDEFS_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_DIR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_EA_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_EXTATTR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MKDEV_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MOUNT_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_POLL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_QUEUE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_RICHACL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_STATFS_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STATVFS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SYSMACROS_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_UTIME_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_UTSNAME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_VFS_H 1 + +/* Define to 1 if you have that is POSIX.1 compatible. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_XATTR_H */ + +/* Define to 1 if you have the `timegm' function. */ +#define HAVE_TIMEGM 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_TIME_H 1 + +/* Define to 1 if you have the `tzset' function. */ +#define HAVE_TZSET 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `unlinkat' function. */ +#define HAVE_UNLINKAT 1 + +/* Define to 1 if you have the `unsetenv' function. */ +#define HAVE_UNSETENV 1 + +/* Define to 1 if the system has the type `unsigned long long'. */ +/* #undef HAVE_UNSIGNED_LONG_LONG */ + +/* Define to 1 if the system has the type `unsigned long long int'. */ +/* #undef HAVE_UNSIGNED_LONG_LONG_INT */ + +/* Define to 1 if you have the `utime' function. */ +#define HAVE_UTIME 1 + +/* Define to 1 if you have the `utimensat' function. */ +#define HAVE_UTIMENSAT 1 + +/* Define to 1 if you have the `utimes' function. */ +#define HAVE_UTIMES 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UTIME_H 1 + +/* Define to 1 if you have the `vfork' function. */ +#define HAVE_VFORK 1 + +/* Define to 1 if you have the `vprintf' function. */ +#define HAVE_VPRINTF 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WCHAR_H 1 + +/* Define to 1 if the system has the type `wchar_t'. */ +#define HAVE_WCHAR_T 1 + +/* Define to 1 if you have the `wcrtomb' function. */ +#define HAVE_WCRTOMB 1 + +/* Define to 1 if you have the `wcscmp' function. */ +#define HAVE_WCSCMP 1 + +/* Define to 1 if you have the `wcscpy' function. */ +#define HAVE_WCSCPY 1 + +/* Define to 1 if you have the `wcslen' function. */ +#define HAVE_WCSLEN 1 + +/* Define to 1 if you have the `wctomb' function. */ +#define HAVE_WCTOMB 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WCTYPE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINCRYPT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINDOWS_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINIOCTL_H */ + +/* Define to 1 if you have _CrtSetReportMode in */ +/* #undef HAVE__CrtSetReportMode */ + +/* Define to 1 if you have the `wmemcmp' function. */ +#define HAVE_WMEMCMP 1 + +/* Define to 1 if you have the `wmemcpy' function. */ +#define HAVE_WMEMCPY 1 + +/* Define to 1 if you have the `wmemmove' function. */ +#define HAVE_WMEMMOVE 1 + +/* Define to 1 if you have a working EXT2_IOC_GETFLAGS */ +/* #undef HAVE_WORKING_EXT2_IOC_GETFLAGS */ + +/* Define to 1 if you have a working FS_IOC_GETFLAGS */ +#define HAVE_WORKING_FS_IOC_GETFLAGS 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZLIB_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZSTD_H */ + +/* Define to 1 if you have the `ctime_s' function. */ +/* #undef HAVE_CTIME_S */ + +/* Define to 1 if you have the `_fseeki64' function. */ +/* #undef HAVE__FSEEKI64 */ + +/* Define to 1 if you have the `_get_timezone' function. 
*/ +/* #undef HAVE__GET_TIMEZONE */ + +/* Define to 1 if you have the `gmtime_s' function. */ +/* #undef HAVE_GMTIME_S */ + +/* Define to 1 if you have the `localtime_s' function. */ +/* #undef HAVE_LOCALTIME_S */ + +/* Define to 1 if you have the `_mkgmtime' function. */ +/* #undef HAVE__MKGMTIME */ + +/* Define as const if the declaration of iconv() needs const. */ +#define ICONV_CONST + +/* Version number of libarchive as a single integer */ +#define LIBARCHIVE_VERSION_NUMBER "3007000" + +/* Version number of libarchive */ +#define LIBARCHIVE_VERSION_STRING "3.7.0" + +/* Define to 1 if `lstat' dereferences a symlink specified with a trailing + slash. */ +/* #undef LSTAT_FOLLOWS_SLASHED_SYMLINK */ + +/* Define to 1 if `major', `minor', and `makedev' are declared in . + */ +/* #undef MAJOR_IN_MKDEV */ + +/* Define to 1 if `major', `minor', and `makedev' are declared in + . */ +/* #undef MAJOR_IN_SYSMACROS */ + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +/* #undef NO_MINUS_C_MINUS_O */ + +/* The size of `wchar_t', as computed by sizeof. */ +#define SIZEOF_WCHAR_T 4 + +/* Define to 1 if strerror_r returns char *. */ +/* #undef STRERROR_R_CHAR_P */ + +/* Define to 1 if you can safely include both and . */ +/* #undef TIME_WITH_SYS_TIME */ + +/* + * Some platform requires a macro to use extension functions. + */ +#define SAFE_TO_DEFINE_EXTENSIONS 1 +#ifdef SAFE_TO_DEFINE_EXTENSIONS +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif +#endif /* SAFE_TO_DEFINE_EXTENSIONS */ + +/* Version number of package */ +#define VERSION "3.7.0" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ +/* #undef _LARGEFILE_SOURCE */ + +/* Define for large files, on AIX-style hosts. */ +/* #undef _LARGE_FILES */ + +/* Define to control Windows SDK version */ +#ifndef NTDDI_VERSION +/* #undef NTDDI_VERSION */ +#endif // NTDDI_VERSION + +#ifndef _WIN32_WINNT +/* #undef _WIN32_WINNT */ +#endif // _WIN32_WINNT + +#ifndef WINVER +/* #undef WINVER */ +#endif // WINVER + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to `int' if doesn't define. */ +/* #undef gid_t */ + +/* Define to `unsigned long' if does not define. */ +/* #undef id_t */ + +/* Define to `int' if does not define. */ +/* #undef mode_t */ + +/* Define to `long long' if does not define. */ +/* #undef off_t */ + +/* Define to `int' if doesn't define. */ +/* #undef pid_t */ + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + +/* Define to `int' if does not define. */ +/* #undef ssize_t */ + +/* Define to `int' if doesn't define. */ +/* #undef uid_t */ + +/* Define to `int' if does not define. */ +/* #undef intptr_t */ + +/* Define to `unsigned int' if does not define. 
*/ +/* #undef uintptr_t */ diff --git a/contrib/libmetrohash/src/platform.h b/contrib/libmetrohash/src/platform.h index bc00e5a286b..9e83d11cb7c 100644 --- a/contrib/libmetrohash/src/platform.h +++ b/contrib/libmetrohash/src/platform.h @@ -17,7 +17,8 @@ #ifndef METROHASH_PLATFORM_H #define METROHASH_PLATFORM_H -#include <stdint.h> +#include <bit> +#include <cstdint> #include <cstring> // rotate right idiom recognized by most compilers @@ -33,6 +34,11 @@ inline static uint64_t read_u64(const void * const ptr) // so we use memcpy() which is the most portable. clang & gcc usually translates `memcpy()` into a single `load` instruction // when hardware supports it, so using memcpy() is efficient too. memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } @@ -40,6 +46,11 @@ inline static uint64_t read_u32(const void * const ptr) { uint32_t result; memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } @@ -47,6 +58,11 @@ inline static uint64_t read_u16(const void * const ptr) { uint16_t result; memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt deleted file mode 100644 index 5380269c479..00000000000 --- a/contrib/nlp-data-cmake/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data") - -add_library (_nlp_data INTERFACE) - -clickhouse_embed_binaries( - TARGET nlp_dictionaries - RESOURCE_DIR "${LIBRARY_DIR}" - RESOURCES charset.zst tonality_ru.zst programming.zst -) - -add_dependencies(_nlp_data nlp_dictionaries) -target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}") -add_library(ch_contrib::nlp_data ALIAS _nlp_data) diff --git a/contrib/qpl b/contrib/qpl index 3f8f5cea277..faaf1935045 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679 +Subproject commit faaf19350459c076e66bb5df11743c3fade59b73 diff --git a/docker/README.md b/docker/README.md index ec52ddd143e..c1bb3b49f00 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,5 +1,5 @@ ## ClickHouse Dockerfiles -This directory contain Dockerfiles for `clickhouse-client` and `clickhouse-server`. They are updated in each release. +This directory contain Dockerfiles for `clickhouse-server`. They are updated in each release. -Also there is bunch of images for testing and CI. They are listed in `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`. +Also, there is a bunch of images for testing and CI. They are listed in `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`. 
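For reference, the `std::byteswap` guard added to `contrib/libmetrohash/src/platform.h` above keys off the byte-order macros that GCC and Clang predefine for the target. A minimal sketch of how to inspect them for a big-endian target such as s390x follows; it assumes a local clang able to cross-target s390x and is only an illustration, not part of the patch.

```bash
# Dump the byte-order macros clang predefines for the s390x target (illustration only).
# On a big-endian target both __BYTE_ORDER__ and __ORDER_BIG_ENDIAN__ expand to 4321.
clang --target=s390x-linux-gnu -dM -E - </dev/null | grep -E '__(BYTE_ORDER|ORDER_BIG_ENDIAN|ORDER_LITTLE_ENDIAN)__'
```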
diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile deleted file mode 100644 index 1c185daec75..00000000000 --- a/docker/client/Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -FROM ubuntu:18.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=22.1.1.* - -RUN apt-get update \ - && apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - dirmngr \ - gnupg \ - && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ - && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ - && apt-get update \ - && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --allow-unauthenticated --yes --no-install-recommends \ - clickhouse-client=$version \ - clickhouse-common-static=$version \ - locales \ - tzdata \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf \ - && apt-get clean - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -ENTRYPOINT ["/usr/bin/clickhouse-client"] diff --git a/docker/client/README.md b/docker/client/README.md deleted file mode 100644 index bbcc7d60794..00000000000 --- a/docker/client/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# ClickHouse Client Docker Image - -For more information see [ClickHouse Server Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/). - -## License - -View [license information](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) for the software contained in this image. diff --git a/docker/images.json b/docker/images.json index e8fc329a640..d895e2da2f0 100644 --- a/docker/images.json +++ b/docker/images.json @@ -125,6 +125,7 @@ "docker/test/keeper-jepsen", "docker/test/server-jepsen", "docker/test/sqllogic", + "docker/test/sqltest", "docker/test/stateless" ] }, @@ -155,11 +156,18 @@ }, "docker/docs/builder": { "name": "clickhouse/docs-builder", - "dependent": [ - ] + "dependent": [] }, "docker/test/sqllogic": { "name": "clickhouse/sqllogic-test", "dependent": [] + }, + "docker/test/sqltest": { + "name": "clickhouse/sqltest", + "dependent": [] + }, + "docker/test/integration/nginx_dav": { + "name": "clickhouse/nginx-dav", + "dependent": [] } } diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 8a6324aef88..c7206550bd8 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.4.5" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
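The `docker/images.json` entries above register the new `clickhouse/sqltest` and `clickhouse/nginx-dav` CI images. As a rough local sanity check — a sketch only, since CI builds and tags these images itself and the `:local` tags below are made up for illustration — the images can be built straight from their directories:

```bash
# Build the newly registered CI images from the repository root (illustrative local tags).
docker build -t clickhouse/sqltest:local docker/test/sqltest
docker build -t clickhouse/nginx-dav:local docker/test/integration/nginx_dav
```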
diff --git a/docker/packager/README.md b/docker/packager/README.md index a78feb8d7fc..3a91f9a63f0 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -6,7 +6,7 @@ Usage: Build deb package with `clang-14` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 897bcd24d04..940daad9c61 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -80,6 +80,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # Download toolchain and SDK for Darwin RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz +# Download and install mold 2.0 for s390x build +RUN curl -Lo /tmp/mold.tar.gz "https://github.com/rui314/mold/releases/download/v2.0.0/mold-2.0.0-x86_64-linux.tar.gz" \ + && mkdir /tmp/mold \ + && tar -xzf /tmp/mold.tar.gz -C /tmp/mold \ + && cp -r /tmp/mold/mold*/* /usr \ + && rm -rf /tmp/mold \ + && rm /tmp/mold.tar.gz + # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH ARG NFPM_VERSION=2.20.0 diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c0803c74147..38b2ef75d24 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -64,7 +64,7 @@ then ninja $NINJA_FLAGS clickhouse-keeper ls -la ./programs/ - ldd ./programs/clickhouse-keeper + ldd ./programs/clickhouse-keeper ||: if [ -n "$MAKE_DEB" ]; then # No quotes because I want it to expand to nothing if empty. @@ -80,19 +80,9 @@ else cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. fi -if [ "coverity" == "$COMBINED_OUTPUT" ] -then - mkdir -p /workdir/cov-analysis - - wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1 - export PATH=$PATH:/workdir/cov-analysis/bin - cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC" - SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int" -fi - # No quotes because I want it to expand to nothing if empty. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -$SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET +ninja $NINJA_FLAGS $BUILD_TARGET ls -la ./programs @@ -175,13 +165,6 @@ then mv "$COMBINED_OUTPUT.tar.zst" /output fi -if [ "coverity" == "$COMBINED_OUTPUT" ] -then - # Coverity does not understand ZSTD. 
- tar -cvz -f "coverity-scan.tar.gz" cov-int - mv "coverity-scan.tar.gz" /output -fi - ccache_status ccache --evict-older-than 1d diff --git a/docker/packager/packager b/docker/packager/packager index e12bd55dde3..1d0ccae8a69 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -22,7 +22,7 @@ def check_image_exists_locally(image_name: str) -> bool: output = subprocess.check_output( f"docker images -q {image_name} 2> /dev/null", shell=True ) - return output != "" + return output != b"" except subprocess.CalledProcessError: return False @@ -46,7 +46,7 @@ def build_image(image_name: str, filepath: Path) -> None: ) -def pre_build(repo_path: Path, env_variables: List[str]): +def pre_build(repo_path: Path, env_variables: List[str]) -> None: if "WITH_PERFORMANCE=1" in env_variables: current_branch = subprocess.check_output( "git branch --show-current", shell=True, encoding="utf-8" @@ -80,9 +80,12 @@ def run_docker_image_with_env( output_dir: Path, env_variables: List[str], ch_root: Path, + cargo_cache_dir: Path, ccache_dir: Optional[Path], -): +) -> None: output_dir.mkdir(parents=True, exist_ok=True) + cargo_cache_dir.mkdir(parents=True, exist_ok=True) + env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -104,7 +107,7 @@ def run_docker_image_with_env( cmd = ( f"docker run --network=host --user={user} --rm {ccache_mount}" f"--volume={output_dir}:/output --volume={ch_root}:/build {env_part} " - f"{interactive} {image_name}" + f"--volume={cargo_cache_dir}:/rust/cargo/registry {interactive} {image_name}" ) logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd) @@ -112,12 +115,12 @@ def run_docker_image_with_env( subprocess.check_call(cmd, shell=True) -def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool: - return build_type == "" and package_type == "deb" and sanitizer == "" +def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool: + return not debug_build and package_type == "deb" and sanitizer == "" def parse_env_variables( - build_type: str, + debug_build: bool, compiler: str, sanitizer: str, package_type: str, @@ -129,9 +132,10 @@ def parse_env_variables( version: str, official: bool, additional_pkgs: bool, + with_profiler: bool, with_coverage: bool, with_binaries: str, -): +) -> List[str]: DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" @@ -139,6 +143,7 @@ def parse_env_variables( FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" RISCV_SUFFIX = "-riscv64" + S390X_SUFFIX = "-s390x" AMD64_COMPAT_SUFFIX = "-amd64-compat" result = [] @@ -152,6 +157,7 @@ def parse_env_variables( is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_riscv = compiler.endswith(RISCV_SUFFIX) + is_cross_s390x = compiler.endswith(S390X_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) @@ -213,6 +219,11 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" ) + elif is_cross_s390x: + cc = compiler[: -len(S390X_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-s390x.cmake" + ) elif is_amd64_compat: cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") @@ -240,7 +251,7 @@ def parse_env_variables( build_target = ( f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge" ) - if is_release_build(build_type, package_type, 
sanitizer): + if is_release_build(debug_build, package_type, sanitizer): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") if is_cross_arm: @@ -253,15 +264,10 @@ def parse_env_variables( cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}") cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}") - # Create combined output archive for performance tests. - if package_type == "coverity": - result.append("COMBINED_OUTPUT=coverity") - result.append('COVERITY_TOKEN="$COVERITY_TOKEN"') - if sanitizer: result.append(f"SANITIZER={sanitizer}") - if build_type: - result.append(f"BUILD_TYPE={build_type.capitalize()}") + if debug_build: + result.append("BUILD_TYPE=Debug") else: result.append("BUILD_TYPE=None") @@ -327,6 +333,9 @@ def parse_env_variables( # utils are not included into clickhouse-bundle, so build everything build_target = "all" + if with_profiler: + cmake_flags.append("-DENABLE_BUILD_PROFILING=1") + if with_coverage: cmake_flags.append("-DWITH_COVERAGE=1") @@ -356,7 +365,7 @@ def parse_args() -> argparse.Namespace: ) parser.add_argument( "--package-type", - choices=["deb", "binary", "coverity"], + choices=["deb", "binary"], required=True, ) parser.add_argument( @@ -366,7 +375,7 @@ def parse_args() -> argparse.Namespace: help="ClickHouse git repository", ) parser.add_argument("--output-dir", type=dir_name, required=True) - parser.add_argument("--build-type", choices=("debug", ""), default="") + parser.add_argument("--debug-build", action="store_true") parser.add_argument( "--compiler", @@ -378,6 +387,7 @@ def parse_args() -> argparse.Namespace: "clang-16-aarch64-v80compat", "clang-16-ppc64le", "clang-16-riscv64", + "clang-16-s390x", "clang-16-amd64-compat", "clang-16-freebsd", ), @@ -417,10 +427,18 @@ def parse_args() -> argparse.Namespace: action="store_true", help="if set, the build fails on errors writing cache to S3", ) + parser.add_argument( + "--cargo-cache-dir", + default=Path(os.getenv("CARGO_HOME", "") or Path.home() / ".cargo") + / "registry", + type=dir_name, + help="a directory to preserve the rust cargo crates", + ) parser.add_argument("--force-build-image", action="store_true") parser.add_argument("--version") parser.add_argument("--official", action="store_true") parser.add_argument("--additional-pkgs", action="store_true") + parser.add_argument("--with-profiler", action="store_true") parser.add_argument("--with-coverage", action="store_true") parser.add_argument( "--with-binaries", choices=("programs", "tests", ""), default="" @@ -456,7 +474,7 @@ def parse_args() -> argparse.Namespace: return args -def main(): +def main() -> None: logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") args = parse_args() @@ -472,7 +490,7 @@ def main(): build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( - args.build_type, + args.debug_build, args.compiler, args.sanitizer, args.package_type, @@ -484,6 +502,7 @@ def main(): args.version, args.official, args.additional_pkgs, + args.with_profiler, args.with_coverage, args.with_binaries, ) @@ -495,6 +514,7 @@ def main(): args.output_dir, env_prepared, ch_root, + args.cargo_cache_dir, args.ccache_dir, ) logging.info("Output placed into %s", args.output_dir) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7f453627601..a3a1f912e90 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG 
REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.4.5" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 1fa7b83ae16..4936cfeccb0 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.4.5" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/server/README.md b/docker/server/README.md index 67646a262f5..6200acbd30c 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -97,8 +97,8 @@ docker run -d \ You may also want to mount: -* `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustmenets -* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustmenets +* `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustments +* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustments * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). ### Linux capabilities diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index f6836804454..b55baa0e0fc 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update \ pv \ ripgrep \ zstd \ + locales \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) @@ -18,17 +19,23 @@ RUN apt-get update \ # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # TSAN will flush shadow memory when reaching this limit. # It may cause false-negatives, but it's better than OOM. 
-RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment +RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' -ENV TZ=Europe/Moscow +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + +ENV TZ=Europe/Amsterdam RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone +# This script is used to setup realtime export of server logs from the CI into external ClickHouse cluster: +COPY setup_export_logs.sh / + CMD sleep 1 diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh new file mode 100755 index 00000000000..d68fd392d62 --- /dev/null +++ b/docker/test/base/setup_export_logs.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# This script sets up export of system log tables to a remote server. +# Remote tables are created if not exist, and augmented with extra columns, +# and their names will contain a hash of the table structure, +# which allows exporting tables from servers of different versions. 
+ +# Pre-configured destination cluster, where to export the data +CLUSTER=${CLUSTER:=system_logs_export} + +EXTRA_COLUMNS=${EXTRA_COLUMNS:="pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "} +EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:="0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"} +EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:="check_name, "} + +CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""} + +# Create all configured system logs: +clickhouse-client --query "SYSTEM FLUSH LOGS" + +# It doesn't make sense to try creating tables if SYNC fails +echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client --receive_timeout 180 $CONNECTION_PARAMETERS || exit 0 + +# For each system log table: +clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table +do + # Calculate hash of its structure: + hash=$(clickhouse-client --query " + SELECT sipHash64(groupArray((name, type))) + FROM (SELECT name, type FROM system.columns + WHERE database = 'system' AND table = '$table' + ORDER BY position) + ") + + # Create the destination table with adapted name and structure: + statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' + s/^\($/('"$EXTRA_COLUMNS"'/; + s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/; + s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; + /^TTL /d + ') + + echo "Creating destination table ${table}_${hash}" >&2 + + echo "$statement" | clickhouse-client --distributed_ddl_task_timeout=10 $CONNECTION_PARAMETERS || continue + + echo "Creating table system.${table}_sender" >&2 + + # Create Distributed table and materialized view to watch on the original table: + clickhouse-client --query " + CREATE TABLE system.${table}_sender + ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash}) + SETTINGS flush_on_detach=0 + EMPTY AS + SELECT ${EXTRA_COLUMNS_EXPRESSION}, * + FROM system.${table} + " + + echo "Creating materialized view system.${table}_watcher" >&2 + + clickhouse-client --query " + CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS + SELECT ${EXTRA_COLUMNS_EXPRESSION}, * + FROM system.${table} + " +done diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index da4baa8c687..ad24e662a6c 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -32,7 +32,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 828c73e6781..477bbdb6919 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -141,13 +141,14 @@ function clone_submodules contrib/jemalloc contrib/replxx contrib/wyhash - contrib/hashidsxx contrib/c-ares contrib/morton-nd contrib/xxHash contrib/simdjson contrib/liburing contrib/libfiu + contrib/incbin + contrib/yaml-cpp ) git submodule sync @@ -170,6 +171,7 @@ function run_cmake "-DENABLE_SIMDJSON=1" "-DENABLE_JEMALLOC=1" "-DENABLE_LIBURING=1" + "-DENABLE_YAML_CPP=1" ) export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache" diff --git 
a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index aa71074c02a..0bc0fb06633 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 5cda0831a84..05cc92ee040 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -122,6 +122,23 @@ EOL $PWD EOL + + # Setup a cluster for logs export to ClickHouse Cloud + # Note: these variables are provided to the Docker run command by the Python script in tests/ci + if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] + then + echo " +remote_servers: + system_logs_export: + shard: + replica: + secure: 1 + user: ci + host: '${CLICKHOUSE_CI_LOGS_HOST}' + port: 9440 + password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' +" > db/config.d/system_logs_export.yaml + fi } function filter_exists_and_template @@ -223,7 +240,22 @@ quit done clickhouse-client --query "select 1" # This checks that the server is responding kill -0 $server_pid # This checks that it is our server that is started and not some other one - echo Server started and responded + echo 'Server started and responded' + + # Initialize export of system logs to ClickHouse Cloud + if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ] + then + export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type" + # TODO: Check if the password will appear in the logs. + export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}" + + /setup_export_logs.sh + + # Unset variables after use + export CONNECTION_PARAMETERS='' + export CLICKHOUSE_CI_LOGS_HOST='' + export CLICKHOUSE_CI_LOGS_PASSWORD='' + fi # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric. # SC2046: Quote this to prevent word splitting. Actually I need word splitting. 
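The run-fuzzer.sh changes above wire the new `/setup_export_logs.sh` into CI by exporting the connection variables before calling it. The same flow can be exercised by hand; the sketch below is an illustration only, not part of the patch — it assumes a locally running server, a reachable destination cluster, and uses placeholder credentials (`logs.example.com`, `...`).

```bash
# Rough sketch of enabling system-log export manually (placeholders, not real credentials).
export CLICKHOUSE_CI_LOGS_HOST='logs.example.com'
export CLICKHOUSE_CI_LOGS_PASSWORD='...'
export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
/setup_export_logs.sh
# Afterwards each system.*_log table should have a *_sender Distributed table and a *_watcher materialized view:
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_sender'"
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_watcher'"
```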
diff --git a/docker/test/install/deb/Dockerfile b/docker/test/install/deb/Dockerfile index 9614473c69b..e9c928b1fe7 100644 --- a/docker/test/install/deb/Dockerfile +++ b/docker/test/install/deb/Dockerfile @@ -12,6 +12,7 @@ ENV \ # install systemd packages RUN apt-get update && \ apt-get install -y --no-install-recommends \ + sudo \ systemd \ && \ apt-get clean && \ diff --git a/docker/test/integration/nginx_dav/Dockerfile b/docker/test/integration/nginx_dav/Dockerfile new file mode 100644 index 00000000000..42c1244f6dc --- /dev/null +++ b/docker/test/integration/nginx_dav/Dockerfile @@ -0,0 +1,6 @@ +FROM nginx:alpine-slim + +COPY default.conf /etc/nginx/conf.d/ + +RUN mkdir /usr/share/nginx/files/ \ + && chown nginx: /usr/share/nginx/files/ -R diff --git a/docker/test/integration/nginx_dav/default.conf b/docker/test/integration/nginx_dav/default.conf new file mode 100644 index 00000000000..466d0584a2d --- /dev/null +++ b/docker/test/integration/nginx_dav/default.conf @@ -0,0 +1,25 @@ +server { + listen 80; + + #root /usr/share/nginx/test.com; + index index.html index.htm; + + server_name test.com localhost; + + location / { + expires max; + root /usr/share/nginx/files; + client_max_body_size 20m; + client_body_temp_path /usr/share/nginx/tmp; + dav_methods PUT; # Allowed methods, only PUT is necessary + + create_full_put_path on; # nginx automatically creates nested directories + dav_access user:rw group:r all:r; # access permissions for files + + limit_except GET { + allow all; + } + } + + error_page 405 =200 $uri; +} diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index d6c127c8421..f67c45f7114 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -95,9 +95,11 @@ RUN python3 -m pip install --no-cache-dir \ pytest-timeout \ pytest-xdist \ pytz \ + pyyaml==5.3.1 \ redis \ requests-kerberos \ tzlocal==2.1 \ + retry \ urllib3 # Hudi supports only spark 3.3.*, not 3.4 @@ -128,10 +130,11 @@ COPY misc/ /misc/ # Same options as in test/base/Dockerfile # (in case you need to override them in tests) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] -CMD ["sh", "-c", "pytest $PYTEST_OPTS"] +# To pass additional arguments (i.e. 
list of tests) use PYTEST_ADDOPTS +CMD ["sh", "-c", "pytest"] diff --git a/docker/test/integration/runner/compose/docker_compose_coredns.yml b/docker/test/integration/runner/compose/docker_compose_coredns.yml index b329d4e0a46..e4736e04846 100644 --- a/docker/test/integration/runner/compose/docker_compose_coredns.yml +++ b/docker/test/integration/runner/compose/docker_compose_coredns.yml @@ -2,7 +2,7 @@ version: "2.3" services: coredns: - image: coredns/coredns:latest + image: coredns/coredns:1.9.3 # :latest broke this test restart: always volumes: - ${COREDNS_CONFIG_DIR}/example.com:/example.com diff --git a/docker/test/integration/runner/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml index f83eb93fea7..1cae54ad9e1 100644 --- a/docker/test/integration/runner/compose/docker_compose_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_hdfs.yml @@ -12,3 +12,5 @@ services: - type: ${HDFS_FS:-tmpfs} source: ${HDFS_LOGS:-} target: /usr/local/hadoop/logs + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 7e34f4c114d..4ae3de3cbc7 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -4,6 +4,8 @@ services: kafka_zookeeper: image: zookeeper:3.4.9 hostname: kafka_zookeeper + ports: + - 2181:2181 environment: ZOO_MY_ID: 1 ZOO_PORT: 2181 @@ -15,33 +17,59 @@ services: image: confluentinc/cp-kafka:5.2.0 hostname: kafka1 ports: - - ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081} + - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT} environment: KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092 KAFKA_ADVERTISED_HOST_NAME: kafka1 - KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092 KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" + KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181 KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 depends_on: - kafka_zookeeper security_opt: - label:disable + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' schema-registry: image: confluentinc/cp-schema-registry:5.2.0 hostname: schema-registry ports: - - ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313} + - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT} environment: SCHEMA_REGISTRY_HOST_NAME: schema-registry - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT} + SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth depends_on: - kafka_zookeeper - kafka1 + restart: always + security_opt: + - label:disable + + schema-registry-auth: + image: confluentinc/cp-schema-registry:5.2.0 + hostname: schema-registry-auth + ports: + - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + environment: + SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 
PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC + SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user + SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar + SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf" + SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth + volumes: + - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets + depends_on: + - kafka_zookeeper + - kafka1 + restart: always security_opt: - label:disable diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml index 1160192696d..e955a14eb3d 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml @@ -20,6 +20,8 @@ services: depends_on: - hdfskerberos entrypoint: /etc/bootstrap.sh -d + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' hdfskerberos: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} @@ -29,3 +31,5 @@ services: - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh - /dev/urandom:/dev/random expose: [88, 749] + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml index 86e920ff573..49d4c1db90f 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -48,6 +48,8 @@ services: - kafka_kerberos security_opt: - label:disable + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' kafka_kerberos: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} diff --git a/docker/test/integration/runner/compose/docker_compose_meili.yml b/docker/test/integration/runner/compose/docker_compose_meili.yml index c734c43b4c6..c1fad4aca87 100644 --- a/docker/test/integration/runner/compose/docker_compose_meili.yml +++ b/docker/test/integration/runner/compose/docker_compose_meili.yml @@ -1,16 +1,15 @@ version: '2.3' services: meili1: - image: getmeili/meilisearch:v0.27.0 + image: getmeili/meilisearch:v0.27.0 restart: always ports: - ${MEILI_EXTERNAL_PORT:-7700}:${MEILI_INTERNAL_PORT:-7700} meili_secure: - image: getmeili/meilisearch:v0.27.0 + image: getmeili/meilisearch:v0.27.0 restart: always ports: - ${MEILI_SECURE_EXTERNAL_PORT:-7700}:${MEILI_SECURE_INTERNAL_PORT:-7700} environment: MEILI_MASTER_KEY: "password" - diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml index 3eaf891ff8e..f2979566296 100644 --- a/docker/test/integration/runner/compose/docker_compose_minio.yml +++ b/docker/test/integration/runner/compose/docker_compose_minio.yml @@ -14,7 +14,7 @@ services: MINIO_ACCESS_KEY: minio MINIO_SECRET_KEY: minio123 MINIO_PROMETHEUS_AUTH_TYPE: public - command: server --address :9001 --certs-dir /certs /data1-1 + command: server --console-address 127.0.0.1:19001 --address :9001 --certs-dir /certs /data1-1 depends_on: - proxy1 - proxy2 diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml index 6b98a372bd0..103fe2769e9 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql.yml +++ 
b/docker/test/integration/runner/compose/docker_compose_mysql.yml @@ -9,10 +9,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-1.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-1.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/error.log @@ -21,4 +21,4 @@ services: volumes: - type: ${MYSQL_LOGS_FS:-tmpfs} source: ${MYSQL_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml index d5fb5a53aaf..9c9c7430cec 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml @@ -9,9 +9,9 @@ services: DATADIR: /mysql/ expose: - ${MYSQL8_PORT:-3306} - command: --server_id=100 --log-bin='mysql-bin-1.log' - --default_authentication_plugin='mysql_native_password' - --default-time-zone='+3:00' --gtid-mode="ON" + command: --server_id=100 --log-bin='mysql-bin-1.log' + --default_authentication_plugin='mysql_native_password' + --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/error.log @@ -20,4 +20,4 @@ services: volumes: - type: ${MYSQL8_LOGS_FS:-tmpfs} source: ${MYSQL8_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml index 8e145a3b408..73f9e39f0d6 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml @@ -9,10 +9,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-2.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-2.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/2_error.log @@ -31,10 +31,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-3.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-3.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/3_error.log @@ -53,10 +53,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-4.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-4.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/4_error.log @@ -65,4 +65,4 @@ services: volumes: - type: ${MYSQL_CLUSTER_LOGS_FS:-tmpfs} source: ${MYSQL_CLUSTER_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_nginx.yml b/docker/test/integration/runner/compose/docker_compose_nginx.yml index d0fb9fc1ff4..38d2a6d84c8 100644 --- a/docker/test/integration/runner/compose/docker_compose_nginx.yml +++ b/docker/test/integration/runner/compose/docker_compose_nginx.yml @@ -5,7 +5,7 
@@ services: # Files will be put into /usr/share/nginx/files. nginx: - image: kssenii/nginx-test:1.1 + image: clickhouse/nginx-dav:${DOCKER_NGINX_DAV_TAG:-latest} restart: always ports: - 80:80 diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml index 1fb6b7a1410..2ef7eb17395 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgres.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml @@ -12,9 +12,9 @@ services: timeout: 5s retries: 5 networks: - default: - aliases: - - postgre-sql.local + default: + aliases: + - postgre-sql.local environment: POSTGRES_HOST_AUTH_METHOD: "trust" POSTGRES_PASSWORD: mysecretpassword diff --git a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml index 7a1c32e0023..b5dbae423b2 100644 --- a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml +++ b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml @@ -12,7 +12,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind @@ -37,7 +37,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind @@ -61,7 +61,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 3c4ff522b36..b05aef76faf 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -64,15 +64,16 @@ export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest} -export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} -export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest} +export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} +export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest} +export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} +export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest} export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest} export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest} +export DOCKER_NGINX_DAV_TAG=${DOCKER_NGINX_DAV_TAG:=latest} export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest} -export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} -export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest} cd /ClickHouse/tests/integration exec "$@" diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index ab9f1f8a2e3..d31663f9071 100644 --- 
a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,18 +1,7 @@ # docker build -t clickhouse/performance-comparison . -# Using ubuntu:22.04 over 20.04 as all other images, since: -# a) ubuntu 20.04 has too old parallel, and does not support --memsuspend -# b) anyway for perf tests it should not be important (backward compatiblity -# with older ubuntu had been checked lots of times in various tests) -FROM ubuntu:22.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ @@ -56,10 +45,9 @@ COPY * / # node #0 should be less stable because of system interruptions. We bind # randomly to node 1 or 0 to gather some statistics on that. We have to bind # both servers and the tmpfs on which the database is stored. How to do it -# through Yandex Sandbox API is unclear, but by default tmpfs uses +# is unclear, but by default tmpfs uses # 'process allocation policy', not sure which process but hopefully the one that -# writes to it, so just bind the downloader script as well. We could also try to -# remount it with proper options in Sandbox task. +# writes to it, so just bind the downloader script as well. # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 798d2a40b12..4b1b5c13b9b 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -90,7 +90,7 @@ function configure set +m wait_for_server $LEFT_SERVER_PORT $left_pid - echo Server for setup started + echo "Server for setup started" clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: @@ -156,9 +156,9 @@ function restart wait_for_server $RIGHT_SERVER_PORT $right_pid echo right ok - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.build_options" - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.build_options" # Check again that both servers we started are running -- this is important @@ -352,14 +352,12 @@ function get_profiles wait clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: & - clickhouse-client --port $LEFT_SERVER_PORT --query "select * 
from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: & - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: & @@ -665,9 +663,8 @@ create view partial_query_times as select * from -- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g. -- queries with new functions added in the tested PR). create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') - settings output_format_decimal_trailing_zeros = 1 - as select toDecimal64(time_median, 3) time, - toDecimal64(time_stddev / time_median, 3) relative_time_stddev, + as select round(time_median, 3) time, + round(time_stddev / time_median, 3) relative_time_stddev, test, query_index, query_display_name from partial_query_times join query_display_names using (test, query_index) @@ -739,28 +736,26 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') ; create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') - settings output_format_decimal_trailing_zeros = 1 as with -- server_time is sometimes reported as zero (if it's less than 1 ms), -- so we have to work around this to not get an error about conversion -- of NaN to decimal. (left > right ? left / right : right / left) as times_change_float, isFinite(times_change_float) as times_change_finite, - toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, + round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, times_change_finite ? (left > right ? 
'-' : '+') || toString(times_change_decimal) || 'x' : '--' as times_change_str select - toDecimal64(left, 3), toDecimal64(right, 3), times_change_str, - toDecimal64(diff, 3), toDecimal64(stat_threshold, 3), + round(left, 3), round(right, 3), times_change_str, + round(diff, 3), round(stat_threshold, 3), changed_fail, test, query_index, query_display_name from queries where changed_show order by abs(diff) desc; create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') - settings output_format_decimal_trailing_zeros = 1 as select - toDecimal64(left, 3), toDecimal64(right, 3), toDecimal64(diff, 3), - toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name + round(left, 3), round(right, 3), round(diff, 3), + round(stat_threshold, 3), unstable_fail, test, query_index, query_display_name from queries where unstable_show order by stat_threshold desc; @@ -789,11 +784,10 @@ create view total_speedup as ; create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') - settings output_format_decimal_trailing_zeros = 1 as with (times_speedup >= 1 - ? '-' || toString(toDecimal64(times_speedup, 3)) || 'x' - : '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x') + ? '-' || toString(round(times_speedup, 3)) || 'x' + : '+' || toString(round(1 / times_speedup, 3)) || 'x') as times_speedup_str select test, times_speedup_str, queries, bad, changed, unstable -- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all @@ -817,11 +811,10 @@ create view total_client_time_per_query as select * 'test text, query_index int, client float, server float'); create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') - settings output_format_decimal_trailing_zeros = 1 - as select client, server, toDecimal64(client/server, 3) p, + as select client, server, round(client/server, 3) p, test, query_display_name from total_client_time_per_query left join query_display_names using (test, query_index) - where p > toDecimal64(1.02, 3) order by p desc; + where p > round(1.02, 3) order by p desc; create table wall_clock_time_per_test engine Memory as select * from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float'); @@ -899,15 +892,14 @@ create view test_times_view_total as ; create table test_times_report engine File(TSV, 'report/test-times.tsv') - settings output_format_decimal_trailing_zeros = 1 as select test, - toDecimal64(real, 3), - toDecimal64(total_client_time, 3), + round(real, 3), + round(total_client_time, 3), queries, - toDecimal64(query_max, 3), - toDecimal64(avg_real_per_query, 3), - toDecimal64(query_min, 3), + round(query_max, 3), + round(avg_real_per_query, 3), + round(query_min, 3), runs from ( select * from test_times_view @@ -919,21 +911,20 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') -- report for all queries page, only main metric create table all_tests_report engine File(TSV, 'report/all-queries.tsv') - settings output_format_decimal_trailing_zeros = 1 as with -- server_time is sometimes reported as zero (if it's less than 1 ms), -- so we have to work around this to not get an error about conversion -- of NaN to decimal. (left > right ? left / right : right / left) as times_change_float, isFinite(times_change_float) as times_change_finite, - toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, + round(times_change_finite ? 
times_change_float : 1., 3) as times_change_decimal, times_change_finite ? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x' : '--' as times_change_str select changed_fail, unstable_fail, - toDecimal64(left, 3), toDecimal64(right, 3), times_change_str, - toDecimal64(isFinite(diff) ? diff : 0, 3), - toDecimal64(isFinite(stat_threshold) ? stat_threshold : 0, 3), + round(left, 3), round(right, 3), times_change_str, + round(isFinite(diff) ? diff : 0, 3), + round(isFinite(stat_threshold) ? stat_threshold : 0, 3), test, query_index, query_display_name from queries order by test, query_index; @@ -1044,27 +1035,6 @@ create table unstable_run_traces engine File(TSVWithNamesAndTypes, order by count() desc ; -create table metric_devation engine File(TSVWithNamesAndTypes, - 'report/metric-deviation.$version.tsv') - settings output_format_decimal_trailing_zeros = 1 - -- first goes the key used to split the file with grep - as select test, query_index, query_display_name, - toDecimal64(d, 3) d, q, metric - from ( - select - test, query_index, - (q[3] - q[1])/q[2] d, - quantilesExact(0, 0.5, 1)(value) q, metric - from (select * from unstable_run_metrics - union all select * from unstable_run_traces - union all select * from unstable_run_metrics_2) mm - group by test, query_index, metric - having isFinite(d) and d > 0.5 and q[3] > 5 - ) metrics - left join query_display_names using (test, query_index) - order by test, query_index, d desc - ; - create table stacks engine File(TSV, 'report/stacks.$version.tsv') as select -- first goes the key used to split the file with grep @@ -1173,9 +1143,8 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as -- Show metrics that have changed create table changes engine File(TSV, 'metrics/changes.tsv') - settings output_format_decimal_trailing_zeros = 1 as select metric, left, right, - toDecimal64(diff, 3), toDecimal64(times_diff, 3) + round(diff, 3), round(times_diff, 3) from ( select metric, median(left) as left, median(right) as right, (right - left) / left diff, @@ -1226,7 +1195,6 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') '$SHA_TO_TEST' :: LowCardinality(String) AS commit_sha, '${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME:-Performance}' :: LowCardinality(String) AS check_name, '$(sed -n 's/.*/\1/p' report.html)' :: LowCardinality(String) AS check_status, - -- TODO toDateTime() can't parse output of 'date', so no time for now. 
(($(date +%s) - $CHPC_CHECK_START_TIMESTAMP) * 1000) :: UInt64 AS check_duration_ms, fromUnixTimestamp($CHPC_CHECK_START_TIMESTAMP) check_start_time, test_name :: LowCardinality(String) AS test_name , diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml index 39c29bb61ca..292665c4f68 100644 --- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -19,31 +19,6 @@ - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - 1000000000 10 diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index 093834943a3..dab41adca51 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -3,7 +3,7 @@ 1 1 - 1 + 0 Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day ``` - ## trace_log {#server_configuration_parameters-trace_log} Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. @@ -1875,6 +1983,12 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1886,6 +2000,10 @@ The default server configuration file `config.xml` contains the following settin trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1900,9 +2018,18 @@ Parameters: - `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size of the logs in rows. When the number of non-flushed logs reaches `max_size_rows`, the logs are dumped to disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in rows for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Threshold in rows; when it is reached, flushing the logs to disk starts in the background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Whether the logs should be dumped to disk in case of a crash. +Default: false. +- `storage_policy` – Name of storage policy to use for the table (optional) **Example** + ```xml @@ -1910,11 +2037,53 @@ Parameters: asynchronous_insert_log
7500 toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false
``` +## crash_log {#server_configuration_parameters-crash_log} + +Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation. + +Parameters: + +- `database` — Database for storing a table. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size of the logs in rows. When the number of non-flushed logs reaches `max_size_rows`, the logs are dumped to disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in rows for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Threshold in rows; when it is reached, flushing the logs to disk starts in the background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Whether the logs should be dumped to disk in case of a crash. +Default: false. +- `storage_policy` – Name of storage policy to use for the table (optional) +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). + +The default server configuration file `config.xml` contains the following settings section: + +``` xml + + system + crash_log
+ toYYYYMM(event_date) + 7500 + 1024 + 1024 + 512 + false +
+``` + ## query_masking_rules {#query-masking-rules} Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs, @@ -2119,6 +2288,8 @@ This section contains the following parameters: - `session_timeout_ms` — Maximum timeout for the client session in milliseconds. - `operation_timeout_ms` — Maximum timeout for one operation in milliseconds. - `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional. +- `fallback_session_lifetime.min` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the minimal duration of the fallback session. Set in seconds. Optional. Default is 3 hours. +- `fallback_session_lifetime.max` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the maximum duration of the fallback session. Set in seconds. Optional. Default is 6 hours. - `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional. - zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection. * random - randomly selects one of ZooKeeper nodes. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index eb1d5db5676..86d24c3a942 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -7,90 +7,20 @@ pagination_next: en/operations/settings/settings # Settings Overview -There are multiple ways to define ClickHouse settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting. +:::note +XML-based Settings Profiles and [configuration files](https://clickhouse.com/docs/en/operations/configuration-files) are currently not supported for ClickHouse Cloud. To specify settings for your ClickHouse Cloud service, you must use [SQL-driven Settings Profiles](https://clickhouse.com/docs/en/operations/access-rights#settings-profiles-management). +::: -The order of priority for defining a setting is: +There are two main groups of ClickHouse settings: -1. Settings in the `users.xml` server configuration file +- Global server settings +- Query-level settings - - Set in the element ``. +The main distinction between global server settings and query-level settings is that +global server settings must be set in configuration files while query-level settings +can be set in configuration files or with SQL queries. -2. Session settings +Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level. - - Send `SET setting=value` from the ClickHouse console client in interactive mode. - Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` HTTP parameter. +Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query-level. -3. 
Query settings - - - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. - - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). - - Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed. - -View the [Settings](./settings.md) page for a description of the ClickHouse settings. - -## Converting a Setting to its Default Value - -If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like: - -```sql -SET setting_name = DEFAULT -``` - -For example, the default value of `max_insert_block_size` is 1048449. Suppose you change its value to 100000: - -```sql -SET max_insert_block_size=100000; - -SELECT value FROM system.settings where name='max_insert_block_size'; -``` - -The response is: - -```response -┌─value──┐ -│ 100000 │ -└────────┘ -``` - -The following command sets its value back to 1048449: - -```sql -SET max_insert_block_size=DEFAULT; - -SELECT value FROM system.settings where name='max_insert_block_size'; -``` - -The setting is now back to its default: - -```response -┌─value───┐ -│ 1048449 │ -└─────────┘ -``` - - -## Custom Settings {#custom_settings} - -In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings. - -A custom setting name must begin with one of predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file. - -```xml -custom_ -``` - -To define a custom setting use `SET` command: - -```sql -SET custom_a = 123; -``` - -To get the current value of a custom setting use `getSetting()` function: - -```sql -SELECT getSetting('custom_a'); -``` - -**See Also** - -- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 4122b4af40f..8ea599b9861 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -56,11 +56,11 @@ Possible values: - Any positive integer. -Default value: 300. +Default value: 3000. To achieve maximum performance of `SELECT` queries, it is necessary to minimize the number of parts processed, see [Merge Tree](../../development/architecture.md#merge-tree). -You can set a larger value to 600 (1200), this will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300. +Prior to 23.6 this setting was set to 300. You can set a higher different value, it will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300. 
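For illustration, this threshold can also be overridden per table rather than server-wide; a minimal sketch, assuming the setting discussed above is `parts_to_throw_insert` and using a hypothetical table `visits`:

```sql
-- Raise the limit for one table only, instead of changing the server-wide default
ALTER TABLE visits MODIFY SETTING parts_to_throw_insert = 600;

-- Or set it when the table is created
CREATE TABLE visits_new (event_date Date, user_id UInt64)
ENGINE = MergeTree
ORDER BY (event_date, user_id)
SETTINGS parts_to_throw_insert = 600;
```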
## parts_to_delay_insert {#parts-to-delay-insert} diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 163ed5d5826..14d612be831 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -298,7 +298,7 @@ Default value: `THROW`. - [JOIN clause](../../sql-reference/statements/select/join.md#select-join) - [Join table engine](../../engines/table-engines/special/join.md) -## max_partitions_per_insert_block {#max-partitions-per-insert-block} +## max_partitions_per_insert_block {#settings-max_partitions_per_insert_block} Limits the maximum number of partitions in a single inserted block. @@ -309,9 +309,18 @@ Default value: 100. **Details** -When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text: +When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse either logs a warning or throws an exception based on `throw_on_max_partitions_per_insert_block`. Exceptions have the following text: -> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” +> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” + +## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block} + +Allows you to control behaviour when `max_partitions_per_insert_block` is reached. + +- `true` - When an insert block reaches `max_partitions_per_insert_block`, an exception is raised. +- `false` - Logs a warning when `max_partitions_per_insert_block` is reached. + +Default value: `true` ## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user} @@ -327,3 +336,39 @@ The maximum amount of data consumed by temporary files on disk in bytes for all Zero means unlimited. Default value: 0. + +## max_sessions_for_user {#max-sessions-per-user} + +Maximum number of simultaneous sessions per authenticated user to the ClickHouse server. 
+ +Example: + +``` xml + + + 1 + + + 2 + + + 0 + + + + + + single_session_user + + + + two_sessions_profile + + + + unlimited_sessions_profile + + +``` + +Default value: 0 (Infinite count of simultaneous sessions). diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 1b22a6d1223..beb1d372e08 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -242,6 +242,26 @@ See also: - [DateTime data type.](../../sql-reference/data-types/datetime.md) - [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) +## interval_output_format {#interval_output_format} + +Allows choosing different output formats of the text representation of interval types. + +Possible values: + +- `kusto` - KQL-style output format. + + ClickHouse outputs intervals in [KQL format](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-timespan-format-strings#the-constant-c-format-specifier). For example, `toIntervalDay(2)` would be formatted as `2.00:00:00`. Please note that for interval types of varying length (ie. `IntervalMonth` and `IntervalYear`) the average number of seconds per interval is taken into account. + +- `numeric` - Numeric output format. + + ClickHouse outputs intervals as their underlying numeric representation. For example, `toIntervalDay(2)` would be formatted as `2`. + +Default value: `numeric`. + +See also: + +- [Interval](../../sql-reference/data-types/special-data-types/interval.md) + ## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error} Deserialization of IPv4 will use default values instead of throwing exception on conversion error. @@ -301,6 +321,10 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. +## output_format_schema {#output-format-schema} + +The path to the file where the automatically generated schema will be saved in [Cap’n Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats. + ## output_format_enable_streaming {#output_format_enable_streaming} Enable streaming in output formats that support it. @@ -931,6 +955,11 @@ Result ```text " string " ``` +### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} + +ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. + +Disabled by default. ### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter} @@ -964,6 +993,28 @@ Result a b ``` +### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values} + +Allow to set default value to column when CSV field deserialization failed on bad value + +Default value: `false`. 
+ +**Examples** + +Query + +```bash +./clickhouse local -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x" +echo 'a,b,c' | ./clickhouse local -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" +./clickhouse local -q "select * from test_tbl" +``` + +Result + +```text +a 0 1971-01-01 +``` + ## Values format settings {#values-format-settings} ### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} @@ -1061,17 +1112,6 @@ Default value: 1. ## Arrow format settings {#arrow-format-settings} -### input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} Ignore case when matching Arrow column names with ClickHouse column names. @@ -1117,21 +1157,10 @@ Enabled by default. Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed) -Default value: `none`. +Default value: `lz4_frame`. ## ORC format settings {#orc-format-settings} -### input_format_orc_import_nested {#input_format_orc_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} Batch size when reading ORC stripes. @@ -1170,17 +1199,6 @@ Default value: `none`. ## Parquet format settings {#parquet-format-settings} -### input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} Ignore case when matching Parquet column names with ClickHouse column names. @@ -1283,6 +1301,11 @@ When serializing Nullable columns with Google wrappers, serialize default values Disabled by default. +### format_protobuf_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema} + +Use autogenerated Protobuf schema when [format_schema](#formatschema-format-schema) is not set. 
+The schema is generated from ClickHouse table structure using function [structureToProtobufSchema](../../sql-reference/functions/other-functions.md#structure_to_protobuf_schema) + ## Avro format settings {#avro-format-settings} ### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields} @@ -1300,6 +1323,17 @@ Default value: 0. Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. +Format: +``` text +http://[user:password@]machine[:port]" +``` + +Examples: +``` text +http://registry.example.com:8081 +http://admin:secret@registry.example.com:8081 +``` + Default value: `Empty`. ### output_format_avro_codec {#output_format_avro_codec} @@ -1568,6 +1602,11 @@ Possible values: Default value: `'by_values'`. +### format_capn_proto_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema} + +Use autogenerated CapnProto schema when [format_schema](#formatschema-format-schema) is not set. +The schema is generated from ClickHouse table structure using function [structureToCapnProtoSchema](../../sql-reference/functions/other-functions.md#structure_to_capnproto_schema) + ## MySQLDump format settings {#musqldump-format-settings} ### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name) diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md index 2f39a75453c..d08266b8ef8 100644 --- a/docs/en/operations/settings/settings-profiles.md +++ b/docs/en/operations/settings/settings-profiles.md @@ -39,7 +39,7 @@ Example: 8
- + 1000000000 100000000000 @@ -67,6 +67,8 @@ Example: 50 100 + 4 + 1 diff --git a/docs/en/operations/settings/settings-query-level.md b/docs/en/operations/settings/settings-query-level.md new file mode 100644 index 00000000000..81cc2294a4c --- /dev/null +++ b/docs/en/operations/settings/settings-query-level.md @@ -0,0 +1,217 @@ +--- +sidebar_label: Query-level Settings +title: Query-level Settings +slug: /en/operations/settings/query-level +--- + +There are multiple ways to set ClickHouse query-level settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting. + +The order of priority for defining a setting is: + +1. Applying a setting to a user directly, or within a settings profile + + - SQL (recommended) + - adding one or more XML or YAML files to `/etc/clickhouse-server/users.d` + +2. Session settings + + - Send `SET setting=value` from the ClickHouse Cloud SQL console or + `clickhouse client` in interactive mode. Similarly, you can use ClickHouse + sessions in the HTTP protocol. To do this, you need to specify the + `session_id` HTTP parameter. + +3. Query settings + + - When starting `clickhouse client` in non-interactive mode, set the startup + parameter `--setting=value`. + - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). + - Define settings in the + [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) + clause of the SELECT query. The setting value is applied only to that query + and is reset to the default or previous value after the query is executed. + +## Examples + +These examples all set the value of the `async_insert` setting to `1`, and +show how to examine the settings in a running system. + +### Using SQL to apply a setting to a user directly + +This creates the user `ingester` with the setting `async_inset = 1`: + +```sql +CREATE USER ingester +IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3' +# highlight-next-line +SETTINGS async_insert = 1 +``` + +#### Examine the settings profile and assignment + +```sql +SHOW ACCESS +``` + +```response +┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐ +│ ... │ +# highlight-next-line +│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS async_insert = true │ +│ ... 
│ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` +### Using SQL to create a settings profile and assign to a user + +This creates the profile `log_ingest` with the setting `async_inset = 1`: + +```sql +CREATE +SETTINGS PROFILE log_ingest SETTINGS async_insert = 1 +``` + +This creates the user `ingester` and assigns the user the settings profile `log_ingest`: + +```sql +CREATE USER ingester +IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3' +# highlight-next-line +SETTINGS PROFILE log_ingest +``` + + +### Using XML to create a settings profile and user + +```xml title=/etc/clickhouse-server/users.d/users.xml + +# highlight-start + + + 1 + + +# highlight-end + + + + 7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3 +# highlight-start + log_ingest +# highlight-end + + + 7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3 + 1 + 1 + + + +``` + +#### Examine the settings profile and assignment + +```sql +SHOW ACCESS +``` + +```response +┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐ +│ CREATE USER default IDENTIFIED WITH sha256_password │ +# highlight-next-line +│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS PROFILE log_ingest │ +│ CREATE SETTINGS PROFILE default │ +# highlight-next-line +│ CREATE SETTINGS PROFILE log_ingest SETTINGS async_insert = true │ +│ CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1 │ +│ ... │ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` + +### Assign a setting to a session + +```sql +SET async_insert =1; +SELECT value FROM system.settings where name='async_insert'; +``` + +```response +┌─value──┐ +│ 1 │ +└────────┘ +``` + +### Assign a setting during a query + +```sql +INSERT INTO YourTable +# highlight-next-line +SETTINGS async_insert=1 +VALUES (...) +``` + + +## Converting a Setting to its Default Value + +If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like: + +```sql +SET setting_name = DEFAULT +``` + +For example, the default value of `async_insert` is `0`. Suppose you change its value to `1`: + +```sql +SET async_insert = 1; + +SELECT value FROM system.settings where name='async_insert'; +``` + +The response is: + +```response +┌─value──┐ +│ 1 │ +└────────┘ +``` + +The following command sets its value back to 0: + +```sql +SET async_insert = DEFAULT; + +SELECT value FROM system.settings where name='async_insert'; +``` + +The setting is now back to its default: + +```response +┌─value───┐ +│ 0 │ +└─────────┘ +``` + +## Custom Settings {#custom_settings} + +In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings. + +A custom setting name must begin with one of predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file. + +```xml +custom_ +``` + +To define a custom setting use `SET` command: + +```sql +SET custom_a = 123; +``` + +To get the current value of a custom setting use `getSetting()` function: + +```sql +SELECT getSetting('custom_a'); +``` + +**See Also** + +- View the [Settings](./settings.md) page for a description of the ClickHouse settings. 
+- [Global server settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8b969f87a4d..29434ef556f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -98,6 +98,18 @@ Default value: 0. ``` +## mutations_execute_nondeterministic_on_initiator {#mutations_execute_nondeterministic_on_initiator} + +If true constant nondeterministic functions (e.g. function `now()`) are executed on initiator and replaced to literals in `UPDATE` and `DELETE` queries. It helps to keep data in sync on replicas while executing mutations with constant nondeterministic functions. Default value: `false`. + +## mutations_execute_subqueries_on_initiator {#mutations_execute_subqueries_on_initiator} + +If true scalar subqueries are executed on initiator and replaced to literals in `UPDATE` and `DELETE` queries. Default value: `false`. + +## mutations_max_literal_size_to_replace {#mutations_max_literal_size_to_replace} + +The maximum size of serialized literal in bytes to replace in `UPDATE` and `DELETE` queries. Takes effect only if at least one the two settings above is enabled. Default value: 16384 (16 KiB). + ## distributed_product_mode {#distributed-product-mode} Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md). @@ -537,6 +549,8 @@ Possible values: The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned. + Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`. + - hash [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. @@ -3466,6 +3480,12 @@ Possible values: Default value: `0`. +## enable_url_encoding {#enable_url_encoding} + +Allows to enable/disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. + +Enabled by default. + ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. @@ -4290,7 +4310,7 @@ Use this setting only for backward compatibility if your use cases depend on old ## session_timezone {#session_timezone} Sets the implicit time zone of the current session or query. -The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone. +The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone. The setting takes precedence over the globally configured (server-level) implicit time zone. 
A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). @@ -4325,7 +4345,7 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric ``` :::warning -Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors. +Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors. See the following example and explanation. ::: @@ -4524,6 +4544,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta ### Placeholders +- `%a` — Full original filename (e.g., "sample.csv"). - `%f` — Original filename without extension (e.g., "sample"). - `%e` — Original file extension with dot (e.g., ".csv"). - `%t` — Timestamp (in microseconds). @@ -4569,3 +4590,39 @@ Type: Int64 Default: 0 +## rewrite_count_distinct_if_with_count_distinct_implementation + +Allows you to rewrite `countDistcintIf` with [count_distinct_implementation](#settings-count_distinct_implementation) setting. + +Possible values: + +- true — Allow. +- false — Disallow. + +Default value: `false`. + +## precise_float_parsing {#precise_float_parsing} + +Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms: +* If the value is `1`, then precise method is used. It is slower than fast method, but it always returns a number that is the closest machine representable number to the input. +* Otherwise, fast method is used (default). It usually returns the same value as precise, but in rare cases result may differ by one or two least significant digits. + +Possible values: `0`, `1`. + +Default value: `0`. + +Example: + +```sql +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7090999999999998 │ 15008753.000000002 │ +└─────────────────────┴──────────────────────────┘ + +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7091 │ 15008753 │ +└─────────────────────┴──────────────────────────┘ +``` diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index f357341da67..e46b495239c 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -32,6 +32,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 └─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` + + ## Metric descriptions @@ -483,6 +487,14 @@ The value is similar to `OSUserTime` but divided to the number of CPU cores to b Number of threads in the server of the PostgreSQL compatibility protocol. +### QueryCacheBytes + +Total size of the query cache cache in bytes. + +### QueryCacheEntries + +Total number of entries in the query cache. + ### ReplicasMaxAbsoluteDelay Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data. 
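As a usage note for the `QueryCacheBytes` and `QueryCacheEntries` metrics documented above, they can be read directly from the table this page describes; an illustrative query (not part of the patch):

```sql
-- Current size and entry count of the query cache
SELECT metric, value
FROM system.asynchronous_metrics
WHERE metric IN ('QueryCacheBytes', 'QueryCacheEntries');
```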
diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index ba5602ee292..d2b90a49b0d 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -10,6 +10,9 @@ Columns: - `event` ([String](../../sql-reference/data-types/string.md)) — Event name. - `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. - `description` ([String](../../sql-reference/data-types/string.md)) — Event description. +- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`. + +You can find all supported events in source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp). **Example** diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 1b720098fc7..a46f306f677 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -47,6 +47,10 @@ An example: ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 + 1048576 + 8192 + 524288 + false ``` diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index d8539908bf7..557835ce3b6 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -7,11 +7,17 @@ Contains information about settings for `MergeTree` tables. Columns: -- `name` (String) — Setting name. -- `value` (String) — Setting value. -- `description` (String) — Setting description. -- `type` (String) — Setting type (implementation specific string value). -- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed. +- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description. +- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal). +- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal). +- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: + - `0` — Current user can change the setting. + - `1` — Current user can’t change the setting. +- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value). +- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) _ Shows whether a setting is obsolete. 
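Since the sample output below is fairly wide, a narrower illustrative query against the newly documented columns (not part of the patch) may be easier to read:

```sql
-- Only settings that were explicitly changed, with their constraints and obsolescence flag
SELECT name, value, min, max, readonly, is_obsolete
FROM system.merge_tree_settings
WHERE changed;
```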
**Example** ```sql @@ -21,35 +27,51 @@ SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ```response Row 1: ────── +name: min_compress_block_size +value: 0 +changed: 0 +description: When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used. +min: ____ +max: ____ +readonly: 0 +type: UInt64 +is_obsolete: 0 + +Row 2: +────── +name: max_compress_block_size +value: 0 +changed: 0 +description: Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used. +min: ____ +max: ____ +readonly: 0 +type: UInt64 +is_obsolete: 0 + +Row 3: +────── name: index_granularity value: 8192 changed: 0 description: How many rows correspond to one primary key value. -type: SettingUInt64 - -Row 2: -────── -name: min_bytes_for_wide_part -value: 0 -changed: 0 -description: Minimal uncompressed size in bytes to create part in wide format instead of compact -type: SettingUInt64 - -Row 3: -────── -name: min_rows_for_wide_part -value: 0 -changed: 0 -description: Minimal number of rows to create part in wide format instead of compact -type: SettingUInt64 +min: ____ +max: ____ +readonly: 0 +type: UInt64 +is_obsolete: 0 Row 4: ────── -name: merge_max_block_size -value: 8192 +name: max_digestion_size_per_segment +value: 268435456 changed: 0 -description: How many rows in blocks should be formed for merge operations. -type: SettingUInt64 +description: Max number of bytes to digest per segment to build GIN index. +min: ____ +max: ____ +readonly: 0 +type: UInt64 +is_obsolete: 0 -4 rows in set. Elapsed: 0.001 sec. +4 rows in set. Elapsed: 0.009 sec. ``` diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 5a7dfd03eb4..ae0e7620d35 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -10,8 +10,9 @@ Columns: - `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. - `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. +- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `metric`. -The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse. +You can find all supported metrics in source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp). **Example** diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 5829e5ad313..8113b850a38 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -39,6 +39,8 @@ Columns: - `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. +- `primary_key_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values in the primary.idx/cidx file on disk. 
+ - `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks. - `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index b9fdd19c643..7143520835f 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -48,7 +48,7 @@ Columns: - `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value. - `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it’s `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value. - `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes (uncompressed). For other queries, the column value is 0. - `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query. - `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result. - `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query. @@ -111,6 +111,11 @@ Columns: - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. +- `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: + - `'Unknown'` = Status unknown. + - `'None'` = The query result was neither written into nor read from the query cache. + - `'Write'` = The query result was written into the query cache. + - `'Read'` = The query result was read from the query cache. 
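As a quick complement to the full row shown in the example below, the new `query_cache_usage` column can be inspected on its own; an illustrative query (not part of the patch):

```sql
-- Recent queries whose results were served from the query cache
SELECT event_time, query, query_cache_usage
FROM system.query_log
WHERE type = 'QueryFinish' AND query_cache_usage = 'Read'
ORDER BY event_time DESC
LIMIT 10;
```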
**Example** @@ -186,6 +191,7 @@ used_formats: [] used_functions: [] used_storages: [] used_table_functions: [] +query_cache_usage: None ``` **See Also** diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md index 3085b1acaf4..df482261ae8 100644 --- a/docs/en/operations/system-tables/server_settings.md +++ b/docs/en/operations/system-tables/server_settings.md @@ -14,6 +14,7 @@ Columns: - `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml` - `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description. - `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type. +- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) _ Shows whether a setting is obsolete. **Example** @@ -26,14 +27,22 @@ WHERE name LIKE '%thread_pool%' ``` ``` text -┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐ -│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ -│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ -│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ -│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ -│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ -│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ -└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘ +┌─name────────────────────────────────────────_─value─_─default─_─changed─_─description────────────────────────────────────────────────────────────────────────────────────────────────────── +───────────────────────────────────_─type───_─is_obsolete─┐ +│ max_thread_pool_size │ 10000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ 0 │ +│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ 0 │ +│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ 0 │ +│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ 0 │ +│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ 0 │ +│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ 0 │ +│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. 
│ UInt64 │ 0 │ +│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ 0 │ +│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ 0 │ +│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ 0 │ +│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ 0 │ +│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ 0 │ +└─────────────────────────────────────────────┴───────┴─────────┴─────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────── +───────────────────────────────────┴────────┴─────────────┘ ``` Using of `WHERE changed` can be useful, for example, when you want to check diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index afae45077cc..7dd2345a2d0 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -17,6 +17,7 @@ Columns: - `0` — Current user can change the setting. - `1` — Current user can’t change the setting. - `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value. +- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) _ Shows whether a setting is obsolete. **Example** @@ -29,11 +30,14 @@ WHERE name LIKE '%min_i%' ``` ``` text -┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐ -│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ -│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ -│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ -└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘ +┌─name───────────────────────────────────────────────_─value─────_─changed─_─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────_─min──_─max──_─readonly─_─type─────────_─default───_─alias_for─_─is_obsolete─┐ +│ min_insert_block_size_rows │ 1048449 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ____ │ ____ │ 0 │ UInt64 │ 1048449 │ │ 0 │ +│ min_insert_block_size_bytes │ 268402944 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. 
│ ____ │ ____ │ 0 │ UInt64 │ 268402944 │ │ 0 │ +│ min_insert_block_size_rows_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows) │ ____ │ ____ │ 0 │ UInt64 │ 0 │ │ 0 │ +│ min_insert_block_size_bytes_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes) │ ____ │ ____ │ 0 │ UInt64 │ 0 │ │ 0 │ +│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ____ │ ____ │ 0 │ Milliseconds │ 1000 │ │ 0 │ +└────────────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────── +──────────────────────────────────────────────────────┴──────┴──────┴──────────┴──────────────┴───────────┴───────────┴─────────────┘ ``` Using of `WHERE changed` can be useful, for example, when you want to check: diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md index 77f816fe428..2140d22b620 100644 --- a/docs/en/operations/utilities/clickhouse-keeper-client.md +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -11,7 +11,7 @@ A client application to interact with clickhouse-keeper by its native protocol. - `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode. - `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. -- `-p N`, `--port=N` — Server port. Default value: 2181 +- `-p N`, `--port=N` — Server port. Default value: 9181 - `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s. - `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s. - `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s. @@ -21,8 +21,8 @@ A client application to interact with clickhouse-keeper by its native protocol. ## Example {#clickhouse-keeper-client-example} ```bash -./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 -Connected to ZooKeeper at [::1]:2181 with session_id 137 +./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 +Connected to ZooKeeper at [::1]:9181 with session_id 137 / :) ls keeper foo bar / :) cd keeper @@ -45,9 +45,14 @@ keeper foo bar - `ls [path]` -- Lists the nodes for the given path (default: cwd) - `cd [path]` -- Change the working path (default `.`) - `set [version]` -- Updates the node's value. Only update if version matches (default: -1) -- `create ` -- Creates new node +- `create [mode]` -- Creates new node with the set value +- `touch ` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists - `get ` -- Returns the node's value - `remove ` -- Remove the node - `rmr ` -- Recursively deletes path. 
Confirmation required - `flwc ` -- Executes four-letter-word command - `help` -- Prints this message +- `get_stat [path]` -- Returns the node's stat (default `.`) +- `find_super_nodes [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`) +- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive +- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 0443a80cf17..0b17afb7e12 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -34,7 +34,13 @@ The binary you just downloaded can run all sorts of ClickHouse tools and utiliti A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL. -If the file is sitting on the same machine as `clickhouse-local`, use the `file` table engine. The following `reviews.tsv` file contains a sampling of Amazon product reviews: +If the file is sitting on the same machine as `clickhouse-local`, you can simply specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews: + +```bash +./clickhouse local -q "SELECT * FROM 'reviews.tsv'" +``` + +This command is a shortcut for: ```bash ./clickhouse local -q "SELECT * FROM file('reviews.tsv')" diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index a395b350a55..18ff5073e3f 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -300,7 +300,7 @@ SELECT groupArrayResample(30, 75, 30)(name, age) FROM people Consider the results. -`Jonh` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals. +`John` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals. Now let’s count the total number of people and their average wage in the specified age intervals. diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md index db19f524b31..f79fe66c05d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/any.md +++ b/docs/en/sql-reference/aggregate-functions/reference/any.md @@ -12,3 +12,5 @@ To get a determinate result, you can use the ‘min’ or ‘max’ function ins In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY. When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. 
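A tiny sketch of that MySQL-style usage, with a hypothetical table `visits(user_id, name)`:

```sql
-- MySQL would accept: SELECT user_id, name FROM visits GROUP BY user_id
-- In ClickHouse, wrap the non-key column in any():
SELECT user_id, any(name) AS name
FROM visits
GROUP BY user_id;
```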
+ +- Alias: `any_value` diff --git a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md new file mode 100644 index 00000000000..3c71129bdb5 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md @@ -0,0 +1,32 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg +sidebar_position: 110 +--- + +# array_concat_agg +- Alias of `groupArrayArray`. The function is case insensitive. + +**Example** + +```text +SELECT * +FROM t + +┌─a───────┐ +│ [1,2,3] │ +│ [4,5] │ +│ [6] │ +└─────────┘ + +``` + +Query: + +```sql +SELECT array_concat_agg(a) AS a +FROM t + +┌─a─────────────┐ +│ [1,2,3,4,5,6] │ +└───────────────┘ +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md index 18048fa4f71..ad678443df6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md @@ -44,3 +44,5 @@ Result: ``` The groupArray function will remove ᴺᵁᴸᴸ value based on the above results. + +- Alias: `array_agg`. diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 0da273e01ad..c99c8791542 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -140,8 +140,9 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) - [The `Date` data type](../../sql-reference/data-types/date.md) diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 793691850b1..3b80e8b1a8b 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -119,6 +119,7 @@ FROM dt; - [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format) - [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and 
times](../../sql-reference/operators/index.md#operators-for-working-with-dates-and-times) - [`Date` data type](../../sql-reference/data-types/date.md) - [`DateTime` data type](../../sql-reference/data-types/datetime.md) diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index dfe0eda2e21..3add754e6b6 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -4,7 +4,7 @@ sidebar_position: 54 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(t1, T2, …) +# Tuple(T1, T2, …) A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element. diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md index b0f19f0d8be..40f756b9588 100644 --- a/docs/en/sql-reference/data-types/uuid.md +++ b/docs/en/sql-reference/data-types/uuid.md @@ -6,42 +6,42 @@ sidebar_label: UUID # UUID -A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). +A Universally Unique Identifier (UUID) is a 16-byte value used to identify records. For detailed information about UUIDs, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier). -The example of UUID type value is represented below: +While different UUID variants exist (see [here](https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis)), ClickHouse does not validate that inserted UUIDs conform to a particular variant. UUIDs are internally treated as a sequence of 16 random bytes with [8-4-4-4-12 representation](https://en.wikipedia.org/wiki/Universally_unique_identifier#Textual_representation) at SQL level. + +Example UUID value: ``` text 61f0c404-5cb3-11e7-907b-a6006ad3dba0 ``` -If you do not specify the UUID column value when inserting a new record, the UUID value is filled with zero: +The default UUID is all-zero. It is used, for example, when a new record is inserted but no value for a UUID column is specified: ``` text 00000000-0000-0000-0000-000000000000 ``` -## How to Generate +## Generating UUIDs -To generate the UUID value, ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function. +ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function to generate random UUID version 4 values. ## Usage Example **Example 1** -This example demonstrates creating a table with the UUID type column and inserting a value into the table. +This example demonstrates the creation of a table with a UUID column and the insertion of a value into the table. ``` sql CREATE TABLE t_uuid (x UUID, y String) ENGINE=TinyLog -``` -``` sql INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1' -``` -``` sql SELECT * FROM t_uuid ``` +Result: + ``` text ┌────────────────────────────────────x─┬─y─────────┐ │ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │ @@ -50,13 +50,11 @@ SELECT * FROM t_uuid **Example 2** -In this example, the UUID column value is not specified when inserting a new record. +In this example, no UUID column value is specified when the record is inserted, i.e. 
the default UUID value is inserted: ``` sql INSERT INTO t_uuid (y) VALUES ('Example 2') -``` -``` sql SELECT * FROM t_uuid ``` diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 6c3d80683db..dd8031461e0 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -1092,7 +1092,7 @@ Types of sources (`source_type`): - [Local file](#local_file) - [Executable File](#executable) - [Executable Pool](#executable_pool) -- [HTTP(s)](#http) +- [HTTP(S)](#http) - DBMS - [ODBC](#odbc) - [MySQL](#mysql) @@ -1102,7 +1102,7 @@ Types of sources (`source_type`): - [Cassandra](#cassandra) - [PostgreSQL](#postgresql) -## Local File {#local_file} +### Local File {#local_file} Example of settings: @@ -1132,7 +1132,7 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION - [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function) -## Executable File {#executable} +### Executable File {#executable} Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data. @@ -1161,7 +1161,7 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node. -## Executable Pool {#executable_pool} +### Executable Pool {#executable_pool} Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts. @@ -1196,9 +1196,9 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. -## Http(s) {#https} +### HTTP(S) {#https} -Working with an HTTP(s) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. +Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. Example of settings: @@ -1248,7 +1248,55 @@ Setting fields: When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server. -### Known Vulnerability of the ODBC Dictionary Functionality +### DBMS + +#### ODBC + +You can use this method to connect any database that has an ODBC driver. + +Example of settings: + +``` xml + + + DatabaseName + ShemaName.TableName
+ DSN=some_parameters + SQL_QUERY + SELECT id, value_1, value_2 FROM ShemaName.TableName +
+ +``` + +or + +``` sql +SOURCE(ODBC( + db 'DatabaseName' + table 'SchemaName.TableName' + connection_string 'DSN=some_parameters' + invalidate_query 'SQL_QUERY' + query 'SELECT id, value_1, value_2 FROM db_name.table_name' +)) +``` + +Setting fields: + +- `db` – Name of the database. Omit it if the database name is set in the `` parameters. +- `table` – Name of the table and schema if exists. +- `connection_string` – Connection string. +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `query` – The custom query. Optional parameter. + +:::note +The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. +::: + +ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. + +If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item. + +##### Known Vulnerability of the ODBC Dictionary Functionality :::note When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised. @@ -1277,7 +1325,7 @@ SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db'); ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`. -### Example of Connecting Postgresql +##### Example of Connecting Postgresql Ubuntu OS. @@ -1358,7 +1406,7 @@ LIFETIME(MIN 300 MAX 360) You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`. -### Example of Connecting MS SQL Server +##### Example of Connecting MS SQL Server Ubuntu OS. @@ -1462,55 +1510,7 @@ LAYOUT(FLAT()) LIFETIME(MIN 300 MAX 360) ``` -## DBMS - -### ODBC - -You can use this method to connect any database that has an ODBC driver. - -Example of settings: - -``` xml - - - DatabaseName - ShemaName.TableName
- DSN=some_parameters - SQL_QUERY - SELECT id, value_1, value_2 FROM ShemaName.TableName -
- -``` - -or - -``` sql -SOURCE(ODBC( - db 'DatabaseName' - table 'SchemaName.TableName' - connection_string 'DSN=some_parameters' - invalidate_query 'SQL_QUERY' - query 'SELECT id, value_1, value_2 FROM db_name.table_name' -)) -``` - -Setting fields: - -- `db` – Name of the database. Omit it if the database name is set in the `` parameters. -- `table` – Name of the table and schema if exists. -- `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). -- `query` – The custom query. Optional parameter. - -:::note -The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. -::: - -ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. - -If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item. - -### Mysql +#### Mysql Example of settings: @@ -1627,7 +1627,7 @@ SOURCE(MYSQL( )) ``` -### ClickHouse +#### ClickHouse Example of settings: @@ -1680,7 +1680,7 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: -### Mongodb +#### Mongodb Example of settings: @@ -1723,7 +1723,7 @@ Setting fields: - `options` - MongoDB connection string options (optional parameter). -### Redis +#### Redis Example of settings: @@ -1756,7 +1756,7 @@ Setting fields: - `storage_type` – The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`. - `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0. -### Cassandra +#### Cassandra Example of settings: @@ -1798,7 +1798,7 @@ Setting fields: The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared. ::: -### PostgreSQL +#### PostgreSQL Example of settings: @@ -1855,7 +1855,7 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: -## Null +### Null A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can useful for tests or with setups with separated data and query nodes at nodes with Distributed tables. @@ -2476,52 +2476,3 @@ Dictionary updates (other than loading at first use) do not block queries. Durin We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. There are also functions for working with OS identifiers and search engines, but they shouldn’t be used. - -## Embedded Dictionaries - - - -ClickHouse contains a built-in feature for working with a geobase. - -This allows you to: - -- Use a region’s ID to get its name in the desired language. -- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. -- Check whether a region is part of another region. 
-- Get a chain of parent regions. - -All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”. - -The internal dictionaries are disabled in the default package. -To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file. - -The geobase is loaded from text files. - -Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory. - -Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory. - -You can also create these files yourself. The file format is as follows: - -`regions_hierarchy*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- parent region ID (`UInt32`) -- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values -- population (`UInt32`) — optional column - -`regions_names_*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. - -A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million. - -Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated. -For updates, the file modification times are checked. If a file has changed, the dictionary is updated. -The interval to check for changes is configured in the `builtin_dictionaries_reload_interval` parameter. -Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. - -We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. - -There are also functions for working with OS identifiers and search engines, but they shouldn’t be used. diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 64fae0e82f0..69f1816b7df 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -6,9 +6,20 @@ sidebar_label: Arithmetic # Arithmetic Functions -The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the inters has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit. +Arithmetic functions work for any two operands of type `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64`. 
-The result of addition or multiplication of two integers is unsigned unless one of the integers is signed. +Before performing the operation, both operands are cast to the result type. The result type is determined as follows (unless specified +differently in the function documentation below): +- If both operands are up to 32 bits wide, the size of the result type will be the size of the next bigger type following the bigger of the + two operands (integer size promotion). For example, `UInt8 + UInt16 = UInt32` or `Float32 * Float32 = Float64`. +- If one of the operands has 64 or more bits, the size of the result type will be the same size as the bigger of the two operands. For + example, `UInt32 + UInt128 = UInt128` or `Float32 * Float64 = Float64`. +- If one of the operands is signed, the result type will also be signed, otherwise it will be unsigned. For example, `UInt32 * Int32 = Int64`. + +These rules make sure that the result type will be the smallest type which can represent all possible results. While this introduces a risk +of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of +64 bit. This behavior also guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer +type. Example: @@ -22,8 +33,6 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0 └───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘ ``` -Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values. + Overflows are produced the same way as in C++. ## plus @@ -68,7 +77,7 @@ Alias: `a \* b` (operator) ## divide -Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function. +Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function. Division by 0 returns `inf`, `-inf`, or `nan`. @@ -84,7 +93,7 @@ Alias: `a / b` (operator) Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer. -The result has the same type as the dividend (the first parameter). +The result has the same width as the dividend (the first parameter). An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one. @@ -135,7 +144,7 @@ intDivOrZero(a, b) Calculates the remainder of the division of two values `a` by `b`. -The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number. +The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md). The remainder is computed like in C++. Truncated division is used for negative numbers. diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 7f2b8f3c605..f66994ed5a6 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -102,6 +102,8 @@ The function also works for strings.
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. +Alias: `OCTET_LENGTH` + ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 ## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 @@ -142,6 +144,7 @@ range([start, ] end [, step]) - All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments. - An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. +- Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has Null value (Nullable(T) type). **Examples** @@ -180,9 +183,8 @@ arrayConcat(arrays) **Arguments** - `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. - **Example** - +**Example** ``` sql SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res @@ -878,7 +880,7 @@ A special function. See the section [“ArrayJoin function”](../../sql-referen ## arrayDifference -Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). +Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). **Syntax** @@ -996,6 +998,24 @@ SELECT └──────────────┴───────────┘ ``` +## arrayJaccardIndex + +Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays. + +**Example** + +Query: +``` sql +SELECT arrayJaccardIndex([1, 2], [2, 3]) AS res +``` + +Result: +``` text +┌─res────────────────┐ +│ 0.3333333333333333 │ +└────────────────────┘ +``` + ## arrayReduce Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 3f61e7a214d..87d84425029 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -694,10 +694,14 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we Returns year and week for a date. The year in the result may be different from the year in the date argument for the first and the last week of the year. -The mode argument works exactly like the mode argument to `toWeek()`. 
For the single-argument syntax, a mode value of 0 is used. +The mode argument works like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used. `toISOYear()` is a compatibility function that is equivalent to `intDiv(toYearWeek(date,3),100)`. +:::warning +The week number returned by `toYearWeek()` can be different from what the `toWeek()` returns. `toWeek()` always returns week number in the context of the given year, and in case `toWeek()` returns `0`, `toYearWeek()` returns the value corresponding to the last week of previous year. See `prev_yearWeek` in example below. +::: + **Syntax** ``` sql @@ -707,18 +711,18 @@ toYearWeek(t[, mode[, timezone]]) **Example** ``` sql -SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9; +SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9, toYearWeek(toDate('2022-01-01')) AS prev_yearWeek; ``` ``` text -┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐ -│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ -└────────────┴───────────┴───────────┴───────────┘ +┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┬─prev_yearWeek─┐ +│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ 202152 │ +└────────────┴───────────┴───────────┴───────────┴───────────────┘ ``` ## age -Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. +Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond. E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. For an alternative to `age`, see function `date\_diff`. @@ -734,6 +738,8 @@ age('unit', startdate, enddate, [timezone]) - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: + - `microsecond` (possible abbreviations: `us`, `u`) + - `millisecond` (possible abbreviations: `ms`) - `second` (possible abbreviations: `ss`, `s`) - `minute` (possible abbreviations: `mi`, `n`) - `hour` (possible abbreviations: `hh`, `h`) @@ -809,6 +815,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: + - `microsecond` (possible abbreviations: `us`, `u`) + - `millisecond` (possible abbreviations: `ms`) - `second` (possible abbreviations: `ss`, `s`) - `minute` (possible abbreviations: `mi`, `n`) - `hour` (possible abbreviations: `hh`, `h`) @@ -1130,6 +1138,8 @@ Result: Returns the current date and time at the moment of query analysis. The function is a constant expression. +Alias: `current_timestamp`. + **Syntax** ``` sql @@ -1260,6 +1270,8 @@ Result: Accepts zero arguments and returns the current date at one of the moments of query analysis. The same as ‘toDate(now())’. +Aliases: `curdate`, `current_date`. + ## yesterday Accepts zero arguments and returns yesterday’s date at one of the moments of query analysis. @@ -1437,7 +1449,7 @@ Using replacement fields, you can define a pattern for the resulting string. 
“ | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM | +| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM | | %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index 73d72aa50e5..d62cd1db88d 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -18,7 +18,7 @@ file(path[, default]) **Arguments** -- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. +- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. - `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 26dcccfd42e..bfbd26551d3 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2552,3 +2552,187 @@ Result: This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables. +## structureToCapnProtoSchema {#structure_to_capn_proto_schema} + +Converts ClickHouse table structure to CapnProto schema. + +**Syntax** + +``` sql +structureToCapnProtoSchema(structure) +``` + +**Arguments** + +- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`. +- `root_struct_name` — Name for root struct in CapnProto schema. Default value - `Message`; + +**Returned value** + +- CapnProto schema + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Examples** + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB +``` + +Result: + +``` text +@0xf96402dd754d0eb7; + +struct Message +{ + column1 @0 : Data; + column2 @1 : UInt32; + column3 @2 : List(Data); +} +``` + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB +``` + +Result: + +``` text +@0xd1c8320fecad2b7f; + +struct Message +{ + struct Column1 + { + union + { + value @0 : Data; + null @1 : Void; + } + } + column1 @0 : Column1; + struct Column2 + { + element1 @0 : UInt32; + element2 @1 : List(Data); + } + column2 @1 : Column2; + struct Column3 + { + struct Entry + { + key @0 : Data; + value @1 : Data; + } + entries @0 : List(Entry); + } + column3 @2 : Column3; +} +``` + +Query: + +``` sql +SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB +``` + +Result: + +``` text +@0x96ab2d4ab133c6e1; + +struct Root +{ + column1 @0 : Data; + column2 @1 : UInt32; +} +``` + +## structureToProtobufSchema {#structure_to_protobuf_schema} + +Converts ClickHouse table structure to Protobuf schema. + +**Syntax** + +``` sql +structureToProtobufSchema(structure) +``` + +**Arguments** + +- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`. +- `root_message_name` — Name for root message in Protobuf schema. Default value - `Message`; + +**Returned value** + +- Protobuf schema + +Type: [String](../../sql-reference/data-types/string.md). + +**Examples** + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Message +{ + bytes column1 = 1; + uint32 column2 = 2; + repeated bytes column3 = 3; +} +``` + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Message +{ + bytes column1 = 1; + message Column2 + { + uint32 element1 = 1; + repeated bytes element2 = 2; + } + Column2 column2 = 2; + map column3 = 3; +} +``` + +Query: + +``` sql +SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB +``` + +Result: + +``` text +syntax = "proto3"; + +message Root +{ + bytes column1 = 1; + uint32 column2 = 2; +} +``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 5175bbf0615..e39bdbae06e 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -90,6 +90,8 @@ Returns the length of a string in bytes (not: in characters or Unicode code poin The function also works for arrays. +Alias: `OCTET_LENGTH` + ## lengthUTF8 Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. @@ -573,6 +575,42 @@ Alias: Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. 
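To illustrate the code-point-based slicing described above, here is a minimal hedged example (it assumes `substringUTF8` takes the same 1-based offset and length arguments as `substring`; the expected output is indicative rather than copied from the original documentation):

``` sql
SELECT substringUTF8('Привет, мир', 9, 3) AS res
```

Expected result:

``` text
┌─res─┐
│ мир │
└─────┘
```

The byte-based `substring` with the same arguments would land in the middle of the two-byte Cyrillic characters and return bytes that do not form valid UTF-8.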
+ +## substringIndex(s, delim, count) + +Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL. + +**Syntax** + +```sql +substringIndex(s, delim, count) +``` +Alias: `SUBSTRING_INDEX` + + +**Arguments** + +- s: The string to extract the substring from. [String](../../sql-reference/data-types/string.md). +- delim: The character to split. [String](../../sql-reference/data-types/string.md). +- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) + +**Example** + +``` sql +SELECT substringIndex('www.clickhouse.com', '.', 2) +``` + +Result: +``` +┌─substringIndex('www.clickhouse.com', '.', 2)─┐ +│ www.clickhouse │ +└──────────────────────────────────────────────┘ +``` + +## substringIndexUTF8(s, delim, count) + +Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + ## appendTrailingCharIfAbsent Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`. @@ -691,6 +729,30 @@ Returns whether string `str` ends with `suffix`. endsWith(str, suffix) ``` +## endsWithUTF8 + +Returns whether string `str` ends with `suffix`; the difference between `endsWithUTF8` and `endsWith` is that `endsWithUTF8` matches `str` and `suffix` by UTF-8 characters. + +**Syntax** + +```sql +endsWithUTF8(str, suffix) +``` + +**Example** + +``` sql +SELECT endsWithUTF8('中国', '\xbd'), endsWith('中国', '\xbd') +``` + +Result: + +```result +┌─endsWithUTF8('中国', '½')─┬─endsWith('中国', '½')─┐ +│ 0 │ 1 │ +└──────────────────────────┴──────────────────────┘ +``` + ## startsWith Returns whether string `str` starts with `prefix`. @@ -707,6 +769,25 @@ startsWith(str, prefix) SELECT startsWith('Spider-Man', 'Spi'); ``` +## startsWithUTF8 + +Returns whether string `str` starts with `prefix`; the difference between `startsWithUTF8` and `startsWith` is that `startsWithUTF8` matches `str` and `prefix` by UTF-8 characters. + + +**Example** + +``` sql +SELECT startsWithUTF8('中国', '\xe4'), startsWith('中国', '\xe4') +``` + +Result: + +```result +┌─startsWithUTF8('中国', 'ä')─┬─startsWith('中国', 'ä')─┐ +│ 0 │ 1 │ +└────────────────────────────┴────────────────────────┘ +``` + ## trim Removes the specified characters from the start or end of a string. If not specified otherwise, the function removes whitespace (ASCII-character 32). @@ -1253,3 +1334,48 @@ Result: │ A240 │ └──────────────────┘ ``` + +## initcap + +Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. + +## initcapUTF8 + +Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). + +If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. + +## firstLine + +Returns the first line from a multi-line string.
+ +**Syntax** + +```sql +firstLine(val) +``` + +**Arguments** + +- `val` - Input value. [String](../data-types/string.md) + +**Returned value** + +- The first line of the input value or the whole value if there is no line + separators. [String](../data-types/string.md) + +**Example** + +```sql +select firstLine('foo\nbar\nbaz'); +``` + +Result: + +```result +┌─firstLine('foo\nbar\nbaz')─┐ +│ foo │ +└────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 3d8f89f7295..c10a1036677 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -631,3 +631,53 @@ Result: │ 100 │ 200 │ 100-200 │ 100 │ └──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘ ``` + +## hasSubsequence + +Returns 1 if needle is a subsequence of haystack, or 0 otherwise. +A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements. + + +**Syntax** + +``` sql +hasSubsequence(haystack, needle) +``` + +**Arguments** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). + +**Returned values** + +- 1, if needle is a subsequence of haystack. +- 0, otherwise. + +Type: `UInt8`. + +**Examples** + +``` sql +SELECT hasSubsequence('garbage', 'arg') ; +``` + +Result: + +``` text +┌─hasSubsequence('garbage', 'arg')─┐ +│ 1 │ +└──────────────────────────────────┘ +``` + +## hasSubsequenceCaseInsensitive + +Like [hasSubsequence](#hasSubsequence) but searches case-insensitively. + +## hasSubsequenceUTF8 + +Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings. + +## hasSubsequenceCaseInsensitiveUTF8 + +Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively. \ No newline at end of file diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 7ed2deaeda6..88e4ac03fdb 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -559,6 +559,29 @@ Result: └────────────────────────────┘ ``` +## tupleConcat + +Combines tuples passed as arguments. + +``` sql +tupleConcat(tuples) +``` + +**Arguments** + +- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type. + +**Example** + +``` sql +SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res +``` + +``` text +┌─res──────────────────┐ +│ (1,2,3,4,true,false) │ +└──────────────────────┘ +``` ## Distance functions diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 214c885bc0e..36f40b37238 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -399,7 +399,11 @@ toDateTime(expr[, time_zone ]) - `expr` — The value. 
[String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). - `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md). -If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp). +:::note +If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp). +If `expr` is a [String](/docs/en/sql-reference/data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. +Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be both a year (an incomplete string representation of Date / DateTime) or a unix timestamp. Longer numeric strings are allowed. +::: **Returned value** diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index dae2c7dd1d3..6ceb9b5849e 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC Syntax: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Example** diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 7dadc2be5b2..dca34d16f25 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -36,6 +36,8 @@ These `ALTER` statements modify entities related to role-based access control: [ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not. +[ALTER NAMED COLLECTION](/docs/en/sql-reference/statements/alter/named-collection.md) statement modifies [Named Collections](/docs/en/operations/named-collections.md). + ## Mutations `ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts. diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md new file mode 100644 index 00000000000..ac6752127c1 --- /dev/null +++ b/docs/en/sql-reference/statements/alter/named-collection.md @@ -0,0 +1,30 @@ +--- +slug: /en/sql-reference/statements/alter/named-collection +sidebar_label: NAMED COLLECTION +--- + +# ALTER NAMED COLLECTION + +This query intends to modify already existing named collections. + +**Syntax** + +```sql +ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster] +[ SET +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... ] | +[ DELETE key_name4, key_name5, ... 
] +``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; + +ALTER NAMED COLLECTION foobar SET a = '2', c = '3'; + +ALTER NAMED COLLECTION foobar DELETE b; +``` diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index 14e29d051d7..fa39526a53e 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -8,13 +8,14 @@ sidebar_label: CREATE Create queries make a new entity of one of the following kinds: -- [DATABASE](../../../sql-reference/statements/create/database.md) -- [TABLE](../../../sql-reference/statements/create/table.md) -- [VIEW](../../../sql-reference/statements/create/view.md) -- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) -- [FUNCTION](../../../sql-reference/statements/create/function.md) -- [USER](../../../sql-reference/statements/create/user.md) -- [ROLE](../../../sql-reference/statements/create/role.md) -- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) -- [QUOTA](../../../sql-reference/statements/create/quota.md) -- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) +- [DATABASE](/docs/en/sql-reference/statements/create/database.md) +- [TABLE](/docs/en/sql-reference/statements/create/table.md) +- [VIEW](/docs/en/sql-reference/statements/create/view.md) +- [DICTIONARY](/docs/en/sql-reference/statements/create/dictionary.md) +- [FUNCTION](/docs/en/sql-reference/statements/create/function.md) +- [USER](/docs/en/sql-reference/statements/create/user.md) +- [ROLE](/docs/en/sql-reference/statements/create/role.md) +- [ROW POLICY](/docs/en/sql-reference/statements/create/row-policy.md) +- [QUOTA](/docs/en/sql-reference/statements/create/quota.md) +- [SETTINGS PROFILE](/docs/en/sql-reference/statements/create/settings-profile.md) +- [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md) diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md new file mode 100644 index 00000000000..1fc7b11c554 --- /dev/null +++ b/docs/en/sql-reference/statements/create/named-collection.md @@ -0,0 +1,34 @@ +--- +slug: /en/sql-reference/statements/create/named-collection +sidebar_label: NAMED COLLECTION +--- + +# CREATE NAMED COLLECTION + +Creates a new named collection. + +**Syntax** + +```sql +CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... 
+``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +``` + +**Related statements** + +- [CREATE NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection) +- [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-function) + + +**See Also** + +- [Named collections guide](/docs/en/operations/named-collections.md) diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index c69285171ab..a6ced870c18 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -11,6 +11,7 @@ Syntax: ``` sql CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] + [IN access_storage_type] [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index 9b14e220e1f..4b6fffe4f60 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -11,6 +11,7 @@ Syntax: ``` sql CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] + [IN access_storage_type] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...] ``` diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md index 83bb2e6bb9a..cd7718793bd 100644 --- a/docs/en/sql-reference/statements/create/row-policy.md +++ b/docs/en/sql-reference/statements/create/row-policy.md @@ -16,6 +16,7 @@ Syntax: ``` sql CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1|db1.* [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2|db2.* ...] + [IN access_storage_type] [FOR SELECT] USING condition [AS {PERMISSIVE | RESTRICTIVE}] [TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}] diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md index 8e221a4d82f..d8afce9d6de 100644 --- a/docs/en/sql-reference/statements/create/settings-profile.md +++ b/docs/en/sql-reference/statements/create/settings-profile.md @@ -12,6 +12,7 @@ Syntax: ``` sql CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] + [IN access_storage_type] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...] ``` diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index d168be63c36..11d4eae7bc8 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -14,6 +14,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] 
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + [IN access_storage_type] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 10b15638152..11026340a0f 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i ::: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index b6208c2fd52..eed76dbcd5c 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -49,7 +49,7 @@ Deletes a user. Syntax: ``` sql -DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP ROLE @@ -59,7 +59,7 @@ Deletes a role. The deleted role is revoked from all the entities where it was a Syntax: ``` sql -DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP ROW POLICY @@ -69,7 +69,7 @@ Deletes a row policy. Deleted row policy is revoked from all the entities where Syntax: ``` sql -DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] +DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP QUOTA @@ -79,7 +79,7 @@ Deletes a quota. The deleted quota is revoked from all the entities where it was Syntax: ``` sql -DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP SETTINGS PROFILE @@ -89,7 +89,7 @@ Deletes a settings profile. The deleted settings profile is revoked from all the Syntax: ``` sql -DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP VIEW @@ -119,3 +119,20 @@ DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster] CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b; DROP FUNCTION linear_equation; ``` + +## DROP NAMED COLLECTION + +Deletes a named collection. 
+ +**Syntax** + +``` sql +DROP NAMED COLLECTION [IF EXISTS] name [on CLUSTER cluster] +``` + +**Example** + +``` sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +DROP NAMED COLLECTION foobar; +``` diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index d6e30827f9b..e0cc98c2351 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ Inserts data into a table. **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -107,7 +107,7 @@ If table has [constraints](../../sql-reference/statements/create/table.md#constr **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` Columns are mapped according to their position in the SELECT clause. However, their names in the SELECT expression and the table for INSERT may differ. If necessary, type casting is performed. @@ -126,7 +126,7 @@ To insert a default value instead of `NULL` into a column with not nullable data **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause. diff --git a/docs/en/sql-reference/statements/move.md b/docs/en/sql-reference/statements/move.md new file mode 100644 index 00000000000..fac738ff711 --- /dev/null +++ b/docs/en/sql-reference/statements/move.md @@ -0,0 +1,32 @@ +--- +slug: /en/sql-reference/statements/move +sidebar_position: 54 +sidebar_label: MOVE +--- + +# MOVE access entity statement + +This statement allows to move an access entity from one access storage to another. + +Syntax: + +```sql +MOVE {USER, ROLE, QUOTA, SETTINGS PROFILE, ROW POLICY} name1 [, name2, ...] TO access_storage_type +``` + +Currently, there are five access storages in ClickHouse: + - `local_directory` + - `memory` + - `replicated` + - `users_xml` (ro) + - `ldap` (ro) + +Examples: + +```sql +MOVE USER test TO local_directory +``` + +```sql +MOVE ROLE test TO memory +``` diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index 4654f249548..a59ef463419 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -5,7 +5,27 @@ sidebar_label: WITH # WITH Clause -ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is provides to use results of `WITH` clause in the rest of `SELECT` query. 
Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. +ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause at every place it is used throughout the rest of the `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. + +Please note that CTEs do not guarantee the same results in all places they are referenced, because the query is re-executed for each use. + +An example of this behavior is shown below: +``` sql +with cte_numbers as +( + select + num + from generateRandom('num UInt64', NULL) + limit 1000000 +) +select + count() +from cte_numbers +where num in (select num from cte_numbers) +``` +If the CTE passed on exactly its results and not just a piece of code, you would always see `1000000`. + +However, because we are referring to `cte_numbers` twice, random numbers are generated on each use and, accordingly, we see different random results: `280501, 392454, 261636, 196227` and so on... ## Syntax diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 336b93db9d5..1c399d2072b 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -205,7 +205,7 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ The optional keyword `FULL` causes the output to include the collation, comment and privilege columns. -`SHOW COLUMNS` produces a result table with the following structure: +The statement produces a result table with the following structure: - field - The name of the column (String) - type - The column data type (String) - null - If the column data type is Nullable (UInt8) @@ -272,6 +272,10 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2 Displays a list of primary and data skipping indexes of a table. +This statement mostly exists for compatibility with MySQL. System tables [system.tables](../../operations/system-tables/tables.md) (for +primary keys) and [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md) (for data skipping indices) +provide equivalent information but in a fashion more native to ClickHouse. + ```sql SHOW [EXTENDED] {INDEX | INDEXES | INDICES | KEYS } {FROM | IN} <table> [{FROM | IN} <db>] [WHERE <expr>] [INTO OUTFILE <filename>] [FORMAT <format>] ``` @@ -281,22 +285,22 @@ equivalent. If no database is specified, the query assumes the current database The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility. -`SHOW INDEX` produces a result table with the following structure: -- table - The name of the table (String) -- non_unique - 0 if the index cannot contain duplicates, 1 otherwise (UInt8) -- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) -- seq_in_index - Currently unused -- column_name - Currently unused -- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String)) -- cardinality - Currently unused -- sub_part - Currently unused -- packed - Currently unused +The statement produces a result table with the following structure: +- table - The name of the table.
(String) +- non_unique - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8) +- key_name - The name of the index, `PRIMARY` if the index is a primary key index. (String) +- seq_in_index - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. (UInt8) +- column_name - For a primary key index, the name of the column. For a data skipping index: `''` (empty string), see field "expression". (String) +- collation - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String)) +- cardinality - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64) +- sub_part - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String)) +- packed - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String)) - null - Currently unused -- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String) -- comment - Currently unused -- index_comment - Currently unused -- visible - If the index is visible to the optimizer, always `YES` (String) -- expression - The index expression (String) +- index_type - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String) +- comment - Additional information about the index, currently always `''` (empty string). (String) +- index_comment - `''` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String) +- visible - If the index is visible to the optimizer, always `YES`. (String) +- expression - For a data skipping index, the index expression. For a primary key index: `''` (empty string). (String) **Examples** @@ -310,11 +314,12 @@ Result: ``` text ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │ -│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │ -│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │ -│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │ -│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │ +│ tbl │ 1 │ blf_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 1 │ mm1_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 1 │ mm2_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 1 │ PRIMARY │ 1 │ c │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ PRIMARY │ 2 │ a │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ set_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 65a35f03fbe..59970dbeccd 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -314,6 +314,22 @@ Provides possibility to start background fetch 
tasks from replication queues whi SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] ``` +### STOP PULLING REPLICATION LOG + +Stops loading new entries from the replication log into the replication queue of a `ReplicatedMergeTree` table. + +``` sql +SYSTEM STOP PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + +### START PULLING REPLICATION LOG + +Cancels `SYSTEM STOP PULLING REPLICATION LOG`. + +``` sql +SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + ### SYNC REPLICA Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds. @@ -414,3 +430,29 @@ Will do sync syscall. ```sql SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name] ``` + + +### SYSTEM STOP LISTEN + +Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. + +However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect. + +```sql +SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` + +- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. +- If `QUERIES ALL` modifier is specified, all protocols are stopped. +- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped. +- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped. + +### SYSTEM START LISTEN + +Allows new connections to be established on the specified protocols. + +However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. + +```sql +SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index 457031a2157..4b46210aa09 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -4,8 +4,9 @@ sidebar_position: 52 sidebar_label: TRUNCATE --- -# TRUNCATE Statement +# TRUNCATE Statements +## TRUNCATE TABLE ``` sql TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` @@ -21,3 +22,10 @@ You can specify how long (in seconds) to wait for inactive replicas to execute ` :::note If the `alter_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. ::: + +## TRUNCATE DATABASE +``` sql +TRUNCATE DATABASE [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +Removes all tables from a database but keeps the database itself. When the clause `IF EXISTS` is omitted, the query returns an error if the database does not exist.
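+
+**Example**
+
+A minimal usage sketch of the syntax above (the database and table names are hypothetical):
+
+``` sql
+CREATE DATABASE IF NOT EXISTS mydb;
+CREATE TABLE mydb.events (id UInt64) ENGINE = MergeTree ORDER BY id;
+
+-- removes all tables from mydb; the database itself remains
+TRUNCATE DATABASE IF EXISTS mydb;
+```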
diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index 7bb5d892c47..59c92e1327e 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -19,7 +19,7 @@ azureBlobStorage(- connection_string|storage_account_url, container_name, blobpa - `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) - `container_name` - Container name -- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `account_name` - if storage_account_url is used, then account name can be specified here - `account_key` - if storage_account_url is used, then account key can be specified here - `format` — The [format](../../interfaces/formats.md#formats) of the file. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md new file mode 100644 index 00000000000..20dfd35d5db --- /dev/null +++ b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md @@ -0,0 +1,47 @@ +--- +slug: /en/sql-reference/table-functions/azureBlobStorageCluster +sidebar_position: 55 +sidebar_label: azureBlobStorageCluster +title: "azureBlobStorageCluster Table Function" +--- + +Allows processing files from [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. +This table function is similar to the [s3Cluster function](../../sql-reference/table-functions/s3Cluster.md). + +**Syntax** + +``` sql +azureBlobStorageCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) +``` + +**Arguments** + +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) +- `container_name` - Container name +- `blobpath` - file path. 
Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `account_name` - if storage_account_url is used, then account name can be specified here +- `account_key` - if storage_account_url is used, then account key can be specified here +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `compression` — Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. (same as setting to `auto`). +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file. + +**Examples** + +Select the count for the file `test_cluster_*.csv`, using all the nodes in the `cluster_simple` cluster: + +``` sql +SELECT count(*) from azureBlobStorageCluster( + 'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'test_container', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64') +``` + +**See Also** + +- [AzureBlobStorage engine](../../engines/table-engines/integrations/azureBlobStorage.md) +- [azureBlobStorage table function](../../sql-reference/table-functions/azureBlobStorage.md) diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index 7362c433e0e..a083c6b89a6 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -16,14 +16,14 @@ All available clusters are listed in the [system.clusters](../../operations/syst **Syntax** ``` sql -cluster('cluster_name', db.table[, sharding_key]) -cluster('cluster_name', db, table[, sharding_key]) -clusterAllReplicas('cluster_name', db.table[, sharding_key]) -clusterAllReplicas('cluster_name', db, table[, sharding_key]) +cluster(['cluster_name', db.table, sharding_key]) +cluster(['cluster_name', db, table, sharding_key]) +clusterAllReplicas(['cluster_name', db.table, sharding_key]) +clusterAllReplicas(['cluster_name', db, table, sharding_key]) ``` **Arguments** -- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers, set `default` if not specified. - `db.table` or `db`, `table` - Name of a database and a table. - `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard. diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index c78ffc1d61c..4db9494502e 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -13,16 +13,18 @@ The `file` function can be used in `SELECT` and `INSERT` queries to read from or **Syntax** ``` sql -file(path [,format] [,structure] [,compression]) +file([path_to_archive ::] path [,format] [,structure] [,compression]) ``` **Parameters** - `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). 
Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. +- `path_to_archive` - The relative path to a zip/tar/7z archive. The path to the archive supports the same globs as `path`. - `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`. + **Returned value** A table with the specified structure for reading or writing data in the specified file. @@ -128,13 +130,18 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); └─────────┴─────────┴─────────┘ ``` +Getting data from the table in `table.csv`, located in `archive1.zip` and/or `archive2.zip`: +``` sql +SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); +``` + ## Globs in Path Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix). - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`. - `{N..M}` — Substitutes any number in range from N to M including both borders. - `**` - Fetches all files inside the folder recursively. diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 01b4e4f6a69..48c2381696e 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -22,7 +22,7 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 832be46d05f..75100eeb4f3 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -17,7 +17,7 @@ hdfsCluster(cluster_name, URI, format, structure) **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `URI` — URI to a file or a bunch of files.
Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md index 30db0ef00aa..fa86b436a5e 100644 --- a/docs/en/sql-reference/table-functions/iceberg.md +++ b/docs/en/sql-reference/table-functions/iceberg.md @@ -21,7 +21,7 @@ iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure]) - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -Engine parameters can be specified using [Named Collections](../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). **Returned value** diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index fba3ea55653..59ed4bf1985 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -13,10 +13,10 @@ Both functions can be used in `SELECT` and `INSERT` queries. ## Syntax ``` sql -remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) -remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) +remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) +remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) +remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) +remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) ``` ## Parameters @@ -29,6 +29,8 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) The port is required for an IPv6 address. + If only this parameter is specified, `db` and `table` will use `system.one` by default. + Type: [String](../../sql-reference/data-types/string.md). - `db` — Database name. Type: [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 55c825b8b9b..07addafcf58 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -23,7 +23,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
:::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index d5bdc85f9f8..675aef54d34 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [, **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 2ab43f1b895..859de86f019 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,6 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default. **See Also** diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md index 68fbfe0b22a..cb89a091d68 100644 --- a/docs/en/sql-reference/transactions.md +++ b/docs/en/sql-reference/transactions.md @@ -3,23 +3,46 @@ slug: /en/guides/developer/transactional --- # Transactional (ACID) support -INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID): -- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed. +## Case 1: INSERT into one partition, of one table, of the MergeTree* family + +This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes): +- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted. - Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted. 
-- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as if before INSERT or after successful INSERT; no partial state is seen; -- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting). -* If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own; -* INSERT into multiple tables with one statement is possible if materialized views are involved; -* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional; -* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable; -* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting; -* max_insert_block_size is 1 000 000 by default and can be adjusted as needed; -* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties; -* ClickHouse is using MVCC with snapshot isolation internally; -* all ACID properties are valid even in case of server kill / crash; -* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup; -* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency) -* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. +- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen +- Durable: a successful INSERT is written to the filesystem before answering the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting). +- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associated materialized views). + +## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family + +Same as Case 1 above, with this detail: +- If a table has many partitions and the INSERT covers many partitions, then insertion into every partition is transactional on its own + + +## Case 3: INSERT into one distributed table of the MergeTree* family + +Same as Case 1 above, with this detail: +- INSERT into a Distributed table is not transactional as a whole, while insertion into every shard is transactional + +## Case 4: Using a Buffer table + +- insert into Buffer tables is neither atomic nor isolated nor consistent nor durable + +## Case 5: Using async_insert + +Same as Case 1 above, with this detail: +- atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured.
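+
+A minimal sketch of the settings involved (the table name `t` is hypothetical):
+
+``` sql
+SET async_insert = 1;           -- enable asynchronous inserts
+SET wait_for_async_insert = 1;  -- the default: the client waits for the flush, so atomicity is kept
+INSERT INTO t VALUES (1), (2), (3);
+```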
+ +## Notes +- rows inserted from the client in some data format are packed into a single block when: + - the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc) and the data contains less than `max_insert_block_size` rows (~1 000 000 by default) or less than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) when parallel parsing is used (enabled by default) + - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data +- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc) +- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties +- ClickHouse uses MVCC with snapshot isolation internally +- all ACID properties are valid even in the case of server kill/crash +- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup +- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency) +- this explanation does not cover a new transactions feature that allows full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback) ## Transactions, Commit, and Rollback diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md index 9a1f9c9347d..6b4e612b13f 100644 --- a/docs/ru/development/build-osx.md +++ b/docs/ru/development/build-osx.md @@ -68,7 +68,7 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/ $ rm -rf build $ mkdir build $ cd build - $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. + $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. $ cmake --build . --config RelWithDebInfo $ cd .. diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md index 574d9273088..a4e58e66e0c 100644 --- a/docs/ru/engines/table-engines/special/buffer.md +++ b/docs/ru/engines/table-engines/special/buffer.md @@ -9,7 +9,7 @@ sidebar_label: Buffer Буферизует записываемые данные в оперативке, периодически сбрасывая их в другую таблицу. При чтении, производится чтение данных одновременно из буфера и из другой таблицы.
``` sql -Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) +Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) ``` Параметры движка: diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md deleted file mode 100644 index 479616d667b..00000000000 --- a/docs/ru/getting-started/example-datasets/wikistat.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: /ru/getting-started/example-datasets/wikistat -sidebar_position: 17 -sidebar_label: WikiStat ---- - -# WikiStat {#wikistat} - -См: http://dumps.wikimedia.org/other/pagecounts-raw/ - -Создание таблицы: - -``` sql -CREATE TABLE wikistat -( - date Date, - time DateTime, - project String, - subproject String, - path String, - hits UInt64, - size UInt64 -) ENGINE = MergeTree(date, (path, time), 8192); -``` - -Загрузка данных: - -``` bash -$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done -``` diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md new file mode 120000 index 00000000000..2d429d00984 --- /dev/null +++ b/docs/ru/getting-started/example-datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting-started/example-datasets/wikistat.md \ No newline at end of file diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 48a6132170a..5571936f4c5 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -401,8 +401,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR - [output_format_csv_crlf_end_of_line](../operations/settings/settings.md#output_format_csv_crlf_end_of_line) - если установлено значение true, конец строки в формате вывода CSV будет `\r\n` вместо `\n`. Значение по умолчанию - `false`. - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`. - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`. -- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. -Значение по умолчанию - `true`. +- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`. 
+- [input_format_csv_allow_variable_number_of_columns](../operations/settings/settings.md/#input_format_csv_allow_variable_number_of_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. ## CSVWithNames {#csvwithnames} @@ -1353,8 +1353,6 @@ ClickHouse поддерживает настраиваемую точность $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). - Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида: ``` bash @@ -1413,8 +1411,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). - ### Вывод данных {#selecting-data-arrow} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида: @@ -1471,8 +1467,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). - ### Вывод данных {#selecting-data-2} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида: diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index b8c5ee77f0c..981f1c7b5a2 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -50,7 +50,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} 1 ``` @@ -266,9 +266,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). 
Пример последовательности заголовков: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"} ``` Возможные поля заголовка: @@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 2b824ce91bd..085761d80c7 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -85,6 +85,43 @@ $ cat /etc/clickhouse-server/users.d/alice.xml Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера. +## Шифрование {#encryption} + +Вы можете использовать симметричное шифрование для зашифровки элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте аттибут`encrypted_by` с именем кодека шифрования как значение к элементу, который надо зашифровать. + +В отличии от аттрибутов `from_zk`, `from_env` и `incl` (или элемента `include`), подстановка, т.е. расшифровка зашифрованного значения, не выподняется в файле предобработки. Расшифровка происходит только во время исполнения в серверном процессе. + +Пример: + +```xml + + + + + 00112233445566778899aabbccddeeff + + + + + admin + 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + + + +``` + +Чтобы получить зашифрованное значение может быть использовано приложение-пример `encrypt_decrypt` . + +Пример: + +``` bash +./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd +``` + +``` text +961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 +``` + ## Примеры записи конфигурации на YAML {#example} Здесь можно рассмотреть пример реальной конфигурации записанной на YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example). 
diff --git a/docs/ru/operations/named-collections.md b/docs/ru/operations/named-collections.md index ba6b47116ad..48ee7c9f15d 100644 --- a/docs/ru/operations/named-collections.md +++ b/docs/ru/operations/named-collections.md @@ -88,7 +88,6 @@ SELECT * FROM s3_engine_table LIMIT 3; 3306 test 8 - 1 1 diff --git a/docs/ru/operations/optimizing-performance/profile-guided-optimization.md b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md new file mode 120000 index 00000000000..31cb656bd99 --- /dev/null +++ b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1 @@ +../../../en/operations/optimizing-performance/profile-guided-optimization.md \ No newline at end of file diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 5430469ea18..7b026244624 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -575,14 +575,60 @@ ClickHouse поддерживает динамическое изменение - `errorlog` - Файл лога ошибок. - `size` - Размер файла. Действует для `log` и `errorlog`. Как только файл достиг размера `size`, ClickHouse архивирует и переименовывает его, а на его месте создает новый файл лога. - `count` - Количество заархивированных файлов логов, которые сохраняет ClickHouse. +- `stream_compress` – Сжимать `log` и `errorlog` с помощью алгоритма `lz4`. Чтобы активировать, узтановите значение `1` или `true`. + +Имена файлов `log` и `errorlog` (только имя файла, а не директорий) поддерживают спецификаторы шаблонов даты и времени. + +**Спецификаторы форматирования** +С помощью следующих спецификаторов, можно определить шаблон для формирования имени файла. Столбец “Пример” показывает возможные значения на момент времени `2023-07-06 18:32:07`. + +| Спецификатор | Описание | Пример | +|--------------|---------------------------------------------------------------------------------------------------------------------|--------------------------| +| %% | Литерал % | % | +| %n | Символ новой строки | | +| %t | Символ горизонтальной табуляции | | +| %Y | Год как десятичное число, например, 2017 | 2023 | +| %y | Последние 2 цифры года в виде десятичного числа (диапазон [00,99]) | 23 | +| %C | Первые 2 цифры года в виде десятичного числа (диапазон [00,99]) | 20 | +| %G | Год по неделям согласно [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), то есть год, который содержит указанную неделю. Обычно используется вместе с %V. | 2023 | +| %g | Последние 2 цифры [года по неделям ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), т.е. года, содержащего указанную неделю (диапазон [00,99]). | 23 | +| %b | Сокращённое название месяца, например Oct (зависит от локали) | Jul | +| %h | Синоним %b | Jul | +| %B | Полное название месяца, например, October (зависит от локали) | July | +| %m | Месяц в виде десятичного числа (диапазон [01,12]) | 07 | +| %U | Неделя года в виде десятичного числа (воскресенье - первый день недели) (диапазон [00,53]) | 27 | +| %W | Неделя года в виде десятичного числа (понедельник - первый день недели) (диапазон [00,53]) | 27 | +| %V | Неделя года ISO 8601 (диапазон [01,53]) | 27 | +| %j | День года в виде десятичного числа (диапазон [001,366]) | 187 | +| %d | День месяца в виде десятичного числа (диапазон [01,31]) Перед одиночной цифрой ставится ноль. | 06 | +| %e | День месяца в виде десятичного числа (диапазон [1,31]). 
Перед одиночной цифрой ставится пробел. |   6 | +| %a | Сокращённое название дня недели, например, Fri (зависит от локали) | Thu | +| %A | Полный день недели, например, Friday (зависит от локали) | Thursday | +| %w | День недели в виде десятичного числа, где воскресенье равно 0 (диапазон [0-6]) | 4 | +| %u | День недели в виде десятичного числа, где понедельник равен 1 (формат ISO 8601) (диапазон [1-7]) | 4 | +| %H | Час в виде десятичного числа, 24-часовой формат (диапазон [00-23]) | 18 | +| %I | Час в виде десятичного числа, 12-часовой формат (диапазон [01,12]) | 06 | +| %M | Минуты в виде десятичного числа (диапазон [00,59]) | 32 | +| %S | Секунды как десятичное число (диапазон [00,60]) | 07 | +| %c | Стандартная строка даты и времени, например, Sun Oct 17 04:41:13 2010 (зависит от локали) | Thu Jul 6 18:32:07 2023 | +| %x | Локализованное представление даты (зависит от локали) | 07/06/23 | +| %X | Локализованное представление времени, например, 18:40:20 или 6:40:20 PM (зависит от локали) | 18:32:07 | +| %D | Эквивалентно "%m/%d/%y" | 07/06/23 | +| %F | Эквивалентно "%Y-%m-%d" (формат даты ISO 8601) | 2023-07-06 | +| %r | Локализованное 12-часовое время (зависит от локали) | 06:32:07 PM | +| %R | Эквивалентно "%H:%M" | 18:32 | +| %T | Эквивалентно "%H:%M:%S" (формат времени ISO 8601) | 18:32:07 | +| %p | Локализованное обозначение a.m. или p.m. (зависит от локали) | PM | +| %z | Смещение от UTC в формате ISO 8601 (например, -0430), или без символов, если информация о часовом поясе недоступна | +0800 | +| %Z | Зависящее от локали название или аббревиатура часового пояса, если информация о часовом поясе доступна | Z AWST | **Пример** ``` xml trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.err.log 1000M 10 @@ -1012,6 +1058,10 @@ ClickHouse использует потоки из глобального пул
metric_log
7500 1000 + 1048576 + 8192 + 524288 + false ``` @@ -1056,7 +1106,7 @@ ClickHouse использует потоки из глобального пул - verificationMode - Способ проверки сертификатов узла. Подробности находятся в описании класса [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h). Допустимые значения: `none`, `relaxed`, `strict`, `once`. - verificationDepth - Максимальная длина верификационной цепи. Верификация завершится ошибкой, если длина цепи сертификатов превысит установленное значение. - loadDefaultCAFile - Признак того, что будут использоваться встроенные CA-сертификаты для OpenSSL. Допустимые значения: `true`, `false`. \| -- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`. - cacheSessions - Включение/выключение кеширования сессии. Использовать обязательно вместе с `sessionIdContext`. Допустимые значения: `true`, `false`. - sessionIdContext - Уникальный набор произвольных символов, которые сервер добавляет к каждому сгенерированному идентификатору. Длина строки не должна превышать `SSL_MAX_SSL_SESSION_ID_LENGTH`. Рекомендуется к использованию всегда, поскольку позволяет избежать проблем как в случае, если сервер кеширует сессию, так и если клиент затребовал кеширование. По умолчанию `${application.name}`. - sessionCacheSize - Максимальное количество сессий, которые кэширует сервер. По умолчанию - 1024\*20. 0 - неограниченное количество сессий. @@ -1109,12 +1159,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. - +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ``` xml @@ -1123,6 +1180,10 @@ ClickHouse использует потоки из глобального пул part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1172,11 +1233,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1188,6 +1257,10 @@ ClickHouse использует потоки из глобального пул query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1199,11 +1272,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1215,6 +1296,10 @@ ClickHouse использует потоки из глобального пул query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1226,11 +1311,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` – имя базы данных. -- `table` – имя системной таблицы, где будут логироваться запросы. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`. -- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1242,6 +1335,10 @@ ClickHouse использует потоки из глобального пул query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1251,12 +1348,20 @@ ClickHouse использует потоки из глобального пул Параметры: -- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. -- `database` — имя базы данных для хранения таблицы. -- `table` — имя таблицы, куда будут записываться текстовые сообщения. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ```xml @@ -1266,6 +1371,10 @@ ClickHouse использует потоки из глобального пул system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day @@ -1277,13 +1386,21 @@ ClickHouse использует потоки из глобального пул Настройки для [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. -Parameters: +Параметры: -- `database` — Database for storing a table. -- `table` — Table name. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. По умолчанию файл настроек сервера `config.xml` содержит следующие настройки: @@ -1293,9 +1410,84 @@ Parameters: trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 ``` +## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log} + +Настройки для asynchronous_insert_log Система для логирования ассинхронных вставок. + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. + +**Пример** + +```xml + + + system + asynchronous_insert_log
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+    </asynchronous_insert_log>
+</clickhouse>
+``` + +## crash_log {#server_configuration_parameters-crash_log} + +Настройки для таблицы [crash_log](../../operations/system-tables/crash-log.md). + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1024. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 1024. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: true. + +**Пример** + +``` xml + + system + crash_log
+    <partition_by>toYYYYMM(event_date)</partition_by>
+    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1024</max_size_rows>
+    <reserved_size_rows>1024</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
+    <flush_on_crash>true</flush_on_crash>
+</crash_log>
+``` + ## query_masking_rules {#query-masking-rules} Правила, основанные на регулярных выражениях, которые будут применены для всех запросов, а также для всех сообщений перед сохранением их в лог на сервере, diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index de9bb969085..fb3b18bd46a 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -311,6 +311,52 @@ FORMAT Null; **Подробности** -При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом: +При вставке данных ClickHouse проверяет количество партиций во вставляемом блоке. Если количество разделов превышает число `max_partitions_per_insert_block`, ClickHouse либо логирует предупреждение, либо выбрасывает исключение в зависимости от значения `throw_on_max_partitions_per_insert_block`. Исключения имеют следующий текст: -> «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by ‘max_partitions_per_insert_block’ setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).» +> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” + +## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block} + +Позволяет контролировать поведение при достижении `max_partitions_per_insert_block` + +- `true` - Когда вставляемый блок достигает `max_partitions_per_insert_block`, возникает исключение. +- `false` - Записывает предупреждение при достижении `max_partitions_per_insert_block`. + +Значение по умолчанию: `true` + +## max_sessions_for_user {#max-sessions-per-user} + +Максимальное количество одновременных сессий на одного аутентифицированного пользователя. + +Пример: + +``` xml + + + 1 + + + 2 + + + 0 + + + + + + single_session_profile + + + + two_sessions_profile + + + + unlimited_sessions_profile + + +``` + +Значение по умолчанию: 0 (неограниченное количество сессий). 
diff --git a/docs/ru/operations/settings/settings-profiles.md b/docs/ru/operations/settings/settings-profiles.md index ba2cb9a601f..0d094c637ac 100644 --- a/docs/ru/operations/settings/settings-profiles.md +++ b/docs/ru/operations/settings/settings-profiles.md @@ -39,7 +39,7 @@ SET profile = 'web' 8 - + 1000000000 100000000000 @@ -67,6 +67,7 @@ SET profile = 'web' 50 100 + 4 1 diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index f83d05ff710..d3db890ad7a 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -238,39 +238,6 @@ ClickHouse применяет настройку в тех случаях, ко В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение. -## input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - -## input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - -## input_format_orc_import_nested {#input_format_orc_import_nested} - -Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc). - -Возможные значения: - -- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур. -- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур. - -Значение по умолчанию: `0`. - ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions} Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md). @@ -1686,7 +1653,7 @@ SELECT * FROM table_with_enum_column_for_csv_insert; ## input_format_csv_detect_header {#input_format_csv_detect_header} Обнаружить заголовок с именами и типами в формате CSV. - + Значение по умолчанию - `true`. ## input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines} @@ -1727,6 +1694,12 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in " string " ``` +## input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} + +Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. + +Выключено по умолчанию. 
+ ## output_format_tsv_crlf_end_of_line {#settings-output-format-tsv-crlf-end-of-line} Использовать в качестве разделителя строк для TSV формата CRLF (DOC/Windows стиль) вместо LF (Unix стиль). @@ -4195,6 +4168,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi ### Шаблон Шаблон поддерживает следующие виды плейсхолдеров: +- `%a` — Полное исходное имя файла (например "sample.csv"). - `%f` — Исходное имя файла без расширения (например "sample"). - `%e` — Оригинальное расширение файла с точкой (например ".csv"). - `%t` — Текущее время (в микросекундах). @@ -4206,3 +4180,29 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi - Запрос: `SELECT * FROM file('sample.csv')` Если чтение и обработка `sample.csv` прошли успешно, файл будет переименован в `processed_sample_1683473210851438.csv`. + +## precise_float_parsing {#precise_float_parsing} + +Позволяет выбрать алгоритм, используемый при парсинге [Float32/Float64](../../sql-reference/data-types/float.md): +* Если установлено значение `1`, то используется точный метод. Он более медленный, но всегда возвращает число, наиболее близкое к входному значению. +* В противном случае используется быстрый метод (поведение по умолчанию). Обычно результат его работы совпадает с результатом, полученным точным методом, однако в редких случаях он может отличаться на 1 или 2 наименее значимых цифры. + +Возможные значения: `0`, `1`. + +Значение по умолчанию: `0`. + +Пример: + +```sql +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7090999999999998 │ 15008753.000000002 │ +└─────────────────────┴──────────────────────────┘ + +SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1; + +┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐ +│ 1.7091 │ 15008753 │ +└─────────────────────┴──────────────────────────┘ +``` diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md index 7ff368b1910..24f79cae212 100644 --- a/docs/ru/operations/system-tables/index.md +++ b/docs/ru/operations/system-tables/index.md @@ -45,6 +45,10 @@ sidebar_label: "Системные таблицы" ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 + 1048576 + 8192 + 524288 + false ``` diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index e8d4a3ee9fd..80d844a1713 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -122,6 +122,7 @@ FROM dt - [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) - [Настройка `date_time_output_format`](../../operations/settings/index.md) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) - [Тип данных `Date`](date.md) - [Тип данных `DateTime64`](datetime64.md) diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md index da2f81f4828..78ad43e4764 100644 --- a/docs/ru/sql-reference/data-types/datetime64.md +++ b/docs/ru/sql-reference/data-types/datetime64.md @@ 
-102,6 +102,7 @@ FROM dt; - [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format) - [Настройка `date_time_output_format`](../../operations/settings/settings.md) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) - [Тип данных `Date`](date.md) - [Тип данных `DateTime`](datetime.md) diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index c43323d68fd..439eddfd752 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -145,6 +145,8 @@ range([start, ] end [, step]) - Если в результате запроса создаются массивы суммарной длиной больше, чем количество элементов, указанное настройкой [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block), то генерируется исключение. +- Возвращает Null если любой аргумент Nullable(Nothing) типа. Генерируется исключение если любой аргумент Null (Nullable(T) тип). + **Примеры** Запрос: diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 17ab04b7799..4db8a1ec6f8 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -599,29 +599,33 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we ## toYearWeek(date[,mode]) {#toyearweek} Возвращает год и неделю для даты. Год в результате может отличаться от года в аргументе даты для первой и последней недели года. -Аргумент mode работает точно так же, как аргумент mode [toWeek()](#toweek). Если mode не задан, используется режим 0. +Аргумент mode работает так же, как аргумент mode [toWeek()](#toweek), значение mode по умолчанию -- `0`. -`toISOYear() ` эквивалентно `intDiv(toYearWeek(date,3),100)`. +`toISOYear() ` эквивалентно `intDiv(toYearWeek(date,3),100)` + +:::warning +Однако, есть отличие в работе функций `toWeek()` и `toYearWeek()`. `toWeek()` возвращает номер недели в контексте заданного года, и в случае, когда `toWeek()` вернёт `0`, `toYearWeek()` вернёт значение, соответствующее последней неделе предыдущего года (см. `prev_yearWeek` в примере). +::: **Пример** Запрос: ```sql -SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9; +SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9, toYearWeek(toDate('2022-01-01')) AS prev_yearWeek; ``` Результат: ```text -┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐ -│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ -└────────────┴───────────┴───────────┴───────────┘ +┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┬─prev_yearWeek─┐ +│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ 202152 │ +└────────────┴───────────┴───────────┴───────────┴───────────────┘ ``` ## age -Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду. +Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду. 
Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. **Синтаксис** @@ -635,6 +639,8 @@ age('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). Возможные значения: + - `microsecond` (возможные сокращения: `us`, `u`) + - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) - `minute` (возможные сокращения: `mi`, `n`) - `hour` (возможные сокращения: `hh`, `h`) @@ -708,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). Возможные значения: + - `microsecond` (возможные сокращения: `us`, `u`) + - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) - `minute` (возможные сокращения: `mi`, `n`) - `hour` (возможные сокращения: `hh`, `h`) diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 9638e25d488..276dfc2ef20 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1113,3 +1113,50 @@ A text with tags . The content within CDATA Do Nothing for 2 Minutes 2:00   ``` + +## initcap {#initcap} + +Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами. + +## initcapUTF8 {#initcapUTF8} + +Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. +Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. +Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. +Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. + +## firstLine + +Возвращает первую строку в многострочном тексте. + +**Синтаксис** + +```sql +firstLine(val) +``` + +**Аргументы** + +- `val` - текст для обработки. [String](../data-types/string.md) + +**Returned value** + +- Первая строка текста или весь текст, если переносы строк отсутствуют. + +Тип: [String](../data-types/string.md) + +**Пример** + +Запрос: + +```sql +select firstLine('foo\nbar\nbaz'); +``` + +Результат: + +```result +┌─firstLine('foo\nbar\nbaz')─┐ +│ foo │ +└────────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index ea4f90d4f66..6e3830869cd 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -801,3 +801,55 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв'); │ 3 │ └────────────────────────────────────────────────────────────┘ ``` + +## hasSubsequence(haystack, needle) {#hasSubsequence} + +Возвращает 1 если needle является подпоследовательностью haystack, иначе 0. + + +**Синтаксис** + +``` sql +hasSubsequence(haystack, needle) +``` + +**Аргументы** + +- `haystack` — строка, по которой выполняется поиск. [Строка](../syntax.md#syntax-string-literal). +- `needle` — подпоследовательность, которую необходимо найти. 
[Строка](../syntax.md#syntax-string-literal). + +**Возвращаемые значения** + +- 1, если +- 0, если подстрока не найдена. + +Тип: `UInt8`. + +**Примеры** + +Запрос: + +``` sql +SELECT hasSubsequence('garbage', 'arg') ; +``` + +Результат: + +``` text +┌─hasSubsequence('garbage', 'arg')─┐ +│ 1 │ +└──────────────────────────────────┘ +``` + + +## hasSubsequenceCaseInsensitive + +Такая же, как и [hasSubsequence](#hasSubsequence), но работает без учета регистра. + +## hasSubsequenceUTF8 + +Такая же, как и [hasSubsequence](#hasSubsequence) при допущении что `haystack` и `needle` содержат набор кодовых точек, представляющий текст в кодировке UTF-8. + +## hasSubsequenceCaseInsensitiveUTF8 + +Такая же, как и [hasSubsequenceUTF8](#hasSubsequenceUTF8), но работает без учета регистра. diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index d5e6246fe9e..e53104d8d71 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -284,7 +284,13 @@ toDateTime(expr[, time_zone ]) - `expr` — Значение для преобразования. [String](/docs/ru/sql-reference/data-types/string.md), [Int](/docs/ru/sql-reference/data-types/int-uint.md), [Date](/docs/ru/sql-reference/data-types/date.md) или [DateTime](/docs/ru/sql-reference/data-types/datetime.md). - `time_zone` — Часовой пояс. [String](/docs/ru/sql-reference/data-types/string.md). -Если `expr` является числом, оно интерпретируется как количество секунд от начала unix эпохи. +:::note +Если `expr` является числом, то оно интерпретируется как число секунд с начала Unix-эпохи (Unix Timestamp). + +Если же `expr` -- [строка (String)](/docs/ru/sql-reference/data-types/string.md), то оно может быть интерпретировано и как Unix Timestamp, и как строковое представление даты / даты со временем. +Ввиду неоднозначности запрещён парсинг строк длиной 4 и меньше. Так, строка `'1999'` могла бы представлять собой как год (неполное строковое представление даты или даты со временем), так и Unix Timestamp. +Строки длиной 5 символов и более не несут неоднозначности, а следовательно, их парсинг разрешён. +::: **Возвращаемое значение** diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index a8ace213075..92be30b101a 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Синтаксис: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Пример** diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index d3846aac289..1a60dc0716c 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -73,7 +73,7 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view). ::: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... 
``` `LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md). diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 4fa6ac4ce66..747e36b8809 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ sidebar_label: INSERT INTO **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -100,7 +100,7 @@ INSERT INTO t FORMAT TabSeparated **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` Соответствие столбцов определяется их позицией в секции SELECT. При этом, их имена в выражении SELECT и в таблице для INSERT, могут отличаться. При необходимости выполняется приведение типов данных, эквивалентное соответствующему оператору CAST. @@ -120,7 +120,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` Используйте этот синтаксис, чтобы вставить данные из файла, который хранится на стороне **клиента**. `file_name` и `type` задаются в виде строковых литералов. [Формат](../../interfaces/formats.md) входного файла должен быть задан в секции `FORMAT`. diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 0983c51d954..83ef115aacd 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U - `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `?` — заменяет ровно один любой символ. -- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`. - `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). Конструкция с `{}` аналогична табличной функции [remote](remote.md). 
diff --git a/docs/zh/engines/table-engines/special/buffer.md b/docs/zh/engines/table-engines/special/buffer.md index bb95ecdc583..f92a819f3c3 100644 --- a/docs/zh/engines/table-engines/special/buffer.md +++ b/docs/zh/engines/table-engines/special/buffer.md @@ -5,7 +5,7 @@ slug: /zh/engines/table-engines/special/buffer 缓冲数据写入 RAM 中,周期性地将数据刷新到另一个表。在读取操作时,同时从缓冲区和另一个表读取数据。 - Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) + Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) 引擎的参数:database,table - 要刷新数据的表。可以使用返回字符串的常量表达式而不是数据库名称。 num_layers - 并行层数。在物理上,该表将表示为 num_layers 个独立缓冲区。建议值为16。min_time,max_time,min_rows,max_rows,min_bytes,max_bytes - 从缓冲区刷新数据的条件。 diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md deleted file mode 100644 index 4ce13b0f1d3..00000000000 --- a/docs/zh/getting-started/example-datasets/wikistat.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: /zh/getting-started/example-datasets/wikistat -sidebar_position: 17 -sidebar_label: WikiStat ---- - -# WikiStat {#wikistat} - -参考: http://dumps.wikimedia.org/other/pagecounts-raw/ - -创建表结构: - -``` sql -CREATE TABLE wikistat -( - date Date, - time DateTime, - project String, - subproject String, - path String, - hits UInt64, - size UInt64 -) ENGINE = MergeTree(date, (path, time), 8192); -``` - -加载数据: - -``` bash -$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done -``` diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md new file mode 120000 index 00000000000..2d429d00984 --- /dev/null +++ b/docs/zh/getting-started/example-datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting-started/example-datasets/wikistat.md \ No newline at end of file diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index eedc913cf82..3f42f3f8da4 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -346,9 +346,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID - 我们将主键列(UserID, URL)中的一些列值标记为橙色。 - 这些橙色标记的列值是每个颗粒中每个主键列的最小值。这里的例外是最后一个颗粒(上图中的颗粒1082),最后一个颗粒我们标记的是最大的值。 - - 正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。 + 这些橙色标记的列值是每个颗粒中第一行的主键列值。正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。 - 我们从0开始对行进行编号,以便与ClickHouse内部行编号方案对齐,该方案也用于记录消息。 ::: @@ -1071,13 +1069,6 @@ ClickHouse服务器日志文件中相应的跟踪日志确认了ClickHouse正在 ## 通过projections使用联合主键索引 -Projections目前是一个实验性的功能,因此我们需要告诉ClickHouse: - -```sql -SET optimize_use_projections = 1; -``` - - 在原表上创建projection: ```sql ALTER TABLE hits_UserID_URL @@ 
-1096,10 +1087,12 @@ ALTER TABLE hits_UserID_URL :::note - 该projection正在创建一个隐藏表,该表的行顺序和主索引基于该projection的给定order BY子句 -- 我们使用MATERIALIZE关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表 +- `SHOW TABLES` 语句查询是不会列出这个隐藏表的 +- 我们使用`MATERIALIZE`关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表 - 如果在源表hits_UserID_URL中插入了新行,那么这些行也会自动插入到隐藏表中 - 查询总是(从语法上)针对源表hits_UserID_URL,但是如果隐藏表的行顺序和主索引允许更有效地执行查询,那么将使用该隐藏表 -- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: +- 请注意,投影(projections)不会使 `ORDER BY` 查询语句的效率更高,即使 `ORDER BY` 匹配上了 projection 的 `ORDER BY` 语句(请参阅:https://github.com/ClickHouse/ClickHouse/issues/47333) +- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: @@ -1163,7 +1156,7 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 ``` -## 移除无效的主键列 +## 小结 带有联合主键(UserID, URL)的表的主索引对于加快UserID的查询过滤非常有用。但是,尽管URL列是联合主键的一部分,但该索引在加速URL查询过滤方面并没有提供显著的帮助。 @@ -1176,4 +1169,12 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 但是,如果复合主键中的键列在基数上有很大的差异,那么查询按基数升序对主键列进行排序是有益的。 -主键键列之间的基数差越大,主键键列的顺序越重要。我们将在以后的文章中对此进行演示。请继续关注。 +主键键列之间的基数差得越大,主键中的列的顺序越重要。我们将在下一章节对此进行演示。 + +# 高效地为键列排序 + +TODO + +# 高效地识别单行 + +TODO diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index c7a0f355a92..f84768beccc 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -53,7 +53,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} 1 ``` @@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 您可以在`X-ClickHouse-Progress`响应头中收到查询进度的信息。为此,启用[Http Header携带进度](../operations/settings/settings.md#settings-send_progress_in_http_headers)。示例: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"} ``` 显示字段信息: @@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query' < X-ClickHouse-Format: Template < X-ClickHouse-Timezone: Asia/Shanghai < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < # HELP "Query" "Number of executing queries" # TYPE "Query" counter @@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: 
{"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/zh/operations/optimizing-performance/profile-guided-optimization.md b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md new file mode 120000 index 00000000000..31cb656bd99 --- /dev/null +++ b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1 @@ +../../../en/operations/optimizing-performance/profile-guided-optimization.md \ No newline at end of file diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index f6106d8734e..8e2cb389f04 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -455,7 +455,7 @@ SSL客户端/服务器配置。 - verificationMode – The method for checking the node’s certificates. Details are in the description of the [A.背景](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) 同学们 可能的值: `none`, `relaxed`, `strict`, `once`. - verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \| -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`. - cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. 可接受的值: `true`, `false`. - sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. 始终建议使用此参数,因为如果服务器缓存会话,以及客户端请求缓存,它有助于避免出现问题。 默认值: `${application.name}`. - sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. 
diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 53dadc23c6d..e4b70322477 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。 可能的值: + - `microsecond` + - `millisecond` - `second` - `minute` - `hour` diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index 8ce2d20a10c..bce0994ecd2 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。 ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` 实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index 9acc1655f9a..f80c0a8a8ea 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -8,7 +8,7 @@ INSERT INTO 语句主要用于向系统中添加数据. 查询的基本格式: ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` 您可以在查询中指定要插入的列的列表,如:`[(c1, c2, c3)]`。您还可以使用列[匹配器](../../sql-reference/statements/select/index.md#asterisk)的表达式,例如`*`和/或[修饰符](../../sql-reference/statements/select/index.md#select-modifiers),例如 [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier)。 @@ -71,7 +71,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set 例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同: ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... ``` ClickHouse会清除数据前所有的空白字符与一个换行符(如果有换行符的话)。所以在进行查询时,我们建议您将数据放入到输入输出格式名称后的新的一行中去(如果数据是以空白字符开始的,这将非常重要)。 @@ -93,7 +93,7 @@ INSERT INTO t FORMAT TabSeparated ### 使用`SELECT`的结果写入 {#inserting-the-results-of-select} ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... 
``` 写入与SELECT的列的对应关系是使用位置来进行对应的,尽管它们在SELECT表达式与INSERT中的名称可能是不同的。如果需要,会对它们执行对应的类型转换。 diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 7742d8b278a..42dc5bd380d 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -29,6 +29,7 @@ EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE +AmbientCapabilities=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE [Install] # ClickHouse should not start from the rescue shell (rescue.target). diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 018e88ef828..66299fddd4a 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -55,6 +55,9 @@ contents: - src: clickhouse dst: /usr/bin/clickhouse-keeper type: symlink +- src: clickhouse + dst: /usr/bin/clickhouse-keeper-client + type: symlink - src: root/usr/bin/clickhouse-report dst: /usr/bin/clickhouse-report - src: root/usr/bin/clickhouse-server diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 466a0c194f7..c5acd10f791 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -1,8 +1,6 @@ #include #include -#include #include -#include #include #include #include @@ -18,9 +16,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -38,8 +34,6 @@ #include -namespace fs = std::filesystem; - /** A tool for evaluating ClickHouse performance. * The tool emulates a case with fixed amount of simultaneously executing queries. */ @@ -79,7 +73,6 @@ public: bool randomize_, size_t max_iterations_, double max_time_, - const String & json_path_, size_t confidence_, const String & query_id_, const String & query_to_execute_, @@ -98,7 +91,6 @@ public: cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), - json_path(json_path_), confidence(confidence_), query_id(query_id_), query_to_execute(query_to_execute_), @@ -165,9 +157,6 @@ public: int main(const std::vector &) override { - if (!json_path.empty() && fs::exists(json_path)) /// Clear file with previous results - fs::remove(json_path); - readQueries(); runBenchmark(); return 0; @@ -197,7 +186,6 @@ private: bool cumulative; size_t max_iterations; double max_time; - String json_path; size_t confidence; String query_id; String query_to_execute; @@ -226,26 +214,23 @@ private: size_t read_bytes = 0; size_t result_rows = 0; size_t result_bytes = 0; - double work_time = 0; using Sampler = ReservoirSampler; Sampler sampler {1 << 16}; - void add(double seconds, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc) + void add(double duration, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc) { ++queries; - work_time += seconds; read_rows += read_rows_inc; read_bytes += read_bytes_inc; result_rows += result_rows_inc; result_bytes += result_bytes_inc; - sampler.insert(seconds); + sampler.insert(duration); } void clear() { queries = 0; - work_time = 0; read_rows = 0; read_bytes = 0; result_rows = 0; @@ -331,10 +316,13 @@ private: return false; } - if (delay > 0 && delay_watch.elapsedSeconds() > delay) + double seconds = delay_watch.elapsedSeconds(); + if (delay > 0 && seconds > delay) { printNumberOfQueriesExecuted(queries_executed); - cumulative ? 
report(comparison_info_total) : report(comparison_info_per_interval); + cumulative + ? report(comparison_info_total, total_watch.elapsedSeconds()) + : report(comparison_info_per_interval, seconds); delay_watch.restart(); } } @@ -350,16 +338,7 @@ private: try { for (size_t i = 0; i < concurrency; ++i) - { - EntryPtrs connection_entries; - connection_entries.reserve(connections.size()); - - for (const auto & connection : connections) - connection_entries.emplace_back(std::make_shared( - connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)))); - - pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); }); - } + pool.scheduleOrThrowOnError([this]() mutable { thread(); }); } catch (...) { @@ -389,21 +368,18 @@ private: pool.wait(); total_watch.stop(); - if (!json_path.empty()) - reportJSON(comparison_info_total, json_path); - printNumberOfQueriesExecuted(queries_executed); - report(comparison_info_total); + report(comparison_info_total, total_watch.elapsedSeconds()); } - void thread(EntryPtrs & connection_entries) + void thread() { Query query; /// Randomly choosing connection index pcg64 generator(randomSeed()); - std::uniform_int_distribution distribution(0, connection_entries.size() - 1); + std::uniform_int_distribution distribution(0, connections.size() - 1); /// In these threads we do not accept INT signal. sigset_t sig_set; @@ -423,15 +399,13 @@ private: extracted = queue.tryPop(query, 100); if (shutdown || (max_iterations && queries_executed == max_iterations)) - { return; - } } const auto connection_index = distribution(generator); try { - execute(connection_entries, query, connection_index); + execute(query, connection_index); consecutive_errors = 0; } catch (...) @@ -460,17 +434,18 @@ private: } } - void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index) + void execute(Query & query, size_t connection_index) { Stopwatch watch; - Connection & connection = **connection_entries[connection_index]; + ConnectionPool::Entry entry = connections[connection_index]->get( + ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)); if (reconnect) - connection.disconnect(); + entry->disconnect(); RemoteQueryExecutor executor( - connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); + *entry, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); if (!query_id.empty()) executor.setQueryId(query_id); @@ -485,19 +460,19 @@ private: executor.finish(); - double seconds = (display_client_side_time || progress.elapsed_ns == 0) + double duration = (display_client_side_time || progress.elapsed_ns == 0) ? watch.elapsedSeconds() : progress.elapsed_ns / 1e9; std::lock_guard lock(mutex); size_t info_index = round_robin ? 
0 : connection_index; - comparison_info_per_interval[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes); - comparison_info_total[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes); - t_test.add(info_index, seconds); + comparison_info_per_interval[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes); + comparison_info_total[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes); + t_test.add(info_index, duration); } - void report(MultiStats & infos) + void report(MultiStats & infos, double seconds) { std::lock_guard lock(mutex); @@ -510,8 +485,6 @@ private: if (0 == info->queries) return; - double seconds = info->work_time / concurrency; - std::string connection_description = connections[i]->getDescription(); if (round_robin) { @@ -525,10 +498,10 @@ private: } std::cerr << connection_description << ", " - << "queries " << info->queries << ", "; + << "queries: " << info->queries << ", "; if (info->errors) { - std::cerr << "errors " << info->errors << ", "; + std::cerr << "errors: " << info->errors << ", "; } std::cerr << "QPS: " << (info->queries / seconds) << ", " @@ -567,62 +540,6 @@ private: } } - void reportJSON(MultiStats & infos, const std::string & filename) - { - WriteBufferFromFile json_out(filename); - - std::lock_guard lock(mutex); - - auto print_key_value = [&](auto key, auto value, bool with_comma = true) - { - json_out << double_quote << key << ": " << value << (with_comma ? ",\n" : "\n"); - }; - - auto print_percentile = [&json_out](Stats & info, auto percent, bool with_comma = true) - { - json_out << "\"" << percent << "\": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n"); - }; - - json_out << "{\n"; - - for (size_t i = 0; i < infos.size(); ++i) - { - const auto & info = infos[i]; - - json_out << double_quote << connections[i]->getDescription() << ": {\n"; - json_out << double_quote << "statistics" << ": {\n"; - - double seconds = info->work_time / concurrency; - - print_key_value("QPS", info->queries.load() / seconds); - print_key_value("RPS", info->read_rows / seconds); - print_key_value("MiBPS", info->read_bytes / seconds / 1048576); - print_key_value("RPS_result", info->result_rows / seconds); - print_key_value("MiBPS_result", info->result_bytes / seconds / 1048576); - print_key_value("num_queries", info->queries.load()); - print_key_value("num_errors", info->errors, false); - - json_out << "},\n"; - json_out << double_quote << "query_time_percentiles" << ": {\n"; - - if (info->queries != 0) - { - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(*info, percent); - - print_percentile(*info, 95); - print_percentile(*info, 99); - print_percentile(*info, 99.9); - print_percentile(*info, 99.99, false); - } - - json_out << "}\n"; - json_out << (i == infos.size() - 1 ? 
"}\n" : "},\n"); - } - - json_out << "}\n"; - } - public: ~Benchmark() override @@ -675,7 +592,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("iterations,i", value()->default_value(0), "amount of queries to be executed") ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") ("randomize,r", "randomize order of execution") - ("json", value()->default_value(""), "write final report to specified file in JSON format") ("host,h", value()->multitoken(), "list of hosts") ("port", value()->multitoken(), "list of ports") ("roundrobin", "Instead of comparing queries for different --host/--port just pick one random --host/--port for every query and send query to it.") @@ -739,7 +655,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options.count("randomize"), options["iterations"].as(), options["timelimit"].as(), - options["json"].as(), options["confidence"].as(), options["query_id"].as(), options["query"].as(), diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 19b601b9a7b..e73f77819ad 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -812,6 +812,11 @@ bool Client::processWithFuzzing(const String & full_query) } catch (...) { + if (!ast_to_process) + fmt::print(stderr, + "Error while forming new query: {}\n", + getCurrentExceptionMessage(true)); + // Some functions (e.g. protocol parsers) don't throw, but // set last_exception instead, so we'll also do it here for // uniformity. @@ -1173,12 +1178,12 @@ void Client::processOptions(const OptionsDescription & options_description, { String traceparent = options["opentelemetry-traceparent"].as(); String error; - if (!global_context->getClientInfo().client_trace_context.parseTraceparentHeader(traceparent, error)) + if (!global_context->getClientTraceContext().parseTraceparentHeader(traceparent, error)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse OpenTelemetry traceparent '{}': {}", traceparent, error); } if (options.count("opentelemetry-tracestate")) - global_context->getClientInfo().client_trace_context.tracestate = options["opentelemetry-tracestate"].as(); + global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as(); } @@ -1238,10 +1243,9 @@ void Client::processConfig() global_context->getSettingsRef().max_insert_block_size); } - ClientInfo & client_info = global_context->getClientInfo(); - client_info.setInitialQuery(); - client_info.quota_key = config().getString("quota_key", ""); - client_info.query_kind = query_kind; + global_context->setQueryKindInitial(); + global_context->setQuotaClientKey(config().getString("quota_key", "")); + global_context->setQueryKind(query_kind); } @@ -1404,10 +1408,9 @@ void Client::readArguments( else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-'))) { common_arguments.emplace_back(arg); - /// No password was provided by user. Add '\n' as implicit password, - /// which encodes that client should ask user for the password. - /// '\n' is used because there is hardly a chance that a user would use '\n' as a password. 
- common_arguments.emplace_back("\n"); + /// if the value of --password is omitted, the password will be asked before + /// connection start + common_arguments.emplace_back(ConnectionParameters::ASK_PASSWORD); } else common_arguments.emplace_back(arg); diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index efe7121cace..556eca808f6 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -608,6 +608,8 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t ss << "ALTER TABLE " << getQuotedTable(original_table) << ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") << partition_name; UInt64 num_shards_drop_partition = executeQueryOnCluster(task_table.cluster_push, ss.str(), task_cluster->settings_push, ClusterExecutionMode::ON_EACH_SHARD); + if (num_shards_drop_partition != task_table.cluster_push->getShardCount()) + return TaskStatus::Error; LOG_INFO(log, "Drop partition {} in original table {} have been executed successfully on {} shards of {}", partition_name, getQuotedTable(original_table), num_shards_drop_partition, task_table.cluster_push->getShardCount()); diff --git a/programs/disks/ICommand.cpp b/programs/disks/ICommand.cpp index 52d1a2196a9..86188fb6db1 100644 --- a/programs/disks/ICommand.cpp +++ b/programs/disks/ICommand.cpp @@ -1,4 +1,6 @@ #include "ICommand.h" +#include + namespace DB { diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 43c66a32302..d7d61bbcd3b 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -163,13 +163,15 @@ int mainEntryClickHouseFormat(int argc, char ** argv) { ASTPtr res = parseQueryAndMovePosition( parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); - /// For insert query with data(INSERT INTO ... VALUES ...), will lead to format fail, - /// should throw exception early and make exception message more readable. + + /// For insert query with data(INSERT INTO ... VALUES ...), that will lead to the formatting failure, + /// we should throw an exception early, and make exception message more readable. if (const auto * insert_query = res->as(); insert_query && insert_query->data) { throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA, "Can't format ASTInsertQuery with data, since data will be lost"); } + if (!quiet) { if (!backslash) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index d83e189f7ef..e10a9fea86b 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -20,10 +20,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -35,6 +32,14 @@ #include +#include + +#include "config.h" + +/// Embedded configuration files used inside the install program +INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml"); +INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml"); + /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. * It also allows to avoid dependency on systemd, upstart, SysV init. 
@@ -560,7 +565,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(main_config_file)) { - std::string_view main_config_content = getResource("config.xml"); + std::string_view main_config_content(reinterpret_cast(gresource_config_xmlData), gresource_config_xmlSize); if (main_config_content.empty()) { fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string()); @@ -672,7 +677,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(users_config_file)) { - std::string_view users_config_content = getResource("users.xml"); + std::string_view users_config_content(reinterpret_cast(gresource_users_xmlData), gresource_users_xmlSize); if (users_config_content.empty()) { fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string()); @@ -992,7 +997,9 @@ namespace { /// sudo respects limits in /etc/security/limits.conf e.g. open files, /// that's why we are using it instead of the 'clickhouse su' tool. - command = fmt::format("sudo -u '{}' {}", user, command); + /// by default, sudo resets all the ENV variables, but we should preserve + /// the values /etc/default/clickhouse in /etc/init.d/clickhouse file + command = fmt::format("sudo --preserve-env -u '{}' {}", user, command); } fmt::print("Will run {}\n", command); diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 05928a0d20b..c822a631798 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -1,5 +1,6 @@ #include "Commands.h" +#include #include "KeeperClient.h" @@ -24,8 +25,18 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con else path = client->cwd; - for (const auto & child : client->zookeeper->getChildren(path)) - std::cout << child << " "; + auto children = client->zookeeper->getChildren(path); + std::sort(children.begin(), children.end()); + + bool need_space = false; + for (const auto & child : children) + { + if (std::exchange(need_space, true)) + std::cout << " "; + + std::cout << child; + } + std::cout << "\n"; } @@ -77,7 +88,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co client->zookeeper->set( client->getAbsolutePath(query->args[0].safeGet()), query->args[1].safeGet(), - static_cast(query->args[2].safeGet())); + static_cast(query->args[2].get())); } bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const @@ -115,6 +126,21 @@ void CreateCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) static_cast(query->args[2].safeGet())); } +bool TouchCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + return true; +} + +void TouchCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + client->zookeeper->createIfNotExists(client->getAbsolutePath(query->args[0].safeGet()), ""); +} + bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { String arg; @@ -130,6 +156,173 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet())) << "\n"; } +bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if 
(!parseKeeperPath(pos, expected, arg)) + return true; + + node->args.push_back(std::move(arg)); + return true; +} + +void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + Coordination::Stat stat; + String path; + if (!query->args.empty()) + path = client->getAbsolutePath(query->args[0].safeGet()); + else + path = client->cwd; + + client->zookeeper->get(path, &stat); + + std::cout << "cZxid = " << stat.czxid << "\n"; + std::cout << "mZxid = " << stat.mzxid << "\n"; + std::cout << "pZxid = " << stat.pzxid << "\n"; + std::cout << "ctime = " << stat.ctime << "\n"; + std::cout << "mtime = " << stat.mtime << "\n"; + std::cout << "version = " << stat.version << "\n"; + std::cout << "cversion = " << stat.cversion << "\n"; + std::cout << "aversion = " << stat.aversion << "\n"; + std::cout << "ephemeralOwner = " << stat.ephemeralOwner << "\n"; + std::cout << "dataLength = " << stat.dataLength << "\n"; + std::cout << "numChildren = " << stat.numChildren << "\n"; +} + +bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + ASTPtr threshold; + if (!ParserUnsignedInteger{}.parse(pos, threshold, expected)) + return false; + + node->args.push_back(threshold->as().value); + + String path; + if (!parseKeeperPath(pos, expected, path)) + path = "."; + + node->args.push_back(std::move(path)); + return true; +} + +void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + auto threshold = query->args[0].safeGet(); + auto path = client->getAbsolutePath(query->args[1].safeGet()); + + Coordination::Stat stat; + client->zookeeper->get(path, &stat); + + if (stat.numChildren >= static_cast(threshold)) + { + std::cout << static_cast(path) << "\t" << stat.numChildren << "\n"; + return; + } + + auto children = client->zookeeper->getChildren(path); + std::sort(children.begin(), children.end()); + for (const auto & child : children) + { + auto next_query = *query; + next_query.args[1] = DB::Field(path / child); + execute(&next_query, client); + } +} + +bool DeleteStaleBackups::parse(IParser::Pos & /* pos */, std::shared_ptr & /* node */, Expected & /* expected */) const +{ + return true; +} + +void DeleteStaleBackups::execute(const ASTKeeperQuery * /* query */, KeeperClient * client) const +{ + client->askConfirmation( + "You are going to delete all inactive backups in /clickhouse/backups.", + [client] + { + fs::path backup_root = "/clickhouse/backups"; + auto backups = client->zookeeper->getChildren(backup_root); + std::sort(backups.begin(), backups.end()); + + for (const auto & child : backups) + { + auto backup_path = backup_root / child; + std::cout << "Found backup " << backup_path << ", checking if it's active\n"; + + String stage_path = backup_path / "stage"; + auto stages = client->zookeeper->getChildren(stage_path); + + bool is_active = false; + for (const auto & stage : stages) + { + if (startsWith(stage, "alive")) + { + is_active = true; + break; + } + } + + if (is_active) + { + std::cout << "Backup " << backup_path << " is active, not going to delete\n"; + continue; + } + + std::cout << "Backup " << backup_path << " is not active, deleting it\n"; + client->zookeeper->removeRecursive(backup_path); + } + }); +} + +bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String path; + if (!parseKeeperPath(pos, expected, path)) + path = "."; + + node->args.push_back(std::move(path)); + + ASTPtr count; + if (ParserUnsignedInteger{}.parse(pos, count, 
expected)) + node->args.push_back(count->as().value); + else + node->args.push_back(UInt64(10)); + + return true; +} + +void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + auto path = client->getAbsolutePath(query->args[0].safeGet()); + auto n = query->args[1].safeGet(); + + std::vector> result; + + std::queue queue; + queue.push(path); + while (!queue.empty()) + { + auto next_path = queue.front(); + queue.pop(); + + auto children = client->zookeeper->getChildren(next_path); + std::transform(children.cbegin(), children.cend(), children.begin(), [&](const String & child) { return next_path / child; }); + + auto response = client->zookeeper->get(children); + + for (size_t i = 0; i < response.size(); ++i) + { + result.emplace_back(response[i].stat.numChildren, children[i]); + queue.push(children[i]); + } + } + + std::sort(result.begin(), result.end(), std::greater()); + for (UInt64 i = 0; i < std::min(result.size(), static_cast(n)); ++i) + std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n"; +} + bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const { String arg; @@ -170,7 +363,7 @@ bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptrgetHelpMessage() << "\n"; + std::cout << pair.second->generateHelpString() << "\n"; } bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const diff --git a/programs/keeper-client/Commands.h b/programs/keeper-client/Commands.h index e4debd53e42..7d64fef1738 100644 --- a/programs/keeper-client/Commands.h +++ b/programs/keeper-client/Commands.h @@ -21,6 +21,12 @@ public: virtual String getName() const = 0; virtual ~IKeeperClientCommand() = default; + + String generateHelpString() const + { + return fmt::vformat(getHelpMessage(), fmt::make_format_args(getName())); + } + }; using Command = std::shared_ptr; @@ -34,7 +40,7 @@ class LSCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; } + String getHelpMessage() const override { return "{} [path] -- Lists the nodes for the given path (default: cwd)"; } }; class CDCommand : public IKeeperClientCommand @@ -45,7 +51,7 @@ class CDCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; } + String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; } }; class SetCommand : public IKeeperClientCommand @@ -58,7 +64,7 @@ class SetCommand : public IKeeperClientCommand String getHelpMessage() const override { - return "set [version] -- Updates the node's value. Only update if version matches (default: -1)"; + return "{} [version] -- Updates the node's value. 
Only update if version matches (default: -1)"; } }; @@ -70,7 +76,18 @@ class CreateCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "create -- Creates new node"; } + String getHelpMessage() const override { return "{} [mode] -- Creates new node with the set value"; } +}; + +class TouchCommand : public IKeeperClientCommand +{ + String getName() const override { return "touch"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists"; } }; class GetCommand : public IKeeperClientCommand @@ -81,9 +98,63 @@ class GetCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "get -- Returns the node's value"; } + String getHelpMessage() const override { return "{} -- Returns the node's value"; } }; +class GetStatCommand : public IKeeperClientCommand +{ + String getName() const override { return "get_stat"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "{} [path] -- Returns the node's stat (default `.`)"; } +}; + +class FindSuperNodes : public IKeeperClientCommand +{ + String getName() const override { return "find_super_nodes"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} [path] -- Finds nodes with number of children larger than some threshold for the given path (default `.`)"; + } +}; + +class DeleteStaleBackups : public IKeeperClientCommand +{ + String getName() const override { return "delete_stale_backups"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} -- Deletes ClickHouse nodes used for backups that are now inactive"; + } +}; + +class FindBigFamily : public IKeeperClientCommand +{ + String getName() const override { return "find_big_family"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "{} [path] [n] -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)"; + } +}; + + class RMCommand : public IKeeperClientCommand { String getName() const override { return "rm"; } @@ -92,7 +163,7 @@ class RMCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "remove -- Remove the node"; } + String getHelpMessage() const override { return "{} -- Remove the node"; } }; class RMRCommand : public IKeeperClientCommand @@ -103,7 +174,7 @@ class RMRCommand : public 
IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "rmr -- Recursively deletes path. Confirmation required"; } + String getHelpMessage() const override { return "{} -- Recursively deletes path. Confirmation required"; } }; class HelpCommand : public IKeeperClientCommand @@ -114,7 +185,7 @@ class HelpCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "help -- Prints this message"; } + String getHelpMessage() const override { return "{} -- Prints this message"; } }; class FourLetterWordCommand : public IKeeperClientCommand @@ -125,7 +196,7 @@ class FourLetterWordCommand : public IKeeperClientCommand void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; - String getHelpMessage() const override { return "flwc -- Executes four-letter-word command"; } + String getHelpMessage() const override { return "{} -- Executes four-letter-word command"; } }; } diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index f41dca1e27a..893be4d25d1 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -131,7 +131,7 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options) .binding("host")); options.addOption( - Poco::Util::Option("port", "p", "server port. default `2181`") + Poco::Util::Option("port", "p", "server port. default `9181`") .argument("") .binding("port")); @@ -176,7 +176,12 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */) std::make_shared(), std::make_shared(), std::make_shared(), + std::make_shared(), std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), std::make_shared(), std::make_shared(), std::make_shared(), @@ -266,8 +271,16 @@ void KeeperClient::runInteractive() LineReader::Patterns query_extenders = {"\\"}; LineReader::Patterns query_delimiters = {}; + char word_break_characters[] = " \t\v\f\a\b\r\n/"; - ReplxxLineReader lr(suggest, history_file, false, query_extenders, query_delimiters, {}); + ReplxxLineReader lr( + suggest, + history_file, + /* multiline= */ false, + query_extenders, + query_delimiters, + word_break_characters, + /* highlighter_= */ {}); lr.enableBracketedPaste(); while (true) @@ -299,7 +312,7 @@ int KeeperClient::main(const std::vector & /* args */) } auto host = config().getString("host", "localhost"); - auto port = config().getString("port", "2181"); + auto port = config().getString("port", "9181"); zk_args.hosts = {host + ":" + port}; zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000; zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000; diff --git a/programs/keeper-client/Parser.cpp b/programs/keeper-client/Parser.cpp index 0f3fc39704e..3420ccb2219 100644 --- a/programs/keeper-client/Parser.cpp +++ b/programs/keeper-client/Parser.cpp @@ -30,7 +30,7 @@ bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path) return parseIdentifierOrStringLiteral(pos, expected, path); String result; - while (pos->type == TokenType::BareWord || pos->type == TokenType::Slash || pos->type == TokenType::Dot) + while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream) { result.append(pos->begin, pos->end); ++pos; diff --git a/programs/keeper/CMakeLists.txt 
b/programs/keeper/CMakeLists.txt index 18bdc8f317c..a43a312ba54 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,16 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -if (OS_LINUX) - set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - # for some reason INTERFACE linkage doesn't work for standalone binary - set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -endif () - -clickhouse_embed_binaries( - TARGET clickhouse_keeper_configs - RESOURCES keeper_config.xml keeper_embedded.xml -) - set(CLICKHOUSE_KEEPER_SOURCES Keeper.cpp ) @@ -29,11 +16,12 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_program_add(keeper) install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) -add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperReconfiguration.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/RaftServerConfig.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp @@ -69,13 +57,16 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/IO/ReadBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPPathHints.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/NotFoundHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/CertificateReloader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp @@ -91,6 +82,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp @@ -212,7 +204,6 @@ if (BUILD_STANDALONE_KEEPER) ${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ) - add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) if (SPLIT_DEBUG_SYMBOLS) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index db35e30bd30..4791f42312e 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -42,6 +42,7 @@ #if USE_SSL # include # include +# include #endif #include @@ -109,19 +110,18 @@ void Keeper::createServer(const std::string & 
listen_host, const char * port_nam } catch (const Poco::Exception &) { - std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false); - if (listen_try) { - LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " "specify not disabled IPv4 or IPv6 address to listen in element of configuration " "file. Example for disabled IPv6: 0.0.0.0 ." " Example for disabled IPv4: ::", - message); + listen_host, port, getCurrentExceptionMessage(false)); } else { - throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); } } } @@ -149,7 +149,7 @@ int Keeper::run() } if (config().hasOption("version")) { - std::cout << DBMS_NAME << " keeper version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; + std::cout << VERSION_NAME << " keeper version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; return 0; } @@ -287,13 +287,27 @@ try std::string path; if (config().has("keeper_server.storage_path")) + { path = config().getString("keeper_server.storage_path"); + } + else if (std::filesystem::is_directory(std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "By default 'keeper.storage_path' could be assigned to {}, but the directory {} already exists. Please specify 'keeper.storage_path' in the keeper configuration explicitly", + KEEPER_DEFAULT_PATH, String{std::filesystem::path{config().getString("path", DBMS_DEFAULT_PATH)} / "coordination"}); + } else if (config().has("keeper_server.log_storage_path")) + { path = std::filesystem::path(config().getString("keeper_server.log_storage_path")).parent_path(); + } else if (config().has("keeper_server.snapshot_storage_path")) + { path = std::filesystem::path(config().getString("keeper_server.snapshot_storage_path")).parent_path(); + } else - path = std::filesystem::path{KEEPER_DEFAULT_PATH}; + { + path = KEEPER_DEFAULT_PATH; + } std::filesystem::create_directories(path); @@ -329,6 +343,7 @@ try auto global_context = Context::createGlobal(shared_context.get()); global_context->makeGlobalContext(); + global_context->setApplicationType(Context::ApplicationType::KEEPER); global_context->setPath(path); global_context->setRemoteHostFilter(config()); @@ -364,7 +379,7 @@ try } /// Initialize keeper RAFT. Do nothing if no keeper_server in config. 
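A condensed sketch of the storage-path resolution order added above; the helper name, the template parameter and the exception type are illustrative, not the exact Keeper code.

#include <filesystem>
#include <stdexcept>
#include <string>

namespace fs = std::filesystem;

/// Resolution order mirroring the hunk above: an explicit setting wins;
/// otherwise refuse to silently reuse an existing <path>/coordination;
/// otherwise derive the path from the log/snapshot paths; otherwise use
/// the compiled-in default.
template <typename Config>
std::string resolveKeeperStoragePath(const Config & config, const std::string & default_path, const std::string & keeper_default_path)
{
    if (config.has("keeper_server.storage_path"))
        return config.getString("keeper_server.storage_path");

    if (fs::is_directory(fs::path(config.getString("path", default_path)) / "coordination"))
        throw std::runtime_error("an old coordination directory exists; set 'keeper_server.storage_path' explicitly");

    if (config.has("keeper_server.log_storage_path"))
        return fs::path(config.getString("keeper_server.log_storage_path")).parent_path().string();

    if (config.has("keeper_server.snapshot_storage_path"))
        return fs::path(config.getString("keeper_server.snapshot_storage_path")).parent_path().string();

    return keeper_default_path;
}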
- global_context->initializeKeeperDispatcher(/* start_async = */ true); + global_context->initializeKeeperDispatcher(/* start_async = */ false); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); auto config_getter = [&] () -> const Poco::Util::AbstractConfiguration & @@ -451,10 +466,20 @@ try zkutil::EventPtr unused_event = std::make_shared(); zkutil::ZooKeeperNodeCache unused_cache([] { return nullptr; }); + + const std::string cert_path = config().getString("openSSL.server.certificateFile", ""); + const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); + + std::vector extra_paths = {include_from_path}; + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); + /// ConfigReloader have to strict parameters which are redundant in our case auto main_config_reloader = std::make_unique( config_path, - include_from_path, + extra_paths, config().getString("path", ""), std::move(unused_cache), unused_event, @@ -462,6 +487,10 @@ try { if (config->has("keeper_server")) global_context->updateKeeperConfiguration(*config); + +#if USE_SSL + CertificateReloader::instance().tryLoad(*config); +#endif }, /* already_loaded = */ false); /// Reload it right now (initial loading) diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 1cacc391ca5..dd0bf67cb64 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES library-bridge.cpp ) -if (OS_LINUX) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") -endif () - clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) target_link_libraries(clickhouse-library-bridge PRIVATE diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 96924e3c8d9..b38e17ecade 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -266,6 +268,10 @@ void LocalServer::tryInitPath() global_context->setUserFilesPath(""); // user's files are everywhere + std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/"); + global_context->setUserScriptsPath(user_scripts_path); + fs::create_directories(user_scripts_path); + /// top_level_domains_lists const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/"); if (!top_level_domains_path.empty()) @@ -490,6 +496,17 @@ try applyCmdSettings(global_context); + /// try to load user defined executable functions, throw on error and die + try + { + global_context->loadOrReloadUserDefinedExecutableFunctions(config()); + } + catch (...) 
+ { + tryLogCurrentException(&logger(), "Caught exception while loading user defined executable functions."); + throw; + } + if (is_interactive) { clearTerminal(); @@ -569,7 +586,9 @@ void LocalServer::processConfig() } print_stack_trace = config().getBool("stacktrace", false); - load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false); + const std::string clickhouse_dialect{"clickhouse"}; + load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false) + && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect; auto logging = (config().has("logger.console") || config().has("logger.level") @@ -638,43 +657,66 @@ void LocalServer::processConfig() /// There is no need for concurrent queries, override max_concurrent_queries. global_context->getProcessList().setMaxSize(0); - /// Size of cache for uncompressed blocks. Zero means disabled. - String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", ""); - size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); + const size_t memory_amount = getMemoryAmount(); + const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5); + const size_t max_cache_size = static_cast(memory_amount * cache_size_to_ram_max_ratio); + + String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY); + size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE); + if (uncompressed_cache_size > max_cache_size) + { + uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } if (uncompressed_cache_size) global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); - /// Size of cache for marks (index of MergeTree family of tables). - String mark_cache_policy = config().getString("mark_cache_policy", ""); - size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); + String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY); + size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); + if (!mark_cache_size) + LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); + if (mark_cache_size > max_cache_size) + { + mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size)); + } if (mark_cache_size) global_context->setMarkCache(mark_cache_policy, mark_cache_size); - /// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled. 
- size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0); + size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE); + if (index_uncompressed_cache_size > max_cache_size) + { + index_uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } if (index_uncompressed_cache_size) global_context->setIndexUncompressedCache(index_uncompressed_cache_size); - /// Size of cache for index marks (index of MergeTree skip indices). - size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0); + size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE); + if (index_mark_cache_size > max_cache_size) + { + index_mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } if (index_mark_cache_size) global_context->setIndexMarkCache(index_mark_cache_size); - /// A cache for mmapped files. - size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary. + size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE); + if (mmap_cache_size > max_cache_size) + { + mmap_cache_size = max_cache_size; + LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } if (mmap_cache_size) global_context->setMMappedFileCache(mmap_cache_size); + /// In Server.cpp (./clickhouse-server), we would initialize the query cache here. + /// Intentionally not doing this in clickhouse-local as it doesn't make sense. 
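The clamping repeated above for the uncompressed, mark, index and mmap caches boils down to one small helper: cap the configured size at a fixed fraction of physical RAM. A minimal sketch (the real code also logs whenever the cap applies):

#include <algorithm>
#include <cstddef>

/// Cap a configured cache size at `ram_ratio` of physical memory.
size_t clampCacheSize(size_t configured_size, size_t physical_memory, double ram_ratio)
{
    const size_t max_cache_size = static_cast<size_t>(physical_memory * ram_ratio);
    return std::min(configured_size, max_cache_size);
}

/// Example: with 8 GiB of RAM and the default 0.5 ratio, a 6 GiB mark cache
/// request is lowered to 4 GiB.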
+ #if USE_EMBEDDED_COMPILER - /// 128 MB - constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; - size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); - - constexpr size_t compiled_expression_cache_elements_size_default = 10000; - size_t compiled_expression_cache_elements_size - = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); - - CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); + size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); + size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif /// NOTE: it is important to apply any overrides before @@ -737,9 +779,8 @@ void LocalServer::processConfig() for (const auto & [key, value] : prompt_substitutions) boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value); - ClientInfo & client_info = global_context->getClientInfo(); - client_info.setInitialQuery(); - client_info.query_kind = query_kind; + global_context->setQueryKindInitial(); + global_context->setQueryKind(query_kind); } diff --git a/programs/main.cpp b/programs/main.cpp index 9a3ad47a86e..4af9e3a3067 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -466,6 +466,11 @@ int main(int argc_, char ** argv_) checkHarmfulEnvironmentVariables(argv_); #endif + /// This is used for testing. For example, + /// clickhouse-local should be able to run a simple query without throw/catch. + if (getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION")) // NOLINT(concurrency-mt-unsafe) + DB::terminate_on_any_exception = true; + /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. 
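The CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION check introduced just above is a test-only toggle read once before any threads exist. A tiny sketch, with the flag below standing in for DB::terminate_on_any_exception:

#include <cstdlib>

/// Hypothetical stand-in for DB::terminate_on_any_exception.
static bool terminate_on_any_exception = false;

/// Read test-only toggles once at startup, before any other threads run
/// (getenv is only safe while nothing may call setenv concurrently).
void applyTestToggles()
{
    if (std::getenv("CLICKHOUSE_TERMINATE_ON_ANY_EXCEPTION") != nullptr)
        terminate_on_any_exception = true;
}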
std::set_new_handler(nullptr); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 3042ae2bb57..31288b4aa01 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -365,17 +365,14 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI hash.update(seed); hash.update(i); + const auto checksum = getSipHash128AsArray(hash); if (size >= 16) { - char * hash_dst = reinterpret_cast(std::min(pos, end - 16)); - hash.get128(hash_dst); + auto * hash_dst = std::min(pos, end - 16); + memcpy(hash_dst, checksum.data(), checksum.size()); } else - { - char value[16]; - hash.get128(value); - memcpy(dst, value, end - dst); - } + memcpy(dst, checksum.data(), end - dst); pos += 16; ++i; @@ -401,7 +398,7 @@ static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed) hash.update(reinterpret_cast(&src), sizeof(UUID)); /// Saving version and variant from an old UUID - hash.get128(reinterpret_cast(&dst)); + dst = hash.get128(); dst.items[1] = (dst.items[1] & 0x1fffffffffffffffull) | (src.items[1] & 0xe000000000000000ull); dst.items[0] = (dst.items[0] & 0xffffffffffff0fffull) | (src.items[0] & 0x000000000000f000ull); diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 118610e4dcd..56373601b95 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES validateODBCConnectionString.cpp ) -if (OS_LINUX) - # clickhouse-odbc-bridge is always a separate binary. - # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") -endif () - clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) target_link_libraries(clickhouse-odbc-bridge PRIVATE diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 855973d10e1..b8241afa1eb 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -1,12 +1,8 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set(CLICKHOUSE_SERVER_SOURCES MetricsTransmitter.cpp Server.cpp ) -set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - set (CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_aggregate_functions @@ -34,9 +30,3 @@ endif() clickhouse_program_add(server) install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) - -clickhouse_embed_binaries( - TARGET clickhouse_server_configs - RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js -) -add_dependencies(clickhouse-server-lib clickhouse_server_configs) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 071f7d3177e..96f89c867a2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -88,7 +89,6 @@ #include #include #include -#include #include #include #include @@ -109,6 +109,7 @@ #if USE_SSL # include +# include #endif #if USE_GRPC @@ -128,6 +129,10 @@ # include #endif +#include +/// A minimal file used when the server is run without installation +INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml"); + namespace CurrentMetrics { extern const Metric Revision; @@ -321,19 +326,18 @@ void Server::createServer( } catch (const 
Poco::Exception &) { - std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false); - if (listen_try) { - LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " "specify not disabled IPv4 or IPv6 address to listen in element of configuration " "file. Example for disabled IPv6: 0.0.0.0 ." " Example for disabled IPv4: ::", - message); + listen_host, port, getCurrentExceptionMessage(false)); } else { - throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); } } } @@ -385,7 +389,7 @@ int Server::run() } if (config().hasOption("version")) { - std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; + std::cout << VERSION_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; return 0; } return Application::run(); // NOLINT @@ -393,6 +397,7 @@ int Server::run() void Server::initialize(Poco::Util::Application & self) { + ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast(gresource_embedded_xmlData), gresource_embedded_xmlSize)); BaseDaemon::initialize(self); logger().information("starting up"); @@ -653,10 +658,10 @@ try global_context->addWarningMessage("Server was built with sanitizer. It will work slowly."); #endif - const auto memory_amount = getMemoryAmount(); + const size_t physical_server_memory = getMemoryAmount(); LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(physical_server_memory), getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores std::thread::hardware_concurrency()); @@ -739,11 +744,13 @@ try [&]() -> std::vector { std::vector metrics; - metrics.reserve(servers_to_start_before_tables.size()); + + std::lock_guard lock(servers_lock); + metrics.reserve(servers_to_start_before_tables.size() + servers.size()); + for (const auto & server : servers_to_start_before_tables) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - std::lock_guard lock(servers_lock); for (const auto & server : servers) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); return metrics; @@ -887,6 +894,7 @@ try #endif global_context->setRemoteHostFilter(config()); + global_context->setHTTPHeaderFilter(config()); std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); fs::path path = path_str; @@ -1027,6 +1035,11 @@ try /// Initialize merge tree metadata cache if (config().has("merge_tree_metadata_cache")) { + global_context->addWarningMessage("The setting 'merge_tree_metadata_cache' is enabled." + " But the feature of 'metadata cache in RocksDB' is experimental and is not ready for production." + " The usage of this feature can lead to data corruption and loss. The setting should be disabled in production." 
+ " See the corresponding report at https://github.com/ClickHouse/ClickHouse/issues/51182"); + fs::create_directories(path / "rocksdb/"); size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20); bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false); @@ -1100,9 +1113,18 @@ try SensitiveDataMasker::setInstance(std::make_unique(config(), "query_masking_rules")); } + const std::string cert_path = config().getString("openSSL.server.certificateFile", ""); + const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); + + std::vector extra_paths = {include_from_path}; + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); + auto main_config_reloader = std::make_unique( config_path, - include_from_path, + extra_paths, config().getString("path", ""), std::move(main_config_zk_node_cache), main_config_zk_changed_event, @@ -1114,9 +1136,10 @@ try server_settings_.loadSettingsFromConfig(*config); size_t max_server_memory_usage = server_settings_.max_server_memory_usage; - double max_server_memory_usage_to_ram_ratio = server_settings_.max_server_memory_usage_to_ram_ratio; - size_t default_max_server_memory_usage = static_cast(memory_amount * max_server_memory_usage_to_ram_ratio); + + size_t current_physical_server_memory = getMemoryAmount(); /// With cgroups, the amount of memory available to the server can be changed dynamically. + size_t default_max_server_memory_usage = static_cast(current_physical_server_memory * max_server_memory_usage_to_ram_ratio); if (max_server_memory_usage == 0) { @@ -1124,7 +1147,7 @@ try LOG_INFO(log, "Setting max_server_memory_usage was set to {}" " ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(max_server_memory_usage), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), max_server_memory_usage_to_ram_ratio); } else if (max_server_memory_usage > default_max_server_memory_usage) @@ -1135,7 +1158,7 @@ try " calculated as {} available" " * {:.2f} max_server_memory_usage_to_ram_ratio", formatReadableSizeWithBinarySuffix(max_server_memory_usage), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), max_server_memory_usage_to_ram_ratio); } @@ -1145,14 +1168,14 @@ try size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit; - size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio); + size_t default_merges_mutations_server_memory_usage = static_cast(current_physical_server_memory * server_settings_.merges_mutations_memory_usage_to_ram_ratio); if (merges_mutations_memory_usage_soft_limit == 0) { merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; LOG_INFO(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), server_settings_.merges_mutations_memory_usage_to_ram_ratio); } else if (merges_mutations_memory_usage_soft_limit > 
default_merges_mutations_server_memory_usage) @@ -1161,7 +1184,7 @@ try LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), - formatReadableSizeWithBinarySuffix(memory_amount), + formatReadableSizeWithBinarySuffix(current_physical_server_memory), server_settings_.merges_mutations_memory_usage_to_ram_ratio); } @@ -1193,6 +1216,7 @@ try } global_context->setRemoteHostFilter(*config); + global_context->setHTTPHeaderFilter(*config); global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop); global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop); @@ -1295,7 +1319,7 @@ try global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); std::lock_guard lock(servers_lock); - updateServers(*config, server_pool, async_metrics, servers); + updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); } global_context->updateStorageConfiguration(*config); @@ -1397,10 +1421,27 @@ try } - for (auto & server : servers_to_start_before_tables) { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); + std::lock_guard lock(servers_lock); + /// We should start interserver communications before (and more imporant shutdown after) tables. + /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. + /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can + /// communicate with zookeeper, execute merges, etc. + createInterserverServers( + config(), + interserver_listen_hosts, + listen_try, + server_pool, + async_metrics, + servers_to_start_before_tables, + /* start_servers= */ false); + + + for (auto & server : servers_to_start_before_tables) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } } /// Initialize access storages. @@ -1422,70 +1463,108 @@ try access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY); }); + global_context->setStopServersCallback([&](const ServerType & server_type) + { + stopServers(servers, server_type); + }); + + global_context->setStartServersCallback([&](const ServerType & server_type) + { + createServers( + config(), + listen_hosts, + listen_try, + server_pool, + async_metrics, + servers, + /* start_servers= */ true, + server_type); + }); + /// Limit on total number of concurrently executed queries. global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries); /// Set up caches. 
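The stop/start callbacks registered above let SYSTEM STOP/START LISTEN act on a subset of listeners. A stripped-down sketch of what the stop side boils down to, with an illustrative Listener type (the real code matches on ServerType and custom-protocol port names):

#include <vector>

enum class ServerKind { HTTP, HTTPS, TCP, CUSTOM };

struct Listener
{
    ServerKind kind;
    bool running = true;
    void stop() { running = false; }
};

/// Body of the stop callback: stop only the listeners matching the requested
/// kind and leave the rest untouched, so e.g. "SYSTEM STOP LISTEN HTTP"
/// does not affect the native TCP port.
void stopMatchingListeners(std::vector<Listener> & listeners, ServerKind kind)
{
    for (auto & listener : listeners)
        if (listener.kind == kind)
            listener.stop();
}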
- size_t max_cache_size = static_cast(memory_amount * server_settings.cache_size_to_ram_max_ratio); + const size_t max_cache_size = static_cast(physical_server_memory * server_settings.cache_size_to_ram_max_ratio); String uncompressed_cache_policy = server_settings.uncompressed_cache_policy; - LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy); size_t uncompressed_cache_size = server_settings.uncompressed_cache_size; if (uncompressed_cache_size > max_cache_size) { uncompressed_cache_size = max_cache_size; - LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", - formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); } global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size); /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(config()); - const Settings & settings = global_context->getSettingsRef(); /// Initialize background executors after we load default_profile config. /// This is needed to load proper values of background_pool_size etc. global_context->initializeBackgroundExecutorsIfNeeded(); - if (settings.async_insert_threads) + if (server_settings.async_insert_threads) + { global_context->setAsynchronousInsertQueue(std::make_shared( global_context, - settings.async_insert_threads)); + server_settings.async_insert_threads, + server_settings.async_insert_queue_flush_on_shutdown)); + } - size_t mark_cache_size = server_settings.mark_cache_size; String mark_cache_policy = server_settings.mark_cache_policy; + size_t mark_cache_size = server_settings.mark_cache_size; if (!mark_cache_size) LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); if (mark_cache_size > max_cache_size) { mark_cache_size = max_cache_size; - LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", - formatReadableSizeWithBinarySuffix(mark_cache_size)); + LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size)); } global_context->setMarkCache(mark_cache_policy, mark_cache_size); - if (server_settings.index_uncompressed_cache_size) + size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size; + if (index_uncompressed_cache_size > max_cache_size) + { + index_uncompressed_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + if (index_uncompressed_cache_size) global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size); - if (server_settings.index_mark_cache_size) + size_t index_mark_cache_size = server_settings.index_mark_cache_size; + if (index_mark_cache_size > max_cache_size) + { + index_mark_cache_size = max_cache_size; + LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + if (index_mark_cache_size) global_context->setIndexMarkCache(server_settings.index_mark_cache_size); - if (server_settings.mmap_cache_size) + size_t mmap_cache_size = server_settings.mmap_cache_size; + if (mmap_cache_size > max_cache_size) + { + mmap_cache_size = max_cache_size; + 
LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + if (mmap_cache_size) global_context->setMMappedFileCache(server_settings.mmap_cache_size); - /// A cache for query results. - global_context->setQueryCache(config()); + size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE); + size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES); + size_t query_cache_query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES); + size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS); + if (query_cache_max_size_in_bytes > max_cache_size) + { + query_cache_max_size_in_bytes = max_cache_size; + LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + } + global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows); #if USE_EMBEDDED_COMPILER - /// 128 MB - constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; - size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); - - constexpr size_t compiled_expression_cache_elements_size_default = 10000; - size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default); - - CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); + size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE); + size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES); + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); #endif /// Set path for format schema files @@ -1520,10 +1599,13 @@ try { LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); size_t current_connections = 0; - for (auto & server : servers_to_start_before_tables) { - server.stop(); - current_connections += server.currentConnections(); + std::lock_guard lock(servers_lock); + for (auto & server : servers_to_start_before_tables) + { + server.stop(); + current_connections += server.currentConnections(); + } } if (current_connections) @@ -1587,18 +1669,16 @@ try database_catalog.initializeAndLoadTemporaryDatabase(); loadMetadataSystem(global_context); maybeConvertSystemDatabase(global_context); + /// This has to be done before the initialization of system logs, + /// otherwise there is a race condition between the system database initialization + /// and creation of new tables in the database. + startupSystemTables(); /// After attaching system databases we can initialize system log. 
global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); /// Build loggers before tables startup to make log messages from tables /// attach available in system.text_log - { - String level_str = config().getString("text_log.level", ""); - int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str); - setTextLog(global_context->getTextLog(), level); - - buildLoggers(config(), logger()); - } + buildLoggers(config(), logger()); /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper); attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA)); @@ -1611,7 +1691,6 @@ try /// Then, load remaining databases loadMetadata(global_context, default_database); convertDatabasesEnginesIfNeed(global_context); - startupSystemTables(); database_catalog.startupBackgroundCleanup(); /// After loading validate that default database exists database_catalog.assertDatabaseExists(default_database); @@ -1634,17 +1713,26 @@ try global_context->initializeTraceCollector(); /// Set up server-wide memory profiler (for total memory tracker). - UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); - if (total_memory_profiler_step) + if (server_settings.total_memory_profiler_step) { - total_memory_tracker.setProfilerStep(total_memory_profiler_step); + total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); } - double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0); - if (total_memory_tracker_sample_probability > 0.0) + if (server_settings.total_memory_tracker_sample_probability > 0.0) { - total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability); + total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); } + + if (server_settings.total_memory_profiler_sample_min_allocation_size) + { + total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); + } + + if (server_settings.total_memory_profiler_sample_max_allocation_size) + { + total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); + } + } #endif @@ -1702,7 +1790,7 @@ try { std::lock_guard lock(servers_lock); - createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers); + createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); if (servers.empty()) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " @@ -1960,12 +2048,12 @@ HTTPContextPtr Server::httpContext() const void Server::createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, - const Strings & interserver_listen_hosts, bool listen_try, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers) + bool start_servers, + const ServerType & server_type) { const Settings & settings = global_context->getSettingsRef(); @@ -1979,24 +2067,26 @@ void Server::createServers( for (const auto & protocol : protocols) { + std::string prefix = "protocols." 
+ protocol + "."; + std::string port_name = prefix + "port"; + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + + if (!config.has(prefix + "port")) + continue; + + if (!server_type.shouldStart(ServerType::Type::CUSTOM, port_name)) + continue; + std::vector hosts; - if (config.has("protocols." + protocol + ".host")) - hosts.push_back(config.getString("protocols." + protocol + ".host")); + if (config.has(prefix + "host")) + hosts.push_back(config.getString(prefix + "host")); else hosts = listen_hosts; for (const auto & host : hosts) { - std::string conf_name = "protocols." + protocol; - std::string prefix = conf_name + "."; - - if (!config.has(prefix + "port")) - continue; - - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - std::string port_name = prefix + "port"; bool is_secure = false; auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); @@ -2025,219 +2115,310 @@ void Server::createServers( for (const auto & listen_host : listen_hosts) { - /// HTTP - const char * port_name = "http_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); + const char * port_name; - return ProtocolServerAdapter( - listen_host, - port_name, - "http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); - }); - - /// HTTPS - port_name = "https_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::HTTP)) { + /// HTTP + port_name = "http_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + }); + } + + if (server_type.shouldStart(ServerType::Type::HTTPS)) + { + /// HTTPS + port_name = "https_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "https://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, 
/* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); #endif - }); + }); + } - /// TCP - port_name = "tcp_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + /// TCP + port_name = "tcp_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - port_name = "tcp_with_proxy_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp) with PROXY: " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + port_name = "tcp_with_proxy_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* 
secure */ false, /* proxy protocol */ true), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } - /// TCP with SSL - port_name = "tcp_port_secure"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); + /// TCP with SSL + port_name = "tcp_port_secure"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + #if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + #else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + #endif + }); + } - port_name = "mysql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::MYSQL)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "MySQL compatibility protocol: " + address.toString(), - std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); - }); + port_name = "mysql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } - port_name = "postgresql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, 
listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "PostgreSQL compatibility protocol: " + address.toString(), - std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); - }); + port_name = "postgresql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } #if USE_GRPC - port_name = "grpc_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::GRPC)) { - Poco::Net::SocketAddress server_address(listen_host, port); - return ProtocolServerAdapter( - listen_host, - port_name, - "gRPC protocol: " + server_address.toString(), - std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); - }); + port_name = "grpc_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); + }); + } #endif - - /// Prometheus (if defined and not setup yet with http_port) - port_name = "prometheus.port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Prometheus: http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); - }); + /// Prometheus (if defined and not setup yet with http_port) + port_name = "prometheus.port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + }); + } } +} + +void Server::createInterserverServers( + Poco::Util::AbstractConfiguration & config, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & 
server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers, + const ServerType & server_type) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(keep_alive_timeout); /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) { - /// Interserver IO HTTP - const char * port_name = "interserver_http_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "replica communication (interserver): http://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - socket, - http_params)); - }); + const char * port_name; - port_name = "interserver_https_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) { + /// Interserver IO HTTP + port_name = "interserver_http_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params)); + }); + } + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) + { + port_name = "interserver_https_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "secure replica communication (interserver): https://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params)); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + 
port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params)); #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif - }); + }); + } + } +} + +void Server::stopServers( + std::vector & servers, + const ServerType & server_type +) const +{ + Poco::Logger * log = &logger(); + + /// Remove servers once all their connections are closed + auto check_server = [&log](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG(log, "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); + + for (auto & server : servers) + { + if (!server.isStopping()) + { + const std::string server_port_name = server.getPortName(); + + if (server_type.shouldStop(server_port_name)) + server.stop(); + } } + std::erase_if(servers, std::bind_front(check_server, "")); } void Server::updateServers( Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, - std::vector & servers) + std::vector & servers, + std::vector & servers_to_start_before_tables) { Poco::Logger * log = &logger(); @@ -2263,11 +2444,19 @@ void Server::updateServers( Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : this->config(); + std::vector all_servers; + all_servers.reserve(servers.size() + servers_to_start_before_tables.size()); for (auto & server : servers) + all_servers.push_back(&server); + + for (auto & server : servers_to_start_before_tables) + all_servers.push_back(&server); + + for (auto * server : all_servers) { - if (!server.isStopping()) + if (!server->isStopping()) { - std::string port_name = server.getPortName(); + std::string port_name = server->getPortName(); bool has_host = false; bool is_http = false; if (port_name.starts_with("protocols.")) @@ -2305,27 +2494,29 @@ void Server::updateServers( /// NOTE: better to compare using getPortName() over using /// dynamic_cast<> since HTTPServer is also used for prometheus and /// internal replication communications. 
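The new Server::stopServers() above stops only the listeners whose port name matches the requested ServerType (apparently in support of a SYSTEM STOP LISTEN-style command) and then prunes servers once their connections are drained, via std::erase_if and std::bind_front. Below is a toy sketch of that idiom; FakeServer and its members are invented for illustration and are not the ProtocolServerAdapter API.

```cpp
#include <cstddef>
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

// Hypothetical stand-in for a protocol server, just enough to show the
// "stop matching servers, then erase the ones whose connections are drained" idiom.
struct FakeServer
{
    std::string port_name;
    bool stopping = false;
    size_t connections = 0;

    const std::string & getPortName() const { return port_name; }
    bool isStopping() const { return stopping; }
    size_t currentConnections() const { return connections; }
    void stop() { stopping = true; }
};

int main()
{
    std::vector<FakeServer> servers{{"http_port", false, 0}, {"tcp_port", false, 3}, {"mysql_port", false, 0}};

    // Predicate in the same shape as check_server above: a server is removed only
    // once it is stopping *and* has no remaining connections.
    auto check_server = [](const char * prefix, const FakeServer & server)
    {
        if (!server.isStopping())
            return false;
        std::printf("Server %s%s: %s (%zu connections)\n",
                    server.getPortName().c_str(), prefix,
                    server.currentConnections() == 0 ? "finished" : "waiting",
                    server.currentConnections());
        return server.currentConnections() == 0;
    };

    // Pretend only the MySQL endpoint was asked to stop.
    for (auto & server : servers)
        if (server.getPortName() == "mysql_port")
            server.stop();

    std::erase_if(servers, std::bind_front(check_server, ""));
    std::printf("%zu servers still registered\n", servers.size());
    return 0;
}
```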
- is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; + is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port"; } if (!has_host) - has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end(); bool has_port = !config.getString(port_name, "").empty(); bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); if (force_restart) - LOG_TRACE(log, " had been changed, will reload {}", server.getDescription()); + LOG_TRACE(log, " had been changed, will reload {}", server->getDescription()); - if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart) + if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart) { - server.stop(); - LOG_INFO(log, "Stopped listening for {}", server.getDescription()); + server->stop(); + LOG_INFO(log, "Stopped listening for {}", server->getDescription()); } } } - createServers(config, listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true); std::erase_if(servers, std::bind_front(check_server, "")); + std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, "")); } } diff --git a/programs/server/Server.h b/programs/server/Server.h index e9ae6d8d937..3f03dd137ef 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -3,8 +3,9 @@ #include #include -#include "Server/HTTP/HTTPContext.h" +#include #include +#include #include /** Server provides three interfaces: @@ -102,18 +103,34 @@ private: void createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); + + void createInterserverServers( + Poco::Util::AbstractConfiguration & config, const Strings & interserver_listen_hosts, bool listen_try, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers = false); + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); void updateServers( Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, - std::vector & servers); + std::vector & servers, + std::vector & servers_to_start_before_tables); + + void stopServers( + std::vector & servers, + const ServerType & server_type + ) const; }; } diff --git a/programs/server/config.d/clusters.xml b/programs/server/config.d/clusters.xml new file mode 120000 index 00000000000..97289377736 --- /dev/null +++ b/programs/server/config.d/clusters.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/clusters.xml \ No newline at end of file diff --git a/programs/server/config.xml b/programs/server/config.xml index 65d325ad6c4..85cdda63558 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ 
-317,7 +317,7 @@ 0 - 100 + 1000 + + + 7500 + + 1048576 + + 8192 + + 524288 + + false @@ -1031,6 +1047,11 @@ toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + + false @@ -1076,7 +1109,11 @@ system metric_log
7500 + 1048576 + 8192 + 524288 1000 + false @@ -1143,6 +1196,10 @@ toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false - - don't replace it - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string); } } @@ -263,14 +263,14 @@ void QueryCache::Writer::finalizeWrite() if (std::chrono::duration_cast(std::chrono::system_clock::now() - query_start_time) < min_query_runtime) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query not expensive enough), query: {}", key.query_string); return; } if (auto entry = cache.getWithKey(key); entry.has_value() && !IsStale()(entry->key)) { /// Same check as in ctor because a parallel Writer could have inserted the current key in the meantime - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (non-stale entry found), query: {}", key.query_string); return; } @@ -353,7 +353,7 @@ void QueryCache::Writer::finalizeWrite() if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows)) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Skipped insert (query result too big), new_entry_size_in_bytes: {} ({}), new_entry_size_in_rows: {} ({}), query: {}", new_entry_size_in_bytes, max_entry_size_in_bytes, new_entry_size_in_rows, max_entry_size_in_rows, key.query_string); return; } @@ -388,7 +388,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar if (!entry.has_value()) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.query_string); return; } @@ -397,13 +397,13 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar if (!entry_key.is_shared && entry_key.user_name != key.user_name) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.query_string); return; } if (IsStale()(entry_key)) { - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found for query {}", key.query_string); return; } @@ -441,7 +441,7 @@ QueryCache::Reader::Reader(Cache & cache_, const Key & key, const std::lock_guar buildSourceFromChunks(entry_key.header, std::move(decompressed_chunks), entry_mapped->totals, entry_mapped->extremes); } - LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst()); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.query_string); } bool QueryCache::Reader::hasCacheEntryForKey() const @@ -493,7 +493,16 @@ void QueryCache::reset() cache.reset(); 
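Two small ideas run through the QueryCache changes below: the key now carries the original query text purely for display in system.query_cache (the part that is hashed and compared stays the AST), and size bookkeeping is delegated to the underlying cache via weight()/count() instead of a manually maintained byte counter. A deliberately simplified illustration, with invented types that are not the ClickHouse ones:

```cpp
#include <cstddef>
#include <map>
#include <string>

// Minimal illustration: the key keeps the user-facing query text alongside the
// normalized form that is actually compared, and sizes are computed on demand.
struct Key
{
    std::string normalized;   // what gets hashed/compared
    std::string query_string; // shown to the user, never part of the comparison

    bool operator<(const Key & other) const { return normalized < other.normalized; }
};

class TinyQueryCache
{
public:
    void put(Key key, std::string result) { entries[std::move(key)] = std::move(result); }

    size_t count() const { return entries.size(); }

    size_t weight() const
    {
        size_t bytes = 0;
        for (const auto & [key, value] : entries)
            bytes += key.normalized.size() + key.query_string.size() + value.size();
        return bytes;
    }

private:
    std::map<Key, std::string> entries;
};

int main()
{
    TinyQueryCache cache;
    cache.put({"select 1", "SELECT 1 SETTINGS use_query_cache = 1"}, "1");
    return (cache.count() == 1 && cache.weight() > 0) ? 0 : 1;
}
```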
std::lock_guard lock(mutex); times_executed.clear(); - cache_size_in_bytes = 0; +} + +size_t QueryCache::weight() const +{ + return cache.weight(); +} + +size_t QueryCache::count() const +{ + return cache.count(); } size_t QueryCache::recordQueryRun(const Key & key) @@ -501,7 +510,7 @@ size_t QueryCache::recordQueryRun(const Key & key) std::lock_guard lock(mutex); size_t times = ++times_executed[key]; // Regularly drop times_executed to avoid DOS-by-unlimited-growth. - static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000; + static constexpr auto TIMES_EXECUTED_MAX_SIZE = 10'000uz; if (times_executed.size() > TIMES_EXECUTED_MAX_SIZE) times_executed.clear(); return times; @@ -512,23 +521,19 @@ std::vector QueryCache::dump() const return cache.dump(); } -QueryCache::QueryCache() +QueryCache::QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_) : cache(std::make_unique>(std::make_unique())) { + updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes_, max_entry_size_in_rows_); } -void QueryCache::updateConfiguration(const Poco::Util::AbstractConfiguration & config) +void QueryCache::updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_) { std::lock_guard lock(mutex); - - size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", 1_GiB); cache.setMaxSize(max_size_in_bytes); - - size_t max_entries = config.getUInt64("query_cache.max_entries", 1024); cache.setMaxCount(max_entries); - - max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", 1_MiB); - max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows_in_rows", 30'000'000); + max_entry_size_in_bytes = max_entry_size_in_bytes_; + max_entry_size_in_rows = max_entry_size_in_rows_; } } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 6ef7cc60918..0c0674c6302 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include @@ -24,13 +23,21 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); class QueryCache { public: + enum class Usage + { + Unknown, /// we don't know what what happened + None, /// query result neither written nor read into/from query cache + Write, /// query result written into query cache + Read, /// query result read from query cache + }; + /// Represents a query result in the cache. struct Key { /// ---------------------------------------------------- /// The actual key (data which gets hashed): - /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select) + /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select). const ASTPtr ast; /// Note: For a transactionally consistent cache, we would need to include the system settings in the cache key or invalidate the @@ -58,6 +65,11 @@ public: /// (we could theoretically apply compression also to the totals and extremes but it's an obscure use case) const bool is_compressed; + /// The SELECT query as plain string, displayed in SYSTEM.QUERY_CACHE. Stored explicitly, i.e. not constructed from the AST, for the + /// sole reason that QueryCache-related SETTINGS are pruned from the AST (see removeQueryCacheSettings()) which will look ugly in + /// SYSTEM.QUERY_CACHE. 
+ const String query_string; + /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, Block header_, @@ -69,7 +81,6 @@ public: Key(ASTPtr ast_, const String & user_name_); bool operator==(const Key & other) const; - String queryStringFromAst() const; }; struct Entry @@ -98,9 +109,6 @@ private: /// query --> query result using Cache = CacheBase; - /// query --> query execution count - using TimesExecuted = std::unordered_map; - public: /// Buffers multiple partial query result chunks (buffer()) and eventually stores them as cache entry (finalizeWrite()). /// @@ -165,15 +173,18 @@ public: friend class QueryCache; /// for createReader() }; - QueryCache(); + QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_); - void updateConfiguration(const Poco::Util::AbstractConfiguration & config); + void updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_); Reader createReader(const Key & key); Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size, size_t max_query_cache_size_in_bytes_quota, size_t max_query_cache_entries_quota); void reset(); + size_t weight() const; + size_t count() const; + /// Record new execution of query represented by key. Returns number of executions so far. size_t recordQueryRun(const Key & key); @@ -181,17 +192,18 @@ public: std::vector dump() const; private: - Cache cache; + Cache cache; /// has its own locking --> not protected by mutex mutable std::mutex mutex; + + /// query --> query execution count + using TimesExecuted = std::unordered_map; TimesExecuted times_executed TSA_GUARDED_BY(mutex); /// Cache configuration size_t max_entry_size_in_bytes TSA_GUARDED_BY(mutex) = 0; size_t max_entry_size_in_rows TSA_GUARDED_BY(mutex) = 0; - size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// Updated in each cache insert/delete - friend class StorageSystemQueryCache; }; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index d50289a5728..85597ce881a 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -44,11 +45,25 @@ void WriteBufferToFileSegment::nextImpl() size_t bytes_to_write = offset(); + FileCacheReserveStat reserve_stat; /// In case of an error, we don't need to finalize the file segment /// because it will be deleted soon and completed in the holder's destructor. - bool ok = file_segment->reserve(bytes_to_write); + bool ok = file_segment->reserve(bytes_to_write, &reserve_stat); + if (!ok) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve space for the file cache ({})", file_segment->getInfoForLog()); + { + String reserve_stat_msg; + for (const auto & [kind, stat] : reserve_stat.stat_by_kind) + reserve_stat_msg += fmt::format("{} hold {}, can release {}; ", + toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size)); + + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})", + bytes_to_write, + file_segment->getKind() == FileSegmentKind::Temporary ? 
"temporary file" : "the file in cache", + reserve_stat_msg, + file_segment->getInfoForLog() + ); + } try { diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 413ff1db6bc..6c09b327ca1 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -194,9 +194,9 @@ void ClientInfo::setInitialQuery() query_kind = QueryKind::INITIAL_QUERY; fillOSUserHostNameAndVersionInfo(); if (client_name.empty()) - client_name = DBMS_NAME; + client_name = VERSION_NAME; else - client_name = (DBMS_NAME " ") + client_name; + client_name = (VERSION_NAME " ") + client_name; } @@ -210,9 +210,9 @@ void ClientInfo::fillOSUserHostNameAndVersionInfo() client_hostname = getFQDNOrHostName(); - client_version_major = DBMS_VERSION_MAJOR; - client_version_minor = DBMS_VERSION_MINOR; - client_version_patch = DBMS_VERSION_PATCH; + client_version_major = VERSION_MAJOR; + client_version_minor = VERSION_MINOR; + client_version_patch = VERSION_PATCH; client_tcp_protocol_version = DBMS_TCP_PROTOCOL_VERSION; } diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index de10a445d01..b90acd1d576 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -144,12 +144,6 @@ public: UInt32 shard_index_ = 0, UInt32 replica_index_ = 0); - Address( - const String & host_port_, - const ClusterConnectionParameters & params, - UInt32 shard_index_, - UInt32 replica_index_); - Address( const DatabaseReplicaInfo & info, const ClusterConnectionParameters & params, diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 0cf3f360994..953e38d56cd 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard( { remote_shards.emplace_back(Shard{ .query = query_ast, + .main_table = main_table, .header = header, .shard_info = shard_info, .lazy = lazy, diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 030c0b77dd5..ca07fd5deda 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -50,6 +50,8 @@ public: { /// Query and header may be changed depending on shard. ASTPtr query; + /// Used to check the table existence on remote node + StorageID main_table; Block header; Cluster::ShardInfo shard_info; @@ -58,9 +60,6 @@ public: /// (When there is a local replica with big delay). bool lazy = false; time_t local_delay = 0; - - /// Set only if parallel reading from replicas is used. 
- std::shared_ptr coordinator; }; using Shards = std::vector; diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index e2f1dfe8ba7..f2d7132b174 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -28,14 +28,18 @@ namespace DB namespace ErrorCodes { extern const int TOO_LARGE_DISTRIBUTED_DEPTH; - extern const int LOGICAL_ERROR; extern const int SUPPORT_IS_DISABLED; } namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log) +ContextMutablePtr updateSettingsForCluster(bool interserver_mode, + ContextPtr context, + const Settings & settings, + const StorageID & main_table, + const SelectQueryInfo * query_info, + Poco::Logger * log) { Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -43,7 +47,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c /// If "secret" (in remote_servers) is not in use, /// user on the shard is not the same as the user on the initiator, /// hence per-user limits should not be applied. - if (cluster.getSecret().empty()) + if (!interserver_mode) { /// Does not matter on remote servers, because queries are sent under different user. new_settings.max_concurrent_queries_for_user = 0; @@ -170,17 +174,15 @@ void executeQuery( std::vector plans; SelectStreamFactory::Shards remote_shards; - auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log); - new_context->getClientInfo().distributed_depth += 1; + auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log); + new_context->increaseDistributedDepth(); size_t shards = query_info.getCluster()->getShardCount(); for (const auto & shard_info : query_info.getCluster()->getShardsInfo()) { - ASTPtr query_ast_for_shard; - if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + ASTPtr query_ast_for_shard = query_ast->clone(); + if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) { - query_ast_for_shard = query_ast->clone(); - OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ sharding_key_expr, sharding_key_expr->getSampleBlock().getByPosition(0).type, @@ -191,8 +193,6 @@ void executeQuery( OptimizeShardingKeyRewriteInVisitor visitor(visitor_data); visitor.visit(query_ast_for_shard); } - else - query_ast_for_shard = query_ast->clone(); if (shard_filter_generator) { @@ -280,7 +280,6 @@ void executeQueryWithParallelReplicas( auto all_replicas_count = std::min(static_cast(settings.max_parallel_replicas), new_cluster->getShardCount()); auto coordinator = std::make_shared(all_replicas_count); auto remote_plan = std::make_unique(); - auto plans = std::vector(); /// This is a little bit weird, but we construct an "empty" coordinator without /// any specified reading/coordination method (like Default, InOrder, InReverseOrder) @@ -308,20 +307,7 @@ void executeQueryWithParallelReplicas( &Poco::Logger::get("ReadFromParallelRemoteReplicasStep"), query_info.storage_limits); - remote_plan->addStep(std::move(read_from_remote)); - remote_plan->addInterpreterContext(context); - 
plans.emplace_back(std::move(remote_plan)); - - if (std::all_of(plans.begin(), plans.end(), [](const QueryPlanPtr & plan) { return !plan; })) - throw Exception(ErrorCodes::LOGICAL_ERROR, "No plans were generated for reading from shard. This is a bug"); - - DataStreams input_streams; - input_streams.reserve(plans.size()); - for (const auto & plan : plans) - input_streams.emplace_back(plan->getCurrentDataStream()); - - auto union_step = std::make_unique(std::move(input_streams)); - query_plan.unitePlans(std::move(union_step), std::move(plans)); + query_plan.addStep(std::move(read_from_remote)); } } diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 41f6da55686..511914e99e4 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -34,8 +34,12 @@ class SelectStreamFactory; /// - optimize_skip_unused_shards_nesting /// /// @return new Context with adjusted settings -ContextMutablePtr updateSettingsForCluster( - const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr); +ContextMutablePtr updateSettingsForCluster(bool interserver_mode, + ContextPtr context, + const Settings & settings, + const StorageID & main_table, + const SelectQueryInfo * query_info = nullptr, + Poco::Logger * log = nullptr); using AdditionalShardFilterGenerator = std::function; /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 70543227b58..4fd90dad371 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -118,7 +118,7 @@ private: { size_t operator() (const IAST::Hash & hash) const { - return hash.first; + return hash.low64; } }; diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 1283879971d..85e0c5a0ae7 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -36,6 +36,7 @@ public: explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false); ~ConcurrentHashJoin() override = default; + std::string getName() const override { return "ConcurrentHashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5019933c2af..4f58e7c3401 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2,14 +2,10 @@ #include #include #include -#include #include -#include #include #include -#include #include -#include #include #include #include @@ -17,11 +13,10 @@ #include #include #include -#include #include #include #include -#include +#include #include #include #include @@ -33,8 +28,6 @@ #include #include #include -#include -#include #include #include #include @@ -43,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -54,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -68,7 +60,6 @@ #include #include #include -#include #include #include #include @@ -85,8 +76,6 @@ #include #include #include -#include -#include #include #include #include 
@@ -96,13 +85,12 @@ #include #include #include -#include #include +#include #include #include #include #include -#include #include #include #include @@ -113,11 +101,10 @@ #include #include #include -#include +#include +#include +#include -#if USE_ROCKSDB -#include -#endif namespace fs = std::filesystem; @@ -153,13 +140,13 @@ namespace CurrentMetrics extern const Metric IOWriterThreadsActive; } + namespace DB { namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int BAD_GET; extern const int UNKNOWN_DATABASE; extern const int UNKNOWN_TABLE; extern const int TABLE_ALREADY_EXISTS; @@ -174,6 +161,7 @@ namespace ErrorCodes extern const int UNKNOWN_FUNCTION; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int CLUSTER_DOESNT_EXIST; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -257,26 +245,27 @@ struct ContextSharedPart : boost::noncopyable std::optional backups_worker; - String default_profile_name; /// Default profile name used for default values. - String system_profile_name; /// Profile used by system processes - String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying + String default_profile_name; /// Default profile name used for default values. + String system_profile_name; /// Profile used by system processes + String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying std::unique_ptr access_control; mutable ResourceManagerPtr resource_manager; - mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. - mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. - mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. - mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache. - mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. - mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. - mutable QueryCachePtr query_cache; /// Cache of query results. - mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. - ProcessList process_list; /// Executing queries at the moment. + mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. + mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. + mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. + mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache. + mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. + mutable QueryCachePtr query_cache; /// Cache of query results. + mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. + mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. + ProcessList process_list; /// Executing queries at the moment. 
+ SessionTracker session_tracker; GlobalOvercommitTracker global_overcommit_tracker; - MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) - MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) + MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) + MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) ReplicatedFetchList replicated_fetch_list; - ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. - InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. + ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. + InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. mutable std::unique_ptr buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) @@ -327,9 +316,10 @@ struct ContextSharedPart : boost::noncopyable OrdinaryBackgroundExecutorPtr fetch_executor; OrdinaryBackgroundExecutorPtr common_executor; - RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml + RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml + HTTPHeaderFilter http_header_filter; /// Forbidden HTTP headers from config.xml - std::optional trace_collector; /// Thread collecting traces from threads executing queries + std::optional trace_collector; /// Thread collecting traces from threads executing queries /// Clusters for distributed tables /// Initialized on demand (on distributed storages initialization) since Settings should be initialized @@ -355,6 +345,9 @@ struct ContextSharedPart : boost::noncopyable Context::ConfigReloadCallback config_reload_callback; + Context::StartStopServersCallback start_servers_callback; + Context::StartStopServersCallback stop_servers_callback; + bool is_server_completely_started = false; #if USE_ROCKSDB @@ -730,6 +723,9 @@ std::unique_lock Context::getLock() const ProcessList & Context::getProcessList() { return shared->process_list; } const ProcessList & Context::getProcessList() const { return shared->process_list; } OvercommitTracker * Context::getGlobalOvercommitTracker() const { return &shared->global_overcommit_tracker; } + +SessionTracker & Context::getSessionTracker() { return shared->session_tracker; } + MergeList & Context::getMergeList() { return shared->merge_list; } const MergeList & Context::getMergeList() const { return shared->merge_list; } MovesList & Context::getMovesList() { return shared->moves_list; } @@ -782,15 +778,32 @@ Strings Context::getWarnings() const auto lock = getLock(); common_warnings = shared->warnings; } + /// Make setting's name ordered + std::set obsolete_settings; for (const auto & setting : settings) { if (setting.isValueChanged() && setting.isObsolete()) - { - common_warnings.emplace_back("Some obsolete setting is changed. " - "Check 'select * from system.settings where changed' and read the changelog."); - break; - } + obsolete_settings.emplace(setting.getName()); } + + if (!obsolete_settings.empty()) + { + bool single_element = obsolete_settings.size() == 1; + String res = single_element ? "Obsolete setting [" : "Obsolete settings ["; + + bool first = true; + for (const auto & setting : obsolete_settings) + { + res += first ? 
"" : ", "; + res += "'" + setting + "'"; + first = false; + } + res = res + "]" + (single_element ? " is" : " are") + + " changed. " + "Please check 'select * from system.settings where changed and is_obsolete' and read the changelog."; + common_warnings.emplace_back(res); + } + return common_warnings; } @@ -1057,25 +1070,34 @@ ConfigurationPtr Context::getUsersConfig() return shared->users_config; } -void Context::setUser(const UUID & user_id_) +void Context::setUser(const UUID & user_id_, const std::optional> & current_roles_) { + /// Prepare lists of user's profiles, constraints, settings, roles. + /// NOTE: AccessControl::read() and other AccessControl's functions may require some IO work, + /// so Context::getLock() must be unlocked while we're doing this. + + auto user = getAccessControl().read(user_id_); + + auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles); + auto enabled_roles = getAccessControl().getEnabledRolesInfo(new_current_roles, {}); + auto enabled_profiles = getAccessControl().getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles); + const auto & database = user->default_database; + + /// Apply user's profiles, constraints, settings, roles. + auto lock = getLock(); - user_id = user_id_; + setUserID(user_id_); - access = getAccessControl().getContextAccess( - user_id_, /* current_roles = */ {}, /* use_default_roles = */ true, settings, current_database, client_info); + /// A profile can specify a value and a readonly constraint for same setting at the same time, + /// so we shouldn't check constraints here. + setCurrentProfiles(*enabled_profiles, /* check_constraints= */ false); - auto user = access->getUser(); + setCurrentRoles(new_current_roles); - current_roles = std::make_shared>(user->granted_roles.findGranted(user->default_roles)); - - auto default_profile_info = access->getDefaultProfileInfo(); - settings_constraints_and_current_profiles = default_profile_info->getConstraintsAndProfileIDs(); - applySettingsChanges(default_profile_info->settings); - - if (!user->default_database.empty()) - setCurrentDatabase(user->default_database); + /// It's optional to specify the DEFAULT DATABASE in the user's definition. + if (!database.empty()) + setCurrentDatabase(database); } std::shared_ptr Context::getUser() const @@ -1088,6 +1110,13 @@ String Context::getUserName() const return getAccess()->getUserName(); } +void Context::setUserID(const UUID & user_id_) +{ + auto lock = getLock(); + user_id = user_id_; + need_recalculate_access = true; +} + std::optional Context::getUserID() const { auto lock = getLock(); @@ -1095,20 +1124,14 @@ std::optional Context::getUserID() const } -void Context::setQuotaKey(String quota_key_) -{ - auto lock = getLock(); - client_info.quota_key = std::move(quota_key_); -} - - void Context::setCurrentRoles(const std::vector & current_roles_) { auto lock = getLock(); - if (current_roles ? 
(*current_roles == current_roles_) : current_roles_.empty()) - return; - current_roles = std::make_shared>(current_roles_); - calculateAccessRights(); + if (current_roles_.empty()) + current_roles = nullptr; + else + current_roles = std::make_shared>(current_roles_); + need_recalculate_access = true; } void Context::setCurrentRolesDefault() @@ -1133,20 +1156,6 @@ std::shared_ptr Context::getRolesInfo() const } -void Context::calculateAccessRights() -{ - auto lock = getLock(); - if (user_id) - access = getAccessControl().getContextAccess( - *user_id, - current_roles ? *current_roles : std::vector{}, - /* use_default_roles = */ false, - settings, - current_database, - client_info); -} - - template void Context::checkAccessImpl(const Args &... args) const { @@ -1166,32 +1175,55 @@ void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id, void Context::checkAccess(const AccessRightsElement & element) const { return checkAccessImpl(element); } void Context::checkAccess(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } - std::shared_ptr Context::getAccess() const { - auto lock = getLock(); - return access ? access : ContextAccess::getFullAccess(); + /// A helper function to collect parameters for calculating access rights, called with Context::getLock() acquired. + auto get_params = [this]() + { + /// If setUserID() was never called then this must be the global context with full access. + bool full_access = !user_id; + + return ContextAccessParams{user_id, full_access, /* use_default_roles= */ false, current_roles, settings, current_database, client_info}; + }; + + /// Check if the current access rights are still valid, otherwise get parameters for recalculating access rights. + std::optional params; + + { + auto lock = getLock(); + if (access && !need_recalculate_access) + return access; /// No need to recalculate access rights. + + params.emplace(get_params()); + + if (access && (access->getParams() == *params)) + { + need_recalculate_access = false; + return access; /// No need to recalculate access rights. + } + } + + /// Calculate new access rights according to the collected parameters. + /// NOTE: AccessControl::getContextAccess() may require some IO work, so Context::getLock() must be unlocked while we're doing this. + auto res = getAccessControl().getContextAccess(*params); + + { + /// If the parameters of access rights were not changed while we were calculating them, + /// then we store the new access rights in the Context to allow reusing it later.
+ auto lock = getLock(); + if (get_params() == *params) + { + access = res; + need_recalculate_access = false; + } + } + + return res; } RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const { - auto lock = getLock(); - RowPolicyFilterPtr row_filter_of_initial_user; - if (row_policies_of_initial_user) - row_filter_of_initial_user = row_policies_of_initial_user->getFilter(database, table_name, filter_type); - return getAccess()->getRowPolicyFilter(database, table_name, filter_type, row_filter_of_initial_user); -} - -void Context::enableRowPoliciesOfInitialUser() -{ - auto lock = getLock(); - row_policies_of_initial_user = nullptr; - if (client_info.initial_user == client_info.current_user) - return; - auto initial_user_id = getAccessControl().find(client_info.initial_user); - if (!initial_user_id) - return; - row_policies_of_initial_user = getAccessControl().tryGetDefaultRowPolicies(*initial_user_id); + return getAccess()->getRowPolicyFilter(database, table_name, filter_type); } @@ -1207,13 +1239,12 @@ std::optional Context::getQuotaUsage() const } -void Context::setCurrentProfile(const String & profile_name) +void Context::setCurrentProfile(const String & profile_name, bool check_constraints) { - auto lock = getLock(); try { UUID profile_id = getAccessControl().getID(profile_name); - setCurrentProfile(profile_id); + setCurrentProfile(profile_id, check_constraints); } catch (Exception & e) { @@ -1222,15 +1253,20 @@ void Context::setCurrentProfile(const String & profile_name) } } -void Context::setCurrentProfile(const UUID & profile_id) +void Context::setCurrentProfile(const UUID & profile_id, bool check_constraints) { - auto lock = getLock(); auto profile_info = getAccessControl().getSettingsProfileInfo(profile_id); - checkSettingsConstraints(profile_info->settings); - applySettingsChanges(profile_info->settings); - settings_constraints_and_current_profiles = profile_info->getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); + setCurrentProfiles(*profile_info, check_constraints); } +void Context::setCurrentProfiles(const SettingsProfilesInfo & profiles_info, bool check_constraints) +{ + auto lock = getLock(); + if (check_constraints) + checkSettingsConstraints(profiles_info.settings, SettingSource::PROFILE); + applySettingsChanges(profiles_info.settings); + settings_constraints_and_current_profiles = profiles_info.getConstraintsAndProfileIDs(settings_constraints_and_current_profiles); +} std::vector Context::getCurrentProfiles() const { @@ -1409,15 +1445,24 @@ void Context::addQueryAccessInfo( void Context::addQueryAccessInfo(const Names & partition_names) { if (isGlobalContext()) - { throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); - } std::lock_guard lock(query_access_info.mutex); for (const auto & partition_name : partition_names) - { query_access_info.partitions.emplace(partition_name); - } +} + +void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name) +{ + if (!qualified_projection_name) + return; + + if (isGlobalContext()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); + + std::lock_guard lock(query_access_info.mutex); + query_access_info.projections.emplace(fmt::format( + "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name))); } void Context::addQueryFactoriesInfo(QueryLogFactories 
factory_type, const String & created_object) const @@ -1498,12 +1543,25 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const { if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { + auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); + StorageView::replaceQueryParametersIfParametrizedView(query, parameterized_view_values); + + ASTCreateQuery create; + create.select = query->as(); + auto sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(query, getQueryContext()); + auto res = std::make_shared(StorageID(database_name, table_name), + create, + ColumnsDescription(sample_block.getNamesAndTypesList()), + /* comment */ "", + /* is_parameterized_view */ true); + res->startup(); function->prefer_subquery_to_function_formatting = true; - return table; + return res; } } auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + auto key = toString(hash); StoragePtr & res = table_function_results[key]; if (!res) { @@ -1524,7 +1582,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions; if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable()) { - const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns(); + const auto & insert_structure = DatabaseCatalog::instance() + .getTable(getInsertionTable(), shared_from_this()) + ->getInMemoryMetadataPtr() + ->getColumns() + .getInsertable(); DB::ColumnsDescription structure_hint; bool use_columns_from_insert_query = true; @@ -1650,7 +1712,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const auto new_hash = table_expression->getTreeHash(); if (hash != new_hash) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); + key = toString(new_hash); table_function_results[key] = res; } } @@ -1659,8 +1721,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr) { - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = table_expression->getTreeHash(); + const auto key = toString(hash); StoragePtr & res = table_function_results[key]; if (!res) @@ -1700,27 +1762,8 @@ Settings Context::getSettings() const void Context::setSettings(const Settings & settings_) { auto lock = getLock(); - const auto old_readonly = settings.readonly; - const auto old_allow_ddl = settings.allow_ddl; - const auto old_allow_introspection_functions = settings.allow_introspection_functions; - const auto old_display_secrets = settings.format_display_secrets_in_show_and_select; - settings = settings_; - - if ((settings.readonly != old_readonly) - || (settings.allow_ddl != old_allow_ddl) - || (settings.allow_introspection_functions != old_allow_introspection_functions) - || (settings.format_display_secrets_in_show_and_select != old_display_secrets)) - calculateAccessRights(); -} - -void Context::recalculateAccessRightsIfNeeded(std::string_view name) -{ - if (name == 
"readonly" - || name == "allow_ddl" - || name == "allow_introspection_functions" - || name == "format_display_secrets_in_show_and_select") - calculateAccessRights(); + need_recalculate_access = true; } void Context::setSetting(std::string_view name, const String & value) @@ -1732,7 +1775,8 @@ void Context::setSetting(std::string_view name, const String & value) return; } settings.set(name, value); - recalculateAccessRightsIfNeeded(name); + if (ContextAccessParams::dependsOnSettingName(name)) + need_recalculate_access = true; } void Context::setSetting(std::string_view name, const Field & value) @@ -1744,7 +1788,8 @@ void Context::setSetting(std::string_view name, const Field & value) return; } settings.set(name, value); - recalculateAccessRightsIfNeeded(name); + if (ContextAccessParams::dependsOnSettingName(name)) + need_recalculate_access = true; } void Context::applySettingChange(const SettingChange & change) @@ -1772,29 +1817,29 @@ void Context::applySettingsChanges(const SettingsChanges & changes) } -void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements) const +void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements, source); } -void Context::checkSettingsConstraints(const SettingChange & change) const +void Context::checkSettingsConstraints(const SettingChange & change, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change, source); } -void Context::checkSettingsConstraints(const SettingsChanges & changes) const +void Context::checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes, source); } -void Context::checkSettingsConstraints(SettingsChanges & changes) const +void Context::checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, changes, source); } -void Context::clampToSettingsConstraints(SettingsChanges & changes) const +void Context::clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const { - getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes); + getSettingsConstraintsAndCurrentProfiles()->constraints.clamp(settings, changes, source); } void Context::checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const @@ -1853,7 +1898,7 @@ void Context::setCurrentDatabase(const String & name) DatabaseCatalog::instance().assertDatabaseExists(name); auto lock = getLock(); current_database = name; - calculateAccessRights(); + need_recalculate_access = true; } void Context::setCurrentQueryId(const String & query_id) @@ -2166,9 +2211,9 @@ BackupsWorker & Context::getBackupsWorker() const const bool allow_concurrent_restores = this->getConfigRef().getBool("backups.allow_concurrent_restores", true); const auto & config = getConfigRef(); - const auto & settings_ = getSettingsRef(); - UInt64 
backup_threads = config.getUInt64("backup_threads", settings_.backup_threads); - UInt64 restore_threads = config.getUInt64("restore_threads", settings_.restore_threads); + const auto & settings_ref = getSettingsRef(); + UInt64 backup_threads = config.getUInt64("backup_threads", settings_ref.backup_threads); + UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads); if (!shared->backups_worker) shared->backups_worker.emplace(backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores); @@ -2224,7 +2269,7 @@ UncompressedCachePtr Context::getUncompressedCache() const } -void Context::dropUncompressedCache() const +void Context::clearUncompressedCache() const { auto lock = getLock(); if (shared->uncompressed_cache) @@ -2248,7 +2293,7 @@ MarkCachePtr Context::getMarkCache() const return shared->mark_cache; } -void Context::dropMarkCache() const +void Context::clearMarkCache() const { auto lock = getLock(); if (shared->mark_cache) @@ -2270,32 +2315,6 @@ ThreadPool & Context::getLoadMarksThreadpool() const return *shared->load_marks_threadpool; } -static size_t getPrefetchThreadpoolSizeFromConfig(const Poco::Util::AbstractConfiguration & config) -{ - return config.getUInt(".prefetch_threadpool_pool_size", 100); -} - -size_t Context::getPrefetchThreadpoolSize() const -{ - const auto & config = getConfigRef(); - return getPrefetchThreadpoolSizeFromConfig(config); -} - -ThreadPool & Context::getPrefetchThreadpool() const -{ - const auto & config = getConfigRef(); - - auto lock = getLock(); - if (!shared->prefetch_threadpool) - { - auto pool_size = getPrefetchThreadpoolSize(); - auto queue_size = config.getUInt(".prefetch_threadpool_queue_size", 1000000); - shared->prefetch_threadpool = std::make_unique( - CurrentMetrics::IOPrefetchThreads, CurrentMetrics::IOPrefetchThreadsActive, pool_size, pool_size, queue_size); - } - return *shared->prefetch_threadpool; -} - void Context::setIndexUncompressedCache(size_t max_size_in_bytes) { auto lock = getLock(); @@ -2306,7 +2325,6 @@ void Context::setIndexUncompressedCache(size_t max_size_in_bytes) shared->index_uncompressed_cache = std::make_shared(max_size_in_bytes); } - UncompressedCachePtr Context::getIndexUncompressedCache() const { auto lock = getLock(); @@ -2314,7 +2332,7 @@ UncompressedCachePtr Context::getIndexUncompressedCache() const } -void Context::dropIndexUncompressedCache() const +void Context::clearIndexUncompressedCache() const { auto lock = getLock(); if (shared->index_uncompressed_cache) @@ -2338,44 +2356,13 @@ MarkCachePtr Context::getIndexMarkCache() const return shared->index_mark_cache; } -void Context::dropIndexMarkCache() const +void Context::clearIndexMarkCache() const { auto lock = getLock(); if (shared->index_mark_cache) shared->index_mark_cache->reset(); } -void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config) -{ - auto lock = getLock(); - - if (shared->query_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); - - shared->query_cache = std::make_shared(); - shared->query_cache->updateConfiguration(config); -} - -void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config) -{ - auto lock = getLock(); - if (shared->query_cache) - shared->query_cache->updateConfiguration(config); -} - -QueryCachePtr Context::getQueryCache() const -{ - auto lock = getLock(); - return shared->query_cache; -} - -void Context::dropQueryCache() const -{ - auto lock = getLock(); - if 
(shared->query_cache) - shared->query_cache->reset(); -} - void Context::setMMappedFileCache(size_t cache_size_in_num_entries) { auto lock = getLock(); @@ -2392,15 +2379,50 @@ MMappedFileCachePtr Context::getMMappedFileCache() const return shared->mmap_cache; } -void Context::dropMMappedFileCache() const +void Context::clearMMappedFileCache() const { auto lock = getLock(); if (shared->mmap_cache) shared->mmap_cache->reset(); } +void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows) +{ + auto lock = getLock(); -void Context::dropCaches() const + if (shared->query_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); + + shared->query_cache = std::make_shared(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows); +} + +void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + if (shared->query_cache) + { + size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE); + size_t max_entries = config.getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES); + size_t max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES); + size_t max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS); + shared->query_cache->updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows); + } +} + +QueryCachePtr Context::getQueryCache() const +{ + auto lock = getLock(); + return shared->query_cache; +} + +void Context::clearQueryCache() const +{ + auto lock = getLock(); + if (shared->query_cache) + shared->query_cache->reset(); +} + +void Context::clearCaches() const { auto lock = getLock(); @@ -2416,11 +2438,31 @@ void Context::dropCaches() const if (shared->index_mark_cache) shared->index_mark_cache->reset(); - if (shared->query_cache) - shared->query_cache->reset(); - if (shared->mmap_cache) shared->mmap_cache->reset(); + + /// Intentionally not dropping the query cache which is transactionally inconsistent by design. +} + +ThreadPool & Context::getPrefetchThreadpool() const +{ + const auto & config = getConfigRef(); + + auto lock = getLock(); + if (!shared->prefetch_threadpool) + { + auto pool_size = getPrefetchThreadpoolSize(); + auto queue_size = config.getUInt(".prefetch_threadpool_queue_size", 1000000); + shared->prefetch_threadpool = std::make_unique( + CurrentMetrics::IOPrefetchThreads, CurrentMetrics::IOPrefetchThreadsActive, pool_size, pool_size, queue_size); + } + return *shared->prefetch_threadpool; +} + +size_t Context::getPrefetchThreadpoolSize() const +{ + const auto & config = getConfigRef(); + return config.getUInt(".prefetch_threadpool_pool_size", 100); } BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const @@ -2626,7 +2668,10 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const const auto & config = shared->zookeeper_config ? 
*shared->zookeeper_config : getConfigRef(); if (!shared->zookeeper) shared->zookeeper = std::make_shared(config, zkutil::getZooKeeperConfigName(config), getZooKeeperLog()); - else if (shared->zookeeper->expired()) + else if (shared->zookeeper->hasReachedDeadline()) + shared->zookeeper->finalize("ZooKeeper session has reached its deadline"); + + if (shared->zookeeper->expired()) { Stopwatch watch; LOG_DEBUG(shared->log, "Trying to establish a new connection with ZooKeeper"); @@ -2834,16 +2879,6 @@ std::map Context::getAuxiliaryZooKeepers() const } #if USE_ROCKSDB -MergeTreeMetadataCachePtr Context::getMergeTreeMetadataCache() const -{ - auto cache = tryGetMergeTreeMetadataCache(); - if (!cache) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Merge tree metadata cache is not initialized, please add config merge_tree_metadata_cache in config.xml and restart"); - return cache; -} - MergeTreeMetadataCachePtr Context::tryGetMergeTreeMetadataCache() const { return shared->merge_tree_metadata_cache; @@ -2959,6 +2994,16 @@ const RemoteHostFilter & Context::getRemoteHostFilter() const return shared->remote_host_filter; } +void Context::setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config) +{ + shared->http_header_filter.setValuesFromConfig(config); +} + +const HTTPHeaderFilter & Context::getHTTPHeaderFilter() const +{ + return shared->http_header_filter; +} + UInt16 Context::getTCPPort() const { auto lock = getLock(); @@ -2986,7 +3031,7 @@ UInt16 Context::getServerPort(const String & port_name) const { auto it = shared->server_ports.find(port_name); if (it == shared->server_ports.end()) - throw Exception(ErrorCodes::BAD_GET, "There is no port named {}", port_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "There is no port named {}", port_name); else return it->second; } @@ -2995,7 +3040,7 @@ std::shared_ptr Context::getCluster(const std::string & cluster_name) c { if (auto res = tryGetCluster(cluster_name)) return res; - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name); } @@ -3115,7 +3160,12 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrsystem_logs = std::make_unique(getGlobalContext(), getConfigRef()); } @@ -3131,6 +3181,12 @@ void Context::initializeMergeTreeMetadataCache(const String & dir, size_t size) } #endif +/// Call after unexpected crash happen. +void Context::handleCrash() const +{ + shared->system_logs->handleCrash(); +} + bool Context::hasTraceCollector() const { return shared->hasTraceCollector(); @@ -3310,6 +3366,16 @@ std::shared_ptr Context::getAsynchronousInsertLog() const return shared->system_logs->asynchronous_insert_log; } +std::vector Context::getSystemLogs() const +{ + auto lock = getLock(); + + if (!shared->system_logs) + return {}; + + return shared->system_logs->logs; +} + CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double part_size_ratio) const { auto lock = getLock(); @@ -3613,6 +3679,36 @@ void Context::reloadConfig() const shared->config_reload_callback(); } +void Context::setStartServersCallback(StartStopServersCallback && callback) +{ + /// Is initialized at server startup, so lock isn't required. Otherwise use mutex. + shared->start_servers_callback = std::move(callback); +} + +void Context::setStopServersCallback(StartStopServersCallback && callback) +{ + /// Is initialized at server startup, so lock isn't required. Otherwise use mutex. 
+ shared->stop_servers_callback = std::move(callback); +} + +void Context::startServers(const ServerType & server_type) const +{ + /// Use mutex if callback may be changed after startup. + if (!shared->start_servers_callback) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't start servers because start_servers_callback is not set."); + + shared->start_servers_callback(server_type); +} + +void Context::stopServers(const ServerType & server_type) const +{ + /// Use mutex if callback may be changed after startup. + if (!shared->stop_servers_callback) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't stop servers because stop_servers_callback is not set."); + + shared->stop_servers_callback(server_type); +} + void Context::shutdown() { @@ -3817,6 +3913,129 @@ void Context::resetInputCallbacks() } +void Context::setClientInfo(const ClientInfo & client_info_) +{ + client_info = client_info_; + need_recalculate_access = true; +} + +void Context::setClientName(const String & client_name) +{ + client_info.client_name = client_name; +} + +void Context::setClientInterface(ClientInfo::Interface interface) +{ + client_info.interface = interface; + need_recalculate_access = true; +} + +void Context::setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version) +{ + client_info.client_version_major = client_version_major; + client_info.client_version_minor = client_version_minor; + client_info.client_version_patch = client_version_patch; + client_info.client_tcp_protocol_version = client_tcp_protocol_version; +} + +void Context::setClientConnectionId(uint32_t connection_id_) +{ + client_info.connection_id = connection_id_; +} + +void Context::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer) +{ + client_info.http_method = http_method; + client_info.http_user_agent = http_user_agent; + client_info.http_referer = http_referer; + need_recalculate_access = true; +} + +void Context::setForwardedFor(const String & forwarded_for) +{ + client_info.forwarded_for = forwarded_for; + need_recalculate_access = true; +} + +void Context::setQueryKind(ClientInfo::QueryKind query_kind) +{ + client_info.query_kind = query_kind; +} + +void Context::setQueryKindInitial() +{ + /// TODO: Try to combine this function with setQueryKind(). + client_info.setInitialQuery(); +} + +void Context::setQueryKindReplicatedDatabaseInternal() +{ + /// TODO: Try to combine this function with setQueryKind(). + client_info.is_replicated_database_internal = true; +} + +void Context::setCurrentUserName(const String & current_user_name) +{ + /// TODO: Try to combine this function with setUser(). 
+ client_info.current_user = current_user_name; + need_recalculate_access = true; +} + +void Context::setCurrentAddress(const Poco::Net::SocketAddress & current_address) +{ + client_info.current_address = current_address; + need_recalculate_access = true; +} + +void Context::setInitialUserName(const String & initial_user_name) +{ + client_info.initial_user = initial_user_name; + need_recalculate_access = true; +} + +void Context::setInitialAddress(const Poco::Net::SocketAddress & initial_address) +{ + client_info.initial_address = initial_address; +} + +void Context::setInitialQueryId(const String & initial_query_id) +{ + client_info.initial_query_id = initial_query_id; +} + +void Context::setInitialQueryStartTime(std::chrono::time_point initial_query_start_time) +{ + client_info.initial_query_start_time = timeInSeconds(initial_query_start_time); + client_info.initial_query_start_time_microseconds = timeInMicroseconds(initial_query_start_time); +} + +void Context::setQuotaClientKey(const String & quota_key_) +{ + client_info.quota_key = quota_key_; + need_recalculate_access = true; +} + +void Context::setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version) +{ + client_info.connection_client_version_major = client_version_major; + client_info.connection_client_version_minor = client_version_minor; + client_info.connection_client_version_patch = client_version_patch; + client_info.connection_tcp_protocol_version = client_tcp_protocol_version; +} + +void Context::setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica) +{ + client_info.collaborate_with_initiator = collaborate_with_initiator; + client_info.count_participating_replicas = all_replicas_count; + client_info.number_of_current_replica = number_of_current_replica; +} + +void Context::increaseDistributedDepth() +{ + ++client_info.distributed_depth; +} + + StorageID Context::resolveStorageID(StorageID storage_id, StorageNamespace where) const { if (storage_id.uuid != UUIDHelpers::Nil) @@ -4304,14 +4523,6 @@ ReadSettings Context::getReadSettings() const return res; } -ReadSettings Context::getBackupReadSettings() const -{ - ReadSettings settings_ = getReadSettings(); - settings_.remote_throttler = getBackupsThrottler(); - settings_.local_throttler = getBackupsThrottler(); - return settings_; -} - WriteSettings Context::getWriteSettings() const { WriteSettings res; @@ -4338,14 +4549,13 @@ std::shared_ptr Context::getAsyncReadCounters() const Context::ParallelReplicasMode Context::getParallelReplicasMode() const { - const auto & settings_ = getSettingsRef(); + const auto & settings_ref = getSettingsRef(); using enum Context::ParallelReplicasMode; - if (!settings_.parallel_replicas_custom_key.value.empty()) + if (!settings_ref.parallel_replicas_custom_key.value.empty()) return CUSTOM_KEY; - if (settings_.allow_experimental_parallel_reading_from_replicas > 0 - && !settings_.use_hedged_requests) + if (settings_ref.allow_experimental_parallel_reading_from_replicas > 0 && !settings_ref.use_hedged_requests) return READ_TASKS; return SAMPLE_KEY; @@ -4353,17 +4563,15 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const bool Context::canUseParallelReplicasOnInitiator() const { - const auto & settings_ = getSettingsRef(); - return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS - && settings_.max_parallel_replicas > 1 + const auto & settings_ref = getSettingsRef(); + 
return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1 && !getClientInfo().collaborate_with_initiator; } bool Context::canUseParallelReplicasOnFollower() const { - const auto & settings_ = getSettingsRef(); - return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS - && settings_.max_parallel_replicas > 1 + const auto & settings_ref = getSettingsRef(); + return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1 && getClientInfo().collaborate_with_initiator; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 6cbb0e58911..587fe402d4e 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -6,8 +6,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -50,8 +52,8 @@ struct ContextSharedPart; class ContextAccess; struct User; using UserPtr = std::shared_ptr; +struct SettingsProfilesInfo; struct EnabledRolesInfo; -class EnabledRowPolicies; struct RowPolicyFilter; using RowPolicyFilterPtr = std::shared_ptr; class EnabledQuota; @@ -84,6 +86,7 @@ struct Progress; struct FileProgress; class Clusters; class QueryCache; +class ISystemLog; class QueryLog; class QueryThreadLog; class QueryViewsLog; @@ -133,6 +136,7 @@ using StoragePolicyPtr = std::shared_ptr; using StoragePoliciesMap = std::map; class StoragePolicySelector; using StoragePolicySelectorPtr = std::shared_ptr; +class ServerType; template class MergeTreeBackgroundExecutor; @@ -200,6 +204,8 @@ using MergeTreeMetadataCachePtr = std::shared_ptr; class PreparedSetsCache; using PreparedSetsCachePtr = std::shared_ptr; +class SessionTracker; + /// An empty interface for an arbitrary object that may be attached by a shared pointer /// to query context, when using ClickHouse as a library. struct IHostContext @@ -248,8 +254,8 @@ private: std::optional user_id; std::shared_ptr> current_roles; std::shared_ptr settings_constraints_and_current_profiles; - std::shared_ptr access; - std::shared_ptr row_policies_of_initial_user; + mutable std::shared_ptr access; + mutable bool need_recalculate_access = true; String current_database; Settings settings; /// Setting for query execution. @@ -529,13 +535,11 @@ public: /// Sets the current user assuming that he/she is already authenticated. /// WARNING: This function doesn't check password! 
- void setUser(const UUID & user_id_); - + void setUser(const UUID & user_id_, const std::optional> & current_roles_ = {}); UserPtr getUser() const; - String getUserName() const; - std::optional getUserID() const; - void setQuotaKey(String quota_key_); + std::optional getUserID() const; + String getUserName() const; void setCurrentRoles(const std::vector & current_roles_); void setCurrentRolesDefault(); @@ -543,8 +547,9 @@ boost::container::flat_set getEnabledRoles() const; std::shared_ptr getRolesInfo() const; - void setCurrentProfile(const String & profile_name); - void setCurrentProfile(const UUID & profile_id); + void setCurrentProfile(const String & profile_name, bool check_constraints = true); + void setCurrentProfile(const UUID & profile_id, bool check_constraints = true); + void setCurrentProfiles(const SettingsProfilesInfo & profiles_info, bool check_constraints = true); std::vector getCurrentProfiles() const; std::vector getEnabledProfiles() const; @@ -567,13 +572,6 @@ RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const; - /// Finds and sets extra row policies to be used based on `client_info.initial_user`, - /// if the initial user exists. - /// TODO: we need a better solution here. It seems we should pass the initial row policy - /// because a shard is allowed to not have the initial user or it might be another user - /// with the same name. - void enableRowPoliciesOfInitialUser(); - std::shared_ptr getQuota() const; std::optional getQuotaUsage() const; @@ -597,9 +595,33 @@ InputBlocksReader getInputBlocksReaderCallback() const; void resetInputCallbacks(); - ClientInfo & getClientInfo() { return client_info; } + /// Returns information about the client executing a query. const ClientInfo & getClientInfo() const { return client_info; } + /// Modify the information stored in the context about the client executing a query.
+ void setClientInfo(const ClientInfo & client_info_); + void setClientName(const String & client_name); + void setClientInterface(ClientInfo::Interface interface); + void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); + void setClientConnectionId(uint32_t connection_id); + void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer); + void setForwardedFor(const String & forwarded_for); + void setQueryKind(ClientInfo::QueryKind query_kind); + void setQueryKindInitial(); + void setQueryKindReplicatedDatabaseInternal(); + void setCurrentUserName(const String & current_user_name); + void setCurrentAddress(const Poco::Net::SocketAddress & current_address); + void setInitialUserName(const String & initial_user_name); + void setInitialAddress(const Poco::Net::SocketAddress & initial_address); + void setInitialQueryId(const String & initial_query_id); + void setInitialQueryStartTime(std::chrono::time_point initial_query_start_time); + void setQuotaClientKey(const String & quota_key); + void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); + void setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica); + void increaseDistributedDepth(); + const OpenTelemetry::TracingContext & getClientTraceContext() const { return client_info.client_trace_context; } + OpenTelemetry::TracingContext & getClientTraceContext() { return client_info.client_trace_context; } + enum StorageNamespace { ResolveGlobal = 1u, /// Database name must be specified @@ -637,6 +659,14 @@ public: const String & view_name = {}); void addQueryAccessInfo(const Names & partition_names); + struct QualifiedProjectionName + { + StorageID storage_id = StorageID::createEmpty(); + String projection_name; + explicit operator bool() const { return !projection_name.empty(); } + }; + void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name); + /// Supported factories for records in query_log enum class QueryLogFactories @@ -705,11 +735,11 @@ public: void applySettingsChanges(const SettingsChanges & changes); /// Checks the constraints. 
- void checkSettingsConstraints(const SettingsProfileElements & profile_elements) const; - void checkSettingsConstraints(const SettingChange & change) const; - void checkSettingsConstraints(const SettingsChanges & changes) const; - void checkSettingsConstraints(SettingsChanges & changes) const; - void clampToSettingsConstraints(SettingsChanges & changes) const; + void checkSettingsConstraints(const SettingsProfileElements & profile_elements, SettingSource source) const; + void checkSettingsConstraints(const SettingChange & change, SettingSource source) const; + void checkSettingsConstraints(const SettingsChanges & changes, SettingSource source) const; + void checkSettingsConstraints(SettingsChanges & changes, SettingSource source) const; + void clampToSettingsConstraints(SettingsChanges & changes, SettingSource source) const; void checkMergeTreeSettingsConstraints(const MergeTreeSettings & merge_tree_settings, const SettingsChanges & changes) const; /// Reset settings to default value @@ -766,6 +796,10 @@ public: void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); const RemoteHostFilter & getRemoteHostFilter() const; + /// Storage of forbidden HTTP headers from config.xml + void setHTTPHeaderFilter(const Poco::Util::AbstractConfiguration & config); + const HTTPHeaderFilter & getHTTPHeaderFilter() const; + /// The port that the server listens for executing SQL queries. UInt16 getTCPPort() const; @@ -827,6 +861,8 @@ public: OvercommitTracker * getGlobalOvercommitTracker() const; + SessionTracker & getSessionTracker(); + MergeList & getMergeList(); const MergeList & getMergeList() const; @@ -855,7 +891,6 @@ public: void setClientProtocolVersion(UInt64 version); #if USE_ROCKSDB - MergeTreeMetadataCachePtr getMergeTreeMetadataCache() const; MergeTreeMetadataCachePtr tryGetMergeTreeMetadataCache() const; #endif @@ -880,44 +915,39 @@ public: void setSystemZooKeeperLogAfterInitializationIfNeeded(); + /// --- Caches ------------------------------------------------------------------------------------------ + /// Create a cache of uncompressed blocks of specified size. This can be done only once. void setUncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes); std::shared_ptr getUncompressedCache() const; - void dropUncompressedCache() const; + void clearUncompressedCache() const; /// Create a cache of marks of specified size. This can be done only once. void setMarkCache(const String & mark_cache_policy, size_t cache_size_in_bytes); std::shared_ptr getMarkCache() const; - void dropMarkCache() const; + void clearMarkCache() const; ThreadPool & getLoadMarksThreadpool() const; - ThreadPool & getPrefetchThreadpool() const; - - /// Note: prefetchThreadpool is different from threadpoolReader - /// in the way that its tasks are - wait for marks to be loaded - /// and make a prefetch by putting a read task to threadpoolReader. - size_t getPrefetchThreadpoolSize() const; - /// Create a cache of index uncompressed blocks of specified size. This can be done only once. void setIndexUncompressedCache(size_t max_size_in_bytes); std::shared_ptr getIndexUncompressedCache() const; - void dropIndexUncompressedCache() const; + void clearIndexUncompressedCache() const; /// Create a cache of index marks of specified size. This can be done only once. 
void setIndexMarkCache(size_t cache_size_in_bytes); std::shared_ptr getIndexMarkCache() const; - void dropIndexMarkCache() const; + void clearIndexMarkCache() const; /// Create a cache of mapped files to avoid frequent open/map/unmap/close and to reuse from several threads. void setMMappedFileCache(size_t cache_size_in_num_entries); std::shared_ptr getMMappedFileCache() const; - void dropMMappedFileCache() const; + void clearMMappedFileCache() const; /// Create a cache of query results for statements which run repeatedly. - void setQueryCache(const Poco::Util::AbstractConfiguration & config); + void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows); void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getQueryCache() const; - void dropQueryCache() const; + void clearQueryCache() const; /** Clear the caches of the uncompressed blocks and marks. * This is usually done when renaming tables, changing the type of columns, deleting a table. @@ -925,7 +955,16 @@ public: * (when deleting a table - it is necessary, since in its place another can appear) * const - because the change in the cache is not considered significant. */ - void dropCaches() const; + void clearCaches() const; + + /// ----------------------------------------------------------------------------------------------------- + + ThreadPool & getPrefetchThreadpool() const; + + /// Note: prefetchThreadpool is different from threadpoolReader + /// in the way that its tasks are - wait for marks to be loaded + /// and make a prefetch by putting a read task to threadpoolReader. + size_t getPrefetchThreadpoolSize() const; /// Settings for MergeTree background tasks stored in config.xml BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const; @@ -964,6 +1003,9 @@ public: void initializeMergeTreeMetadataCache(const String & dir, size_t size); #endif + /// Call after unexpected crash happen. + void handleCrash() const; + bool hasTraceCollector() const; /// Nullptr if the query log is not ready for this moment. @@ -983,6 +1025,8 @@ public: std::shared_ptr getFilesystemReadPrefetchesLog() const; std::shared_ptr getAsynchronousInsertLog() const; + std::vector getSystemLogs() const; + /// Returns an object used to log operations with parts if it possible. /// Provide table name to make required checks. std::shared_ptr getPartLog(const String & part_database) const; @@ -1024,6 +1068,13 @@ public: void setConfigReloadCallback(ConfigReloadCallback && callback); void reloadConfig() const; + using StartStopServersCallback = std::function; + void setStartServersCallback(StartStopServersCallback && callback); + void setStopServersCallback(StartStopServersCallback && callback); + + void startServers(const ServerType & server_type) const; + void stopServers(const ServerType & server_type) const; + void shutdown(); bool isInternalQuery() const { return is_internal_query; } @@ -1122,9 +1173,6 @@ public: /** Get settings for reading from filesystem. */ ReadSettings getReadSettings() const; - /** Get settings for reading from filesystem for BACKUPs. */ - ReadSettings getBackupReadSettings() const; - /** Get settings for writing to filesystem. 
*/ WriteSettings getWriteSettings() const; @@ -1149,9 +1197,7 @@ private: void initGlobal(); - /// Compute and set actual user settings, client_info.current_user should be set - void calculateAccessRights(); - void recalculateAccessRightsIfNeeded(std::string_view setting_name); + void setUserID(const UUID & user_id_); template void checkAccessImpl(const Args &... args) const; diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index 08c08ffecd1..ec693eb7931 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -83,6 +83,6 @@ void collectCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, co stack_trace.toStringEveryLine([&trace_full](std::string_view line) { trace_full.push_back(line); }); CrashLogElement element{static_cast(time / 1000000000), time, signal, thread_id, query_id, trace, trace_full}; - crash_log_owned->add(element); + crash_log_owned->add(std::move(element)); } } diff --git a/src/Interpreters/CrashLog.h b/src/Interpreters/CrashLog.h index 78794574c82..65714295be4 100644 --- a/src/Interpreters/CrashLog.h +++ b/src/Interpreters/CrashLog.h @@ -45,6 +45,11 @@ public: { crash_log = crash_log_; } + + static consteval size_t getDefaultMaxSize() { return 1024; } + static consteval size_t getDefaultReservedSize() { return 1024; } + static consteval size_t getDefaultFlushIntervalMilliseconds() { return 1000; } + static consteval size_t shouldNotifyFlushOnCrash() { return true; } }; } diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index b24856a6146..4e684f5899f 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -199,7 +199,7 @@ ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const Z auto query_context = Context::createCopy(from_context); query_context->makeQueryContext(); query_context->setCurrentQueryId(""); // generate random query_id - query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; + query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); if (entry.settings) query_context->applySettingsChanges(*entry.settings); return query_context; @@ -439,8 +439,8 @@ void DatabaseReplicatedTask::parseQueryFromEntry(ContextPtr context) ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & zookeeper) { auto query_context = DDLTaskBase::makeQueryContext(from_context, zookeeper); - query_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; - query_context->getClientInfo().is_replicated_database_internal = true; + query_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + query_context->setQueryKindReplicatedDatabaseInternal(); query_context->setCurrentDatabase(database->getDatabaseName()); auto txn = std::make_shared(zookeeper, database->zookeeper_path, is_initial_query, entry_path); diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 81c78000ac3..8be334d6223 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -476,7 +476,7 @@ bool DDLWorker::tryExecuteQuery(DDLTaskBase & task, const ZooKeeperPtr & zookeep query_context->setSetting("implicit_transaction", Field{0}); } - query_context->getClientInfo().initial_query_id = task.entry.initial_query_id; + query_context->setInitialQueryId(task.entry.initial_query_id); if (!task.is_initial_query) query_scope.emplace(query_context); @@ -551,7 +551,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & 
zookeeper) chassert(!task.completely_processed); /// Setup tracing context on current thread for current DDL - OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , + OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__, task.entry.tracing_context, this->context->getOpenTelemetrySpanLog()); tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER; @@ -574,7 +574,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) if (create_active_res != Coordination::Error::ZNONODE && create_active_res != Coordination::Error::ZNODEEXISTS) { chassert(Coordination::isHardwareError(create_active_res)); - throw Coordination::Exception(create_active_res, active_node_path); + throw Coordination::Exception::fromPath(create_active_res, active_node_path); } /// Status dirs were not created in enqueueQuery(...) or someone is removing entry diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 23a67f4bc2f..3baaa182d51 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +25,7 @@ #include #include +#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_MYSQL # include #endif @@ -35,7 +38,6 @@ # include #endif - namespace CurrentMetrics { extern const Metric TablesToDropQueueSize; @@ -59,6 +61,29 @@ namespace ErrorCodes extern const int UNFINISHED; } +class DatabaseNameHints : public IHints<1, DatabaseNameHints> +{ +public: + explicit DatabaseNameHints(const DatabaseCatalog & database_catalog_) + : database_catalog(database_catalog_) + { + } + Names getAllRegisteredNames() const override + { + Names result; + auto databases_list = database_catalog.getDatabases(); + for (const auto & database_name : databases_list | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; + result.emplace_back(database_name); + } + return result; + } +private: + const DatabaseCatalog & database_catalog; +}; + TemporaryTableHolder::TemporaryTableHolder(ContextPtr context_, const TemporaryTableHolder::Creator & creator, const ASTPtr & query) : WithContext(context_->getGlobalContext()) , temporary_tables(DatabaseCatalog::instance().getDatabaseForTemporaryTables().get()) @@ -313,7 +338,18 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( { assert(!db_and_table.first && !db_and_table.second); if (exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); + { + TableNameHints hints(this->tryGetDatabase(table_id.getDatabaseName()), getContext()); + std::vector names = hints.getHints(table_id.getTableName()); + if (!names.empty()) + { + /// There are two options: the first is to print just the name of the table + /// and the second is to print the result in the format db_name.table_name. I'll comment out the second option below + /// I also leave the possibility to print several suggestions + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist.
Maybe you meant {}?", table_id.getNameForLogs(), backQuoteIfNeed(names[0]))); + } + else exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_id.getNameForLogs())); + } return {}; } @@ -336,7 +372,6 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( return db_and_table; } - if (table_id.database_name == TEMPORARY_DATABASE) { /// For temporary tables UUIDs are set in Context::resolveStorageID(...). @@ -349,21 +384,69 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { - std::lock_guard lock{databases_mutex}; - auto it = databases.find(table_id.getDatabaseName()); - if (databases.end() == it) + // Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part. + // So, fail early and gracefully... + if (!table_id.hasDatabase()) { if (exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()))); + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Empty database name")); return {}; } - database = it->second; + + std::lock_guard lock{databases_mutex}; + auto it = databases.find(table_id.getDatabaseName()); + if (databases.end() != it) + database = it->second; } - auto table = database->tryGetTable(table_id.table_name, context_); - if (!table && exception) - exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs())); + if (!database) + { + if (exception) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(table_id.getDatabaseName()); + if (names.empty()) + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(table_id.getDatabaseName()))); + } + else + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(table_id.getDatabaseName()), backQuoteIfNeed(names[0]))); + } + } + return {}; + } + StoragePtr table; + if (exception) + { + try + { + table = database->getTable(table_id.table_name, context_); + } + catch (const Exception & e) + { + exception->emplace(e); + } + } + else + { + table = database->tryGetTable(table_id.table_name, context_); + } + + if (!table && exception && !exception->has_value()) + { + TableNameHints hints(this->tryGetDatabase(table_id.getDatabaseName()), getContext()); + std::vector names = hints.getHints(table_id.getTableName()); + if (names.empty()) + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", table_id.getNameForLogs())); + } + else + { + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist. 
Maybe you meant {}?", table_id.getNameForLogs(), backQuoteIfNeed(names[0]))); + } + } if (!table) database = nullptr; @@ -414,8 +497,26 @@ bool DatabaseCatalog::isPredefinedTable(const StorageID & table_id) const void DatabaseCatalog::assertDatabaseExists(const String & database_name) const { - std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); + DatabasePtr db; + { + std::lock_guard lock{databases_mutex}; + assert(!database_name.empty()); + if (auto it = databases.find(database_name); it != databases.end()) + db = it->second; + } + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } } void DatabaseCatalog::assertDatabaseDoesntExist(const String & database_name) const @@ -424,19 +525,11 @@ void DatabaseCatalog::assertDatabaseDoesntExist(const String & database_name) co assertDatabaseDoesntExistUnlocked(database_name); } -void DatabaseCatalog::assertDatabaseExistsUnlocked(const String & database_name) const -{ - assert(!database_name.empty()); - if (databases.end() == databases.find(database_name)) - throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(database_name)); -} - - void DatabaseCatalog::assertDatabaseDoesntExistUnlocked(const String & database_name) const { assert(!database_name.empty()); if (databases.end() != databases.find(database_name)) - throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", backQuoteIfNeed(database_name)); + throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists", backQuoteIfNeed(database_name)); } void DatabaseCatalog::attachDatabase(const String & database_name, const DatabasePtr & database) @@ -456,18 +549,34 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri { if (database_name == TEMPORARY_DATABASE) throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Cannot detach database with temporary tables."); - + assert(!database_name.empty()); DatabasePtr db; { std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); - db = databases.find(database_name)->second; - UUID db_uuid = db->getUUID(); - if (db_uuid != UUIDHelpers::Nil) - removeUUIDMapping(db_uuid); - databases.erase(database_name); - } + if (auto it = databases.find(database_name); it != databases.end()) + { + db = it->second; + UUID db_uuid = db->getUUID(); + if (db_uuid != UUIDHelpers::Nil) + removeUUIDMapping(db_uuid); + databases.erase(database_name); + + } + } + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. 
Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } if (check_empty) { try @@ -503,7 +612,6 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri if (db_uuid != UUIDHelpers::Nil) removeUUIDMappingFinally(db_uuid); } - return db; } @@ -529,9 +637,28 @@ void DatabaseCatalog::updateDatabaseName(const String & old_name, const String & DatabasePtr DatabaseCatalog::getDatabase(const String & database_name) const { - std::lock_guard lock{databases_mutex}; - assertDatabaseExistsUnlocked(database_name); - return databases.find(database_name)->second; + assert(!database_name.empty()); + DatabasePtr db; + { + std::lock_guard lock{databases_mutex}; + if (auto it = databases.find(database_name); it != databases.end()) + db = it->second; + } + + if (!db) + { + DatabaseNameHints hints(*this); + std::vector names = hints.getHints(database_name); + if (names.empty()) + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name)); + } + else + { + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist. Maybe you meant {}?", backQuoteIfNeed(database_name), backQuoteIfNeed(names[0])); + } + } + return db; } DatabasePtr DatabaseCatalog::tryGetDatabase(const String & database_name) const @@ -697,6 +824,7 @@ DatabaseCatalog::DatabaseCatalog(ContextMutablePtr global_context_) , loading_dependencies{"LoadingDeps"} , view_dependencies{"ViewDeps"} , log(&Poco::Logger::get("DatabaseCatalog")) + , first_async_drop_in_queue(tables_marked_dropped.end()) { } @@ -959,9 +1087,17 @@ void DatabaseCatalog::enqueueDroppedTableCleanup(StorageID table_id, StoragePtr std::lock_guard lock(tables_marked_dropped_mutex); if (ignore_delay) - tables_marked_dropped.push_front({table_id, table, dropped_metadata_path, drop_time}); + { + /// Insert it before first_async_drop_in_queue, so sync drop queries will have priority over async ones, + /// but the queue will remain fair for multiple sync drop queries. + tables_marked_dropped.emplace(first_async_drop_in_queue, TableMarkedAsDropped{table_id, table, dropped_metadata_path, drop_time}); + } else + { tables_marked_dropped.push_back({table_id, table, dropped_metadata_path, drop_time + drop_delay_sec}); + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; + } tables_marked_dropped_ids.insert(table_id.uuid); CurrentMetrics::add(CurrentMetrics::TablesToDropQueueSize, 1); @@ -1012,6 +1148,8 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) /// This maybe throw exception. renameNoReplace(latest_metadata_dropped_path, table_metadata_path); + if (first_async_drop_in_queue == it_dropped_table) + ++first_async_drop_in_queue; tables_marked_dropped.erase(it_dropped_table); [[maybe_unused]] auto removed = tables_marked_dropped_ids.erase(dropped_table.table_id.uuid); assert(removed); @@ -1074,6 +1212,8 @@ void DatabaseCatalog::dropTableDataTask() table = std::move(*it); LOG_INFO(log, "Have {} tables in drop queue ({} of them are in use), will try drop {}", tables_marked_dropped.size(), tables_in_use_count, table.table_id.getNameForLogs()); + if (first_async_drop_in_queue == it) + ++first_async_drop_in_queue; tables_marked_dropped.erase(it); /// Schedule the task as soon as possible, while there are suitable tables to drop. 
schedule_after_ms = 0; @@ -1110,6 +1250,8 @@ void DatabaseCatalog::dropTableDataTask() table.drop_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) + drop_error_cooldown_sec; std::lock_guard lock(tables_marked_dropped_mutex); tables_marked_dropped.emplace_back(std::move(table)); + if (first_async_drop_in_queue == tables_marked_dropped.end()) + --first_async_drop_in_queue; /// If list of dropped tables was empty, schedule a task to retry deletion. if (tables_marked_dropped.size() == 1) { diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index d502505027f..d734e6bc149 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -6,7 +6,10 @@ #include #include #include +#include "Common/NamePrompter.h" #include +#include "Storages/IStorage.h" +#include "Databases/IDatabase.h" #include #include @@ -27,6 +30,32 @@ namespace fs = std::filesystem; namespace DB { +class TableNameHints : public IHints<1, TableNameHints> +{ +public: + TableNameHints(ConstDatabasePtr database_, ContextPtr context_) + : context(context_), + database(database_) + { + } + Names getAllRegisteredNames() const override + { + Names result; + if (database) + { + for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) + { + const auto & storage_id = table_it->table()->getStorageID(); + result.emplace_back(storage_id.getTableName()); + } + } + return result; + } +private: + ContextPtr context; + ConstDatabasePtr database; +}; + class IDatabase; class Exception; class ColumnsDescription; @@ -262,7 +291,6 @@ private: static std::unique_ptr database_catalog; explicit DatabaseCatalog(ContextMutablePtr global_context_); - void assertDatabaseExistsUnlocked(const String & database_name) const TSA_REQUIRES(databases_mutex); void assertDatabaseDoesntExistUnlocked(const String & database_name) const TSA_REQUIRES(databases_mutex); void shutdownImpl(); @@ -323,6 +351,7 @@ private: mutable std::mutex ddl_guards_mutex; TablesMarkedAsDropped tables_marked_dropped TSA_GUARDED_BY(tables_marked_dropped_mutex); + TablesMarkedAsDropped::iterator first_async_drop_in_queue TSA_GUARDED_BY(tables_marked_dropped_mutex); std::unordered_set tables_marked_dropped_ids TSA_GUARDED_BY(tables_marked_dropped_mutex); mutable std::mutex tables_marked_dropped_mutex; diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index e55ac278705..5f664314818 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -30,6 +30,7 @@ public: std::shared_ptr storage_, const Block & right_sample_block_with_storage_column_names_); + std::string getName() const override { return "DirectKeyValueJoin"; } virtual const TableJoin & getTableJoin() const override { return *table_join; } virtual bool addBlockToJoin(const Block &, bool) override; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 3d821b60e81..ded99df9f74 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -98,7 +98,7 @@ static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqu void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { auto hash = subquery.getTreeHash(); - auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second); + const auto scalar_query_hash_str = toString(hash); std::unique_ptr interpreter = 
nullptr; bool hit = false; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index f27d23e8e94..acedede579b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -548,15 +548,17 @@ void ExpressionAnalyzer::getRootActionsForWindowFunctions(const ASTPtr & ast, bo void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, AggregateDescriptions & descriptions) { - for (const ASTFunction * node : aggregates()) + for (const ASTPtr & ast : aggregates()) { + const ASTFunction & node = typeid_cast(*ast); + AggregateDescription aggregate; - if (node->arguments) - getRootActionsNoMakeSet(node->arguments, actions); + if (node.arguments) + getRootActionsNoMakeSet(node.arguments, actions); - aggregate.column_name = node->getColumnName(); + aggregate.column_name = node.getColumnName(); - const ASTs & arguments = node->arguments ? node->arguments->children : ASTs(); + const ASTs & arguments = node.arguments ? node.arguments->children : ASTs(); aggregate.argument_names.resize(arguments.size()); DataTypes types(arguments.size()); @@ -568,7 +570,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr { throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier '{}' in aggregate function '{}'", - name, node->formatForErrorMessage()); + name, node.formatForErrorMessage()); } types[i] = dag_node->result_type; @@ -576,8 +578,8 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr } AggregateFunctionProperties properties; - aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters, "", getContext()) : Array(); - aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters, properties); + aggregate.parameters = (node.parameters) ? getAggregateFunctionParametersArray(node.parameters, "", getContext()) : Array(); + aggregate.function = AggregateFunctionFactory::instance().get(node.name, types, aggregate.parameters, properties); descriptions.push_back(aggregate); } @@ -744,12 +746,13 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) } // Window functions - for (const ASTFunction * function_node : syntax->window_function_asts) + for (const ASTPtr & ast : syntax->window_function_asts) { - assert(function_node->is_window_function); + const ASTFunction & function_node = typeid_cast(*ast); + assert(function_node.is_window_function); WindowFunctionDescription window_function; - window_function.function_node = function_node; + window_function.function_node = &function_node; window_function.column_name = window_function.function_node->getColumnName(); window_function.function_parameters @@ -760,7 +763,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. - // Hence we clone the node (not very sane either, I know). + // Hence, we clone the node (not very sane either, I know). getRootActionsNoMakeSet(window_function.function_node->clone(), actions); const ASTs & arguments @@ -793,22 +796,22 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) // Find the window corresponding to this function. It may be either // referenced by name and previously defined in WINDOW clause, or it // may be defined inline. 
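
// A minimal sketch of the ownership pattern behind the aggregates()/window_function_asts refactor
// above. The types below are simplified stand-ins that only share names with the real ClickHouse
// classes, and plain dynamic_cast stands in for typeid_cast: the analyzer now stores owning ASTPtr
// values to the base node type and casts to ASTFunction only at the point of use.
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct IAST { virtual ~IAST() = default; };
struct ASTFunction : IAST
{
    std::string name;
    explicit ASTFunction(std::string n) : name(std::move(n)) {}
};

using ASTPtr = std::shared_ptr<IAST>;
using ASTs = std::vector<ASTPtr>;

int main()
{
    ASTs aggregates;                                                    // owning, keeps the nodes alive
    aggregates.push_back(std::make_shared<ASTFunction>("sum"));
    aggregates.push_back(std::make_shared<ASTFunction>("count"));

    for (const ASTPtr & ast : aggregates)
    {
        const auto & node = dynamic_cast<const ASTFunction &>(*ast);    // cast at the use site
        std::cout << node.name << '\n';
    }
}
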
- if (!function_node->window_name.empty()) + if (!function_node.window_name.empty()) { - auto it = window_descriptions.find(function_node->window_name); + auto it = window_descriptions.find(function_node.window_name); if (it == std::end(window_descriptions)) { throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Window '{}' is not defined (referenced by '{}')", - function_node->window_name, - function_node->formatForErrorMessage()); + function_node.window_name, + function_node.formatForErrorMessage()); } it->second.window_functions.push_back(window_function); } else { - const auto & definition = function_node->window_definition->as< + const auto & definition = function_node.window_definition->as< const ASTWindowDefinition &>(); WindowDescription desc; desc.window_name = definition.getDefaultWindowName(); @@ -1127,9 +1130,17 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin( if (auto storage = analyzed_join->getStorageJoin()) { + auto joined_block_actions = analyzed_join->createJoinedBlockActions(getContext()); + NamesWithAliases required_columns_with_aliases = analyzed_join->getRequiredColumns( + Block(joined_block_actions->getResultColumns()), joined_block_actions->getRequiredColumns().getNames()); + + Names original_right_column_names; + for (auto & pr : required_columns_with_aliases) + original_right_column_names.push_back(pr.first); + auto right_columns = storage->getRightSampleBlock().getColumnsWithTypeAndName(); std::tie(left_convert_actions, right_convert_actions) = analyzed_join->createConvertingActions(left_columns, right_columns); - return storage->getJoinLocked(analyzed_join, getContext()); + return storage->getJoinLocked(analyzed_join, getContext(), original_right_column_names); } joined_plan = buildJoinedPlan(getContext(), join_element, *analyzed_join, query_options); @@ -1323,10 +1334,13 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression GetAggregatesVisitor(data).visit(select_query->orderBy()); /// TODO: data.aggregates -> aggregates() - for (const ASTFunction * node : data.aggregates) - if (node->arguments) - for (auto & argument : node->arguments->children) + for (const ASTPtr & ast : data.aggregates) + { + const ASTFunction & node = typeid_cast(*ast); + if (node.arguments) + for (auto & argument : node.arguments->children) getRootActions(argument, only_types, step.actions()); + } } void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( @@ -1378,10 +1392,9 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( void SelectQueryExpressionAnalyzer::appendExpressionsAfterWindowFunctions(ExpressionActionsChain & chain, bool /* only_types */) { ExpressionActionsChain::Step & step = chain.lastStep(columns_after_window); + for (const auto & expression : syntax->expressions_with_window_function) - { getRootActionsForWindowFunctions(expression->clone(), true, step.actions()); - } } void SelectQueryExpressionAnalyzer::appendGroupByModifiers(ActionsDAGPtr & before_aggregation, ExpressionActionsChain & chain, bool /* only_types */) @@ -1760,9 +1773,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( /// second_stage: Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. /** First we compose a chain of actions and remember the necessary steps from it. - * Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and - * throw out unnecessary columns based on the entire query. 
In unnecessary parts of the query, we will not execute subqueries. - */ + * Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and + * throw out unnecessary columns based on the entire query. In unnecessary parts of the query, we will not execute subqueries. + */ const ASTSelectQuery & query = *query_analyzer.getSelectQuery(); auto context = query_analyzer.getContext(); @@ -1805,7 +1818,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (storage && (query.sampleSize() || settings.parallel_replicas_count > 1)) { - // we evaluate sampling for Merge lazily so we need to get all the columns + // we evaluate sampling for Merge lazily, so we need to get all the columns if (storage->getName() == "Merge") { const auto columns = metadata_snapshot->getColumns().getAll(); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 271c3943afc..941194e69ff 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -168,7 +168,7 @@ protected: const ConstStoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists. const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; } const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; } - const std::vector & aggregates() const { return syntax->aggregates; } + const ASTs & aggregates() const { return syntax->aggregates; } /// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables. void initGlobalSubqueriesAndExternalTables(bool do_global, bool is_explain); diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 17f0fda71ec..b660db064d1 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -40,6 +40,8 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"source_file_path", std::make_shared()}, {"file_segment_range", std::make_shared(types)}, {"total_requested_range", std::make_shared(types)}, + {"key", std::make_shared()}, + {"offset", std::make_shared()}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, {"read_from_cache_attempted", std::make_shared()}, @@ -60,6 +62,8 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(source_file_path); columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); columns[i++]->insert(Tuple{requested_range.first, requested_range.second}); + columns[i++]->insert(file_segment_key); + columns[i++]->insert(file_segment_offset); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(cache_type)); columns[i++]->insert(read_from_cache_attempted); diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 1b22d561c51..0d088a922e0 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,16 +11,7 @@ namespace DB { -/// -/// -------- Column --------- Type ------ -/// | event_date | DateTime | -/// | event_time | UInt64 | -/// | query_id | String | -/// | remote_file_path | String | -/// | segment_range | Tuple | -/// | read_type | String | -/// ------------------------------------- -/// + struct FilesystemCacheLogElement { enum class CacheType @@ -39,6 +30,8 @@ struct FilesystemCacheLogElement std::pair file_segment_range{}; std::pair requested_range{}; CacheType cache_type{}; + std::string 
file_segment_key; + size_t file_segment_offset; size_t file_segment_size; bool read_from_cache_attempted; String read_buffer_id; diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index a6b53a51c04..3fc9f8920ed 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -28,6 +28,7 @@ public: LOG_TRACE(&Poco::Logger::get("FullSortingMergeJoin"), "Will use full sorting merge join"); } + std::string getName() const override { return "FullSortingMergeJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } bool addBlockToJoin(const Block & /* block */, bool /* check_limits */) override diff --git a/src/Interpreters/GetAggregatesVisitor.cpp b/src/Interpreters/GetAggregatesVisitor.cpp index dd958693d89..718721308b1 100644 --- a/src/Interpreters/GetAggregatesVisitor.cpp +++ b/src/Interpreters/GetAggregatesVisitor.cpp @@ -1,4 +1,6 @@ #include +#include + namespace DB { @@ -13,7 +15,7 @@ struct WindowExpressionsCollectorChildInfo bool window_function_in_subtree = false; }; -// This visitor travers AST and collects the list of expressions which depend on +// This visitor traverses the AST and collects the list of expressions which depend on // evaluation of window functions. Expression is collected only if // it's not a part of another expression. // @@ -26,15 +28,18 @@ struct WindowExpressionsCollectorMatcher { if (child->as() || child->as()) return false; + if (auto * select = node->as()) { - // We don't analysis WITH statement because it might contain useless aggregates + // We don't analyse the WITH statement because it might contain useless aggregates if (child == select->with()) return false; } - // We procces every expression manually + + // We process every expression manually if (auto * func = node->as()) return false; + return true; } @@ -50,6 +55,8 @@ struct WindowExpressionsCollectorMatcher ASTPtr & ast, const ASTPtr & parent) { + checkStackSize(); + if (auto * func = ast->as()) { if (func->is_window_function) @@ -67,7 +74,7 @@ struct WindowExpressionsCollectorMatcher { func->compute_after_window_functions = true; if ((!parent || !parent->as())) - expressions_with_window_functions.push_back(func); + expressions_with_window_functions.push_back(ast); } return result; @@ -75,15 +82,16 @@ struct WindowExpressionsCollectorMatcher return {}; } - std::vector expressions_with_window_functions {}; + ASTs expressions_with_window_functions; }; using WindowExpressionsCollectorVisitor = InDepthNodeVisitorWithChildInfo; -std::vector getExpressionsWithWindowFunctions(ASTPtr & ast) +ASTs getExpressionsWithWindowFunctions(ASTPtr & ast) { WindowExpressionsCollectorVisitor visitor; visitor.visit(ast); + return std::move(visitor.expressions_with_window_functions); } diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h index 3f5804c39a0..7bf6591af69 100644 --- a/src/Interpreters/GetAggregatesVisitor.h +++ b/src/Interpreters/GetAggregatesVisitor.h @@ -26,8 +26,8 @@ public: // Explicit empty initializers are needed to make designated initializers // work on GCC 10. 
std::unordered_set uniq_names {}; - std::vector aggregates {}; - std::vector window_functions {}; + ASTs aggregates; + ASTs window_functions; }; static bool needChildVisit(const ASTPtr & node, const ASTPtr & child) @@ -61,7 +61,7 @@ public: } private: - static void visit(const ASTFunction & node, const ASTPtr &, Data & data) + static void visit(const ASTFunction & node, const ASTPtr & ast, Data & data) { if (isAggregateFunction(node)) { @@ -74,7 +74,7 @@ private: return; data.uniq_names.insert(column_name); - data.aggregates.push_back(&node); + data.aggregates.push_back(ast); } else if (node.is_window_function) { @@ -87,7 +87,7 @@ private: return; data.uniq_names.insert(column_name); - data.window_functions.push_back(&node); + data.window_functions.push_back(ast); } } @@ -114,6 +114,6 @@ inline void assertNoAggregates(const ASTPtr & ast, const char * description) GetAggregatesVisitor(data).visit(ast); } -std::vector getExpressionsWithWindowFunctions(ASTPtr & ast); +ASTs getExpressionsWithWindowFunctions(ASTPtr & ast); } diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5b633fee9b6..08378c3158b 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -115,7 +115,7 @@ public: if (alias.empty()) { auto hash = subquery_or_table_name->getTreeHash(); - external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second); + external_table_name = fmt::format("_data_{}", toString(hash)); } else external_table_name = alias; diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index edf604bc0b4..89ea3a326cc 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -1,15 +1,11 @@ #include #include #include -#include #include #include #include -#include -#include -#include #include #include @@ -18,6 +14,9 @@ #include +#include + + namespace CurrentMetrics { extern const Metric TemporaryFilesForJoin; @@ -302,7 +301,8 @@ void GraceHashJoin::initBuckets() bool GraceHashJoin::isSupported(const std::shared_ptr & table_join) { bool is_asof = (table_join->strictness() == JoinStrictness::Asof); - return !is_asof && isInnerOrLeft(table_join->kind()) && table_join->oneDisjunct(); + auto kind = table_join->kind(); + return !is_asof && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind)) && table_join->oneDisjunct(); } GraceHashJoin::~GraceHashJoin() = default; @@ -322,7 +322,6 @@ bool GraceHashJoin::hasMemoryOverflow(size_t total_rows, size_t total_bytes) con /// One row can't be split, avoid loop if (total_rows < 2) return false; - bool has_overflow = !table_join->sizeLimits().softCheck(total_rows, total_bytes); if (has_overflow) @@ -494,17 +493,30 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const return hash_join_is_empty; } -IBlocksStreamPtr GraceHashJoin::getNonJoinedBlocks(const Block &, const Block &, UInt64) const +/// Each bucket are handled by the following steps +/// 1. build hash_join by the right side blocks. +/// 2. join left side with the hash_join, +/// 3. read right non-joined blocks from hash_join. +/// buckets are handled one by one, each hash_join will not be release before the right non-joined blocks are emitted. +/// +/// There is a finished counter in JoiningTransform/DelayedJoinedBlocksWorkerTransform, +/// only one processor could take the non-joined blocks from right stream, and ensure all rows from +/// left stream have been emitted before this. 
+IBlocksStreamPtr +GraceHashJoin::getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size_) const { - /// We do no support returning non joined blocks here. - /// TODO: They _should_ be reported by getDelayedBlocks instead - return nullptr; + return hash_join->getNonJoinedBlocks(left_sample_block_, result_sample_block_, max_block_size_); } class GraceHashJoin::DelayedBlocks : public IBlocksStream { public: - explicit DelayedBlocks(size_t current_bucket_, Buckets buckets_, InMemoryJoinPtr hash_join_, const Names & left_key_names_, const Names & right_key_names_) + explicit DelayedBlocks( + size_t current_bucket_, + Buckets buckets_, + InMemoryJoinPtr hash_join_, + const Names & left_key_names_, + const Names & right_key_names_) : current_bucket(current_bucket_) , buckets(std::move(buckets_)) , hash_join(std::move(hash_join_)) @@ -522,12 +534,15 @@ public: do { + // One DelayedBlocks is shared among multiple DelayedJoinedBlocksWorkerTransform. + // There is a lock inside left_reader.read() . block = left_reader.read(); if (!block) { return {}; } + // block comes from left_reader, need to join with right table to get the result. Blocks blocks = JoinCommon::scatterBlockByHash(left_key_names, block, num_buckets); block = std::move(blocks[current_idx]); diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index bce04ee6b04..44949440467 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -13,7 +13,6 @@ namespace DB { - class TableJoin; class HashJoin; @@ -61,6 +60,7 @@ public: ~GraceHashJoin() override; + std::string getName() const override { return "GraceHashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } void initialize(const Block & sample_block) override; @@ -79,7 +79,7 @@ public: bool supportTotals() const override { return false; } IBlocksStreamPtr - getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override; + getNonJoinedBlocks(const Block & left_sample_block_, const Block & result_sample_block_, UInt64 max_block_size) const override; /// Open iterator over joined blocks. /// Must be called after all @joinBlock calls. diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 56dea98c1f1..9f55945816c 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -151,6 +151,7 @@ public: ~HashJoin() override; + std::string getName() const override { return "HashJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } /** Add block of data from right hand of JOIN to the map. diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 97b119bd795..493a5dd2126 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -48,6 +48,8 @@ class IJoin public: virtual ~IJoin() = default; + virtual std::string getName() const = 0; + virtual const TableJoin & getTableJoin() const = 0; /// Add block of data from right hand of JOIN. 
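
// A minimal conceptual model of the per-bucket flow described in the GraceHashJoin comments above
// (all containers here are simplified stand-ins, not the real ClickHouse classes): for each bucket,
// build a hash table from the right-side rows, probe it with the left-side rows of the same bucket,
// and only then emit the right rows that never matched (needed now that RIGHT/FULL joins are
// supported), keeping the hash table alive until those non-joined rows are out.
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main()
{
    // Two buckets of pre-scattered rows: key -> payload on the right, keys only on the left.
    std::vector<std::vector<std::pair<int, std::string>>> right_buckets = {{{1, "r1"}, {2, "r2"}}, {{3, "r3"}}};
    std::vector<std::vector<int>> left_buckets = {{1}, {4}};

    for (size_t bucket = 0; bucket < right_buckets.size(); ++bucket)
    {
        // 1. Build the in-memory hash join from the right side of this bucket.
        std::unordered_map<int, std::string> hash_join(right_buckets[bucket].begin(), right_buckets[bucket].end());
        std::unordered_map<int, bool> matched;
        for (const auto & kv : hash_join)
            matched[kv.first] = false;

        // 2. Probe with the left side of the same bucket.
        for (int key : left_buckets[bucket])
        {
            auto it = hash_join.find(key);
            if (it != hash_join.end())
            {
                matched[key] = true;
                std::cout << "joined " << key << " -> " << it->second << '\n';
            }
        }

        // 3. Emit the right-side rows that never matched; the hash table is released only after this.
        for (const auto & [key, was_matched] : matched)
            if (!was_matched)
                std::cout << "non-joined right row " << key << " -> " << hash_join.at(key) << '\n';
    }
}
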
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index e82415f1aca..7d1f9948238 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -132,6 +132,21 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot MATERIALIZE TTL as there is no TTL set for table {}", table->getStorageID().getNameForLogs()); + if (mut_command->type == MutationCommand::UPDATE || mut_command->type == MutationCommand::DELETE) + { + /// TODO: add a check for result query size. + auto rewritten_command_ast = replaceNonDeterministicToScalars(*command_ast, getContext()); + if (rewritten_command_ast) + { + auto * new_alter_command = rewritten_command_ast->as(); + mut_command = MutationCommand::parse(new_alter_command); + if (!mut_command) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Alter command '{}' is rewritten to invalid command '{}'", + queryToString(*command_ast), queryToString(*rewritten_command_ast)); + } + } + mutation_commands.emplace_back(std::move(*mut_command)); } else diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index 714bcd6d356..3b47a002e50 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -15,6 +15,8 @@ namespace DB namespace ErrorCodes { extern const int TABLE_IS_READ_ONLY; + extern const int INCORRECT_QUERY; + extern const int NOT_IMPLEMENTED; } @@ -23,6 +25,30 @@ BlockIO InterpreterCreateIndexQuery::execute() auto current_context = getContext(); const auto & create_index = query_ptr->as(); + if (create_index.unique) + { + if (!current_context->getSettingsRef().create_index_ignore_unique) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CREATE UNIQUE INDEX is not supported." + " SET create_index_ignore_unique=1 to ignore this UNIQUE keyword."); + } + + } + // Noop if allow_create_index_without_type = true. throw otherwise + if (!create_index.index_decl->as()->type) + { + if (!current_context->getSettingsRef().allow_create_index_without_type) + { + throw Exception(ErrorCodes::INCORRECT_QUERY, "CREATE INDEX without TYPE is forbidden." + " SET allow_create_index_without_type=1 to ignore this statements."); + } + else + { + // Nothing to do + return {}; + } + } + AccessRightsElements required_access; required_access.emplace_back(AccessType::ALTER_ADD_INDEX, create_index.getDatabase(), create_index.getTable()); diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index bac59998062..3b0fba5fd9f 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -1,5 +1,4 @@ #include - #include #include #include diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 745dda34828..57189012317 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,6 @@ #include #include -#include #include #include @@ -764,7 +764,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti /// Table function without columns list. 
auto table_function_ast = create.as_table_function->ptr(); auto table_function = TableFunctionFactory::instance().get(table_function_ast, getContext()); - properties.columns = table_function->getActualTableStructure(getContext()); + properties.columns = table_function->getActualTableStructure(getContext(), /*is_insert_query*/ true); } else if (create.is_dictionary) { @@ -1329,10 +1329,32 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } data_path = database->getTableDataPath(create); + auto full_data_path = fs::path{getContext()->getPath()} / data_path; - if (!create.attach && !data_path.empty() && fs::exists(fs::path{getContext()->getPath()} / data_path)) - throw Exception(storage_already_exists_error_code, - "Directory for {} data {} already exists", Poco::toLower(storage_name), String(data_path)); + if (!create.attach && !data_path.empty() && fs::exists(full_data_path)) + { + if (getContext()->getZooKeeperMetadataTransaction() && + !getContext()->getZooKeeperMetadataTransaction()->isInitialQuery() && + !DatabaseCatalog::instance().hasUUIDMapping(create.uuid) && + Context::getGlobalContextInstance()->isServerCompletelyStarted() && + Context::getGlobalContextInstance()->getConfigRef().getBool("allow_moving_table_directory_to_trash", false)) + { + /// This is a secondary query from a Replicated database. It cannot be retried with another UUID, we must execute it as is. + /// We don't have a table with this UUID (and all metadata is loaded), + /// so the existing directory probably contains some leftovers from previous unsuccessful attempts to create the table + + fs::path trash_path = fs::path{getContext()->getPath()} / "trash" / data_path / getHexUIntLowercase(thread_local_rng()); + LOG_WARNING(&Poco::Logger::get("InterpreterCreateQuery"), "Directory for {} data {} already exists. 
Will move it to {}", + Poco::toLower(storage_name), String(data_path), trash_path); + fs::create_directories(trash_path.parent_path()); + renameNoReplace(full_data_path, trash_path); + } + else + { + throw Exception(storage_already_exists_error_code, + "Directory for {} data {} already exists", Poco::toLower(storage_name), String(data_path)); + } + } bool from_path = create.attach_from_path.has_value(); String actual_data_path = data_path; diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 0b9eea86b46..26d9e5254f3 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -96,7 +96,7 @@ BlockIO InterpreterDescribeQuery::execute() else if (table_expression.table_function) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, getContext()); - auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext()); + auto table_function_column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true); for (const auto & table_function_column_description : table_function_column_descriptions) columns.emplace_back(table_function_column_description); } diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index cc3444bb4df..fe49b1cfd7c 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -22,11 +22,7 @@ BlockIO InterpreterDropNamedCollectionQuery::execute() return executeDDLQueryOnCluster(query_ptr, current_context, params); } - if (query.if_exists) - NamedCollectionUtils::removeIfExistsFromSQL(query.collection_name, current_context); - else - NamedCollectionUtils::removeFromSQL(query.collection_name, current_context); - + NamedCollectionUtils::removeFromSQL(query, current_context); return {}; } diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 0beb4492aef..f746732ca9d 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -247,10 +247,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue DatabaseCatalog::instance().removeDependencies(table_id, check_ref_deps, check_loading_deps, is_drop_or_detach_database); database->dropTable(context_, table_id.table_name, query.sync); - /// We have to drop mmapio cache when dropping table from Ordinary database + /// We have to clear mmapio cache when dropping table from Ordinary database /// to avoid reading old data if new table with the same name is created if (database->getUUID() == UUIDHelpers::Nil) - context_->dropMMappedFileCache(); + context_->clearMMappedFileCache(); } db = database; @@ -329,13 +329,12 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, database = tryGetDatabase(database_name, query.if_exists); if (database) { - if (query.kind == ASTDropQuery::Kind::Truncate) - { - throw Exception(ErrorCodes::SYNTAX_ERROR, "Unable to truncate database"); - } - else if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop) + if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop + || query.kind == ASTDropQuery::Kind::Truncate) { bool drop = query.kind == ASTDropQuery::Kind::Drop; + bool truncate = query.kind == 
ASTDropQuery::Kind::Truncate; + getContext()->checkAccess(AccessType::DROP_DATABASE, database_name); if (query.kind == ASTDropQuery::Kind::Detach && query.permanently) @@ -348,6 +347,9 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, { ASTDropQuery query_for_table; query_for_table.kind = query.kind; + // For truncate operation on database, drop the tables + if (truncate) + query_for_table.kind = ASTDropQuery::Kind::Drop; query_for_table.if_exists = true; query_for_table.setDatabase(database_name); query_for_table.sync = query.sync; @@ -361,7 +363,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, std::vector> tables_to_drop; for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next()) { - iterator->table()->flush(); + iterator->table()->flushAndPrepareForShutdown(); tables_to_drop.push_back({iterator->name(), iterator->table()->isDictionary()}); } @@ -375,8 +377,8 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, uuids_to_wait.push_back(table_to_wait); } } - - if (!drop && query.sync) + // only if operation is DETACH + if ((!drop || !truncate) && query.sync) { /// Avoid "some tables are still in use" when sync mode is enabled for (const auto & table_uuid : uuids_to_wait) @@ -385,12 +387,13 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, /// Protects from concurrent CREATE TABLE queries auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name); - - if (!drop) + // only if operation is DETACH + if (!drop || !truncate) database->assertCanBeDetached(true); - /// DETACH or DROP database itself - DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach()); + /// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database. + if (!truncate) + DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach()); } } @@ -432,8 +435,9 @@ AccessRightsElements InterpreterDropQuery::getRequiredAccessForDDLOnCluster() co } void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, - const StorageID & target_table_id, bool sync, bool ignore_sync_setting) + const StorageID & target_table_id, bool sync, bool ignore_sync_setting, bool need_ddl_guard) { + auto ddl_guard = (need_ddl_guard ? DatabaseCatalog::instance().getDDLGuard(target_table_id.database_name, target_table_id.table_name) : nullptr); if (DatabaseCatalog::instance().tryGetTable(target_table_id, current_context)) { /// We create and execute `drop` query for internal table. 
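
// A minimal sketch of the DROP/DETACH/TRUNCATE DATABASE decision logic added above (simplified
// stand-in enum, not the real ASTDropQuery types): TRUNCATE DATABASE reuses the per-table drop path
// by rewriting the per-table kind to Drop, but it skips detaching/erasing the database itself.
#include <cassert>

enum class Kind { Detach, Drop, Truncate };

// Which kind is applied to every table inside the database.
constexpr Kind perTableKind(Kind database_kind)
{
    return database_kind == Kind::Truncate ? Kind::Drop : database_kind;
}

// Whether the database itself is detached (and possibly erased) at the end.
constexpr bool detachDatabaseItself(Kind database_kind)
{
    return database_kind != Kind::Truncate;
}

int main()
{
    assert(perTableKind(Kind::Truncate) == Kind::Drop);   // TRUNCATE DATABASE drops the tables...
    assert(!detachDatabaseItself(Kind::Truncate));        // ...but keeps the database itself
    assert(perTableKind(Kind::Detach) == Kind::Detach);
    assert(detachDatabaseItself(Kind::Drop));
}
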
@@ -451,11 +455,11 @@ void InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind kind, ContextPtr auto drop_context = Context::createCopy(global_context); if (ignore_sync_setting) drop_context->setSetting("database_atomic_wait_for_drop_and_detach_synchronously", false); - drop_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; + drop_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); if (auto txn = current_context->getZooKeeperMetadataTransaction()) { /// For Replicated database - drop_context->getClientInfo().is_replicated_database_internal = true; + drop_context->setQueryKindReplicatedDatabaseInternal(); drop_context->setQueryContext(std::const_pointer_cast(current_context)); drop_context->initZooKeeperMetadataTransaction(txn, true); } diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h index af7a4ddef25..7ae544a7356 100644 --- a/src/Interpreters/InterpreterDropQuery.h +++ b/src/Interpreters/InterpreterDropQuery.h @@ -25,7 +25,7 @@ public: BlockIO execute() override; static void executeDropQuery(ASTDropQuery::Kind kind, ContextPtr global_context, ContextPtr current_context, - const StorageID & target_table_id, bool sync, bool ignore_sync_setting = false); + const StorageID & target_table_id, bool sync, bool ignore_sync_setting = false, bool need_ddl_guard = false); bool supportsTransactions() const override; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 3a381cd8dab..39cc4df5c2d 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -541,13 +541,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions()); interpreter.buildQueryPlan(plan); context = interpreter.getContext(); - // collect the selected marks, rows, parts during build query pipeline. - plan.buildQueryPipeline( + // Collect the selected marks, rows, parts during build query pipeline. + // Hold on to the returned QueryPipelineBuilderPtr because `plan` may have pointers into + // it (through QueryPlanResourceHolder). 
+ auto builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); - if (settings.optimize) - plan.optimize(QueryPlanOptimizationSettings::fromContext(context)); plan.explainEstimate(res_columns); insert_buf = false; break; diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 9cd1f2a251c..27c94119750 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,7 @@ #include #include #include +#include #include #include #include @@ -314,6 +316,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 3330159aff5..590b7fe37b8 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -24,7 +24,6 @@ #include #include #include -#include #include @@ -296,7 +295,7 @@ BlockIO InterpreterKillQueryQuery::execute() if (res_columns[0]->empty() && access_denied) throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill mutation. " - "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); + "To execute this query, it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); @@ -360,7 +359,7 @@ BlockIO InterpreterKillQueryQuery::execute() if (res_columns[0]->empty() && access_denied) throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill move partition. " - "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); + "To execute this query, it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 75d43b541e1..ae79b3f932e 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -193,7 +193,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.getDatabase(), elem.to.getTable()); if (rename.exchange) { - required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT , elem.from.getDatabase(), elem.from.getTable()); + required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.from.getDatabase(), elem.from.getTable()); required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.getDatabase(), elem.to.getTable()); } } diff --git a/src/Interpreters/InterpreterRenameQuery.h b/src/Interpreters/InterpreterRenameQuery.h index 25c707d9962..79a6c0ddef4 100644 --- a/src/Interpreters/InterpreterRenameQuery.h +++ b/src/Interpreters/InterpreterRenameQuery.h @@ -10,7 +10,7 @@ namespace DB class AccessRightsElements; class DDLGuard; -/// To avoid deadlocks, we must acquire locks for tables in same order in any different RENAMES. 
+/// To avoid deadlocks, we must acquire locks for tables in same order in any different RENAMEs. struct UniqueTableName { String database_name; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6ea15312ec4..fb04cc77955 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -299,7 +299,7 @@ void checkAccessRightsForSelect( } throw Exception( ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT for at least one column on {}", + "{}: Not enough privileges. To execute this query, it's necessary to have the grant SELECT for at least one column on {}", context->getUserName(), table_id.getFullTableName()); } @@ -610,27 +610,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; - NameToNameMap parameter_types; if (view) { query_info.is_parameterized_view = view->isParameterizedView(); - /// We need to fetch the parameters set for SELECT ... FROM parameterized_view() before the query is replaced. - /// replaceWithSubquery replaces the function child and adds the subquery in its place. - /// the parameters are children of function child, if function (which corresponds to parametrised view and has - /// parameters in its arguments: `parametrised_view()`) is replaced the parameters are also gone from tree - /// So we need to get the parameters before they are removed from the tree - /// and after query is replaced, we use these parameters to substitute in the parameterized view query - if (query_info.is_parameterized_view) - { - query_info.parameterized_view_values = analyzeFunctionParamValues(query_ptr); - parameter_types = view->getParameterTypes(); - } view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - if (query_info.is_parameterized_view) - { - view->replaceQueryParametersIfParametrizedView(query_ptr, query_info.parameterized_view_values); - } - } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( @@ -639,10 +622,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( options, joined_tables.tablesWithColumns(), required_result_column_names, - table_join, - query_info.is_parameterized_view, - query_info.parameterized_view_values, - parameter_types); + table_join); query_info.syntax_analyzer_result = syntax_analyzer_result; @@ -793,7 +773,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(parallel_replicas_custom_filter_ast); } - source_header = storage_snapshot->getSampleBlockForColumns(required_columns, query_info.parameterized_view_values); + source_header = storage_snapshot->getSampleBlockForColumns(required_columns); } /// Calculate structure of the result. 
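
// A minimal sketch of the capability-flag approach used above for the trivial SELECT count()
// optimization: the condition no longer checks engine names or lightweight-delete masks inline, but
// asks the storage via a single virtual. The default and the override shown here are illustrative
// guesses for the sketch only, not the actual IStorage implementation.
#include <iostream>

struct IStorageLike
{
    virtual ~IStorageLike() = default;
    // True if count() can be answered from metadata without reading any rows.
    virtual bool supportsTrivialCountOptimization() const { return false; }
};

struct MergeTreeLike : IStorageLike
{
    bool has_lightweight_delete_mask = false;
    // A table with lightweight-deleted rows cannot use the metadata row count as-is.
    bool supportsTrivialCountOptimization() const override { return !has_lightweight_delete_mask; }
};

int main()
{
    MergeTreeLike table;
    std::cout << std::boolalpha << table.supportsTrivialCountOptimization() << '\n';   // true
    table.has_lightweight_delete_mask = true;
    std::cout << table.supportsTrivialCountOptimization() << '\n';                     // false
}
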
@@ -2274,8 +2254,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle && !settings.allow_experimental_query_deduplication && !settings.empty_result_for_aggregation_by_empty_set && storage - && storage->getName() != "MaterializedMySQL" - && !storage->hasLightweightDeletedMask() + && storage->supportsTrivialCountOptimization() && query_info.filter_asts.empty() && query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) @@ -3181,9 +3160,9 @@ void InterpreterSelectQuery::initSettings() { auto & query = getSelectQuery(); if (query.settings()) - InterpreterSetQuery(query.settings(), context).executeForCurrentContext(); + InterpreterSetQuery(query.settings(), context).executeForCurrentContext(options.ignore_setting_constraints); - auto & client_info = context->getClientInfo(); + const auto & client_info = context->getClientInfo(); auto min_major = DBMS_MIN_MAJOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD; auto min_minor = DBMS_MIN_MINOR_VERSION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD; diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 8db1d27c073..b8cace5e0ad 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -184,7 +184,7 @@ InterpreterSelectQueryAnalyzer::InterpreterSelectQueryAnalyzer( , context(buildContext(context_, select_query_options_)) , select_query_options(select_query_options_) , query_tree(query_tree_) - , planner(query_tree_, select_query_options_) + , planner(query_tree_, select_query_options) { } diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index c25de7c55ea..2c0baa0d4b3 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -15,7 +15,7 @@ namespace DB BlockIO InterpreterSetQuery::execute() { const auto & ast = query_ptr->as(); - getContext()->checkSettingsConstraints(ast.changes); + getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); auto session_context = getContext()->getSessionContext(); session_context->applySettingsChanges(ast.changes); session_context->addQueryParameters(ast.query_parameters); @@ -24,10 +24,11 @@ BlockIO InterpreterSetQuery::execute() } -void InterpreterSetQuery::executeForCurrentContext() +void InterpreterSetQuery::executeForCurrentContext(bool ignore_setting_constraints) { const auto & ast = query_ptr->as(); - getContext()->checkSettingsConstraints(ast.changes); + if (!ignore_setting_constraints) + getContext()->checkSettingsConstraints(ast.changes, SettingSource::QUERY); getContext()->applySettingsChanges(ast.changes); getContext()->resetSettingsToDefaultValue(ast.default_settings); } @@ -64,6 +65,9 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta } else if (const auto * explain_query = ast->as()) { + if (explain_query->settings_ast) + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(); + applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } else if (const auto * query_with_output = dynamic_cast(ast.get())) diff --git a/src/Interpreters/InterpreterSetQuery.h b/src/Interpreters/InterpreterSetQuery.h index bcd4022f9bb..2438762f347 100644 --- a/src/Interpreters/InterpreterSetQuery.h +++ b/src/Interpreters/InterpreterSetQuery.h @@ -23,7 +23,7 @@ public: /** Set setting for current context (query context). 
* It is used for interpretation of SETTINGS clause in SELECT query. */ - void executeForCurrentContext(); + void executeForCurrentContext(bool ignore_setting_constraints = false); bool supportsTransactions() const override { return true; } diff --git a/src/Interpreters/InterpreterShowIndexesQuery.cpp b/src/Interpreters/InterpreterShowIndexesQuery.cpp index 51311c82eeb..149420006fb 100644 --- a/src/Interpreters/InterpreterShowIndexesQuery.cpp +++ b/src/Interpreters/InterpreterShowIndexesQuery.cpp @@ -40,20 +40,20 @@ SELECT * FROM ( (SELECT name AS table, - 0 AS non_unique, + 1 AS non_unique, 'PRIMARY' AS key_name, - NULL AS seq_in_index, - NULL AS column_name, + row_number() over (order by column_name) AS seq_in_index, + arrayJoin(splitByString(', ', primary_key)) AS column_name, 'A' AS collation, - NULL AS cardinality, + 0 AS cardinality, NULL AS sub_part, NULL AS packed, NULL AS null, - 'primary' AS index_type, - NULL AS comment, - NULL AS index_comment, + 'PRIMARY' AS index_type, + '' AS comment, + '' AS index_comment, 'YES' AS visible, - primary_key AS expression + '' AS expression FROM system.tables WHERE database = '{0}' @@ -61,18 +61,18 @@ FROM ( UNION ALL ( SELECT table AS table, - 0 AS non_unique, + 1 AS non_unique, name AS key_name, - NULL AS seq_in_index, - NULL AS column_name, + 1 AS seq_in_index, + '' AS column_name, NULL AS collation, - NULL AS cardinality, + 0 AS cardinality, NULL AS sub_part, NULL AS packed, NULL AS null, - type AS index_type, - NULL AS comment, - NULL AS index_comment, + upper(type) AS index_type, + '' AS comment, + '' AS index_comment, 'YES' AS visible, expr AS expression FROM system.data_skipping_indices @@ -80,12 +80,27 @@ FROM ( database = '{0}' AND table = '{1}')) {2} -ORDER BY index_type, expression;)", database, table, where_expression); +ORDER BY index_type, expression, column_name, seq_in_index;)", database, table, where_expression); /// Sorting is strictly speaking not necessary but 1. it is convenient for users, 2. SQL currently does not allow to /// sort the output of SHOW INDEXES otherwise (SELECT * FROM (SHOW INDEXES ...) ORDER BY ...) is rejected) and 3. some /// SQL tests can take advantage of this. + /// Note about compatibility of fields 'column_name', 'seq_in_index' and 'expression' with MySQL: + /// MySQL has non-functional and functional indexes. + /// - Non-functional indexes only reference columns, e.g. 'col1, col2'. In this case, `SHOW INDEX` produces as many result rows as there + /// are indexed columns. 'column_name' and 'seq_in_index' (an ascending integer 1, 2, ...) are filled, 'expression' is empty. + /// - Functional indexes can reference arbitrary expressions, e.g. 'col1 + 1, concat(col2, col3)'. 'SHOW INDEX' produces a single row + /// with `column_name` and `seq_in_index` empty and `expression` filled with the entire index expression. Only non-primary-key indexes + /// can be functional indexes. + /// Above SELECT tries to emulate that. Caveats: + /// 1. The primary key index sub-SELECT assumes the primary key expression is non-functional. Non-functional primary key indexes in + /// ClickHouse are possible but quiete obscure. In MySQL they are not possible at all. + /// 2. Related to 1.: Poor man's tuple parsing with splitByString() in the PK sub-SELECT messes up for functional primary key index + /// expressions where the comma is not only used as separator between tuple components, e.g. in 'col1 + 1, concat(col2, col3)'. + /// 3. The data skipping index sub-SELECT assumes the index expression is functional. 
3rd party tools that expect MySQL semantics from + /// SHOW INDEX will probably not care as MySQL has no skipping indexes and they only use the result to figure out the primary key. + return rewritten_query; } diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 2f1a4a32bee..5fe0a862e05 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -116,10 +116,10 @@ String InterpreterShowTablesQuery::getRewrittenQuery() return rewritten_query.str(); } - if (query.temporary && !query.from.empty()) + if (query.temporary && !query.getFrom().empty()) throw Exception(ErrorCodes::SYNTAX_ERROR, "The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`"); - String database = getContext()->resolveDatabase(query.from); + String database = getContext()->resolveDatabase(query.getFrom()); DatabaseCatalog::instance().assertDatabaseExists(database); WriteBufferFromOwnString rewritten_query; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 02cdeb0154e..9c8bc256fa2 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -88,53 +89,48 @@ namespace ErrorCodes namespace ActionLocks { - extern StorageActionBlockType PartsMerge; - extern StorageActionBlockType PartsFetch; - extern StorageActionBlockType PartsSend; - extern StorageActionBlockType ReplicationQueue; - extern StorageActionBlockType DistributedSend; - extern StorageActionBlockType PartsTTLMerge; - extern StorageActionBlockType PartsMove; + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType ReplicationQueue; + extern const StorageActionBlockType DistributedSend; + extern const StorageActionBlockType PartsTTLMerge; + extern const StorageActionBlockType PartsMove; + extern const StorageActionBlockType PullReplicationLog; } namespace { -ExecutionStatus getOverallExecutionStatusOfCommands() +/// Sequentially tries to execute all commands and throws exception with info about failed commands +void executeCommandsAndThrowIfError(std::vector> commands) { - return ExecutionStatus(0); -} + ExecutionStatus result(0); + for (auto & command : commands) + { + try + { + command(); + } + catch (...) + { + ExecutionStatus current_result = ExecutionStatus::fromCurrentException(); -/// Consequently tries to execute all commands and generates final exception message for failed commands -template -ExecutionStatus getOverallExecutionStatusOfCommands(Callable && command, Callables && ... commands) -{ - ExecutionStatus status_head(0); - try - { - command(); - } - catch (...) - { - status_head = ExecutionStatus::fromCurrentException(); + if (result.code == 0) + result.code = current_result.code; + + if (!current_result.message.empty()) + { + if (!result.message.empty()) + result.message += '\n'; + result.message += current_result.message; + } + } } - ExecutionStatus status_tail = getOverallExecutionStatusOfCommands(std::forward(commands)...); - - auto res_status = status_head.code != 0 ? status_head.code : status_tail.code; - auto res_message = status_head.message + (status_tail.message.empty() ? 
"" : ("\n" + status_tail.message)); - - return ExecutionStatus(res_status, res_message); -} - -/// Consequently tries to execute all commands and throws exception with info about failed commands -template -void executeCommandsAndThrowIfError(Callables && ... commands) -{ - auto status = getOverallExecutionStatusOfCommands(std::forward(commands)...); - if (status.code != 0) - throw Exception::createDeprecated(status.message, status.code); + if (result.code != 0) + throw Exception::createDeprecated(result.message, result.code); } @@ -154,6 +150,8 @@ AccessType getRequiredAccessType(StorageActionBlockType action_type) return AccessType::SYSTEM_TTL_MERGES; else if (action_type == ActionLocks::PartsMove) return AccessType::SYSTEM_MOVES; + else if (action_type == ActionLocks::PullReplicationLog) + return AccessType::SYSTEM_PULLING_REPLICATION_LOG; else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown action type: {}", std::to_string(action_type)); } @@ -321,27 +319,27 @@ BlockIO InterpreterSystemQuery::execute() } case Type::DROP_MARK_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); - system_context->dropMarkCache(); + system_context->clearMarkCache(); break; case Type::DROP_UNCOMPRESSED_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE); - system_context->dropUncompressedCache(); + system_context->clearUncompressedCache(); break; case Type::DROP_INDEX_MARK_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); - system_context->dropIndexMarkCache(); + system_context->clearIndexMarkCache(); break; case Type::DROP_INDEX_UNCOMPRESSED_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE); - system_context->dropIndexUncompressedCache(); + system_context->clearIndexUncompressedCache(); break; case Type::DROP_MMAP_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_MMAP_CACHE); - system_context->dropMMappedFileCache(); + system_context->clearMMappedFileCache(); break; case Type::DROP_QUERY_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); - getContext()->dropQueryCache(); + getContext()->clearQueryCache(); break; #if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: @@ -370,7 +368,18 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - cache->removeAllReleasable(); + if (query.key_to_drop.empty()) + { + cache->removeAllReleasable(); + } + else + { + auto key = FileCacheKey::fromKeyString(query.key_to_drop); + if (query.offset_to_drop.has_value()) + cache->removeFileSegment(key, query.offset_to_drop.value()); + else + cache->removeKey(key); + } } break; } @@ -410,10 +419,10 @@ BlockIO InterpreterSystemQuery::execute() case Type::RELOAD_DICTIONARIES: { getContext()->checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY); - executeCommandsAndThrowIfError( + executeCommandsAndThrowIfError({ [&] { system_context->getExternalDictionariesLoader().reloadAllTriedToLoad(); }, [&] { system_context->getEmbeddedDictionaries().reload(); } - ); + }); ExternalDictionariesLoader::resetAll(); break; } @@ -501,6 +510,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::START_DISTRIBUTED_SENDS: startStopAction(ActionLocks::DistributedSend, true); break; + case Type::STOP_PULLING_REPLICATION_LOG: + startStopAction(ActionLocks::PullReplicationLog, false); + break; + case Type::START_PULLING_REPLICATION_LOG: + startStopAction(ActionLocks::PullReplicationLog, true); + break; case Type::DROP_REPLICA: 
dropReplica(query); break; @@ -536,28 +551,35 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_LOGS); - executeCommandsAndThrowIfError( - [&] { if (auto query_log = getContext()->getQueryLog()) query_log->flush(true); }, - [&] { if (auto part_log = getContext()->getPartLog("")) part_log->flush(true); }, - [&] { if (auto query_thread_log = getContext()->getQueryThreadLog()) query_thread_log->flush(true); }, - [&] { if (auto trace_log = getContext()->getTraceLog()) trace_log->flush(true); }, - [&] { if (auto text_log = getContext()->getTextLog()) text_log->flush(true); }, - [&] { if (auto metric_log = getContext()->getMetricLog()) metric_log->flush(true); }, - [&] { if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog()) asynchronous_metric_log->flush(true); }, - [&] { if (auto opentelemetry_span_log = getContext()->getOpenTelemetrySpanLog()) opentelemetry_span_log->flush(true); }, - [&] { if (auto query_views_log = getContext()->getQueryViewsLog()) query_views_log->flush(true); }, - [&] { if (auto zookeeper_log = getContext()->getZooKeeperLog()) zookeeper_log->flush(true); }, - [&] { if (auto session_log = getContext()->getSessionLog()) session_log->flush(true); }, - [&] { if (auto transactions_info_log = getContext()->getTransactionsInfoLog()) transactions_info_log->flush(true); }, - [&] { if (auto processors_profile_log = getContext()->getProcessorsProfileLog()) processors_profile_log->flush(true); }, - [&] { if (auto cache_log = getContext()->getFilesystemCacheLog()) cache_log->flush(true); }, - [&] { if (auto asynchronous_insert_log = getContext()->getAsynchronousInsertLog()) asynchronous_insert_log->flush(true); } - ); + + auto logs = getContext()->getSystemLogs(); + std::vector<std::function<void()>> commands; + commands.reserve(logs.size()); + for (auto * system_log : logs) + commands.emplace_back([system_log] { system_log->flush(true); }); + + executeCommandsAndThrowIfError(commands); + break; + } + case Type::STOP_LISTEN: + getContext()->checkAccess(AccessType::SYSTEM_LISTEN); + getContext()->stopServers(query.server_type); + break; + case Type::START_LISTEN: + getContext()->checkAccess(AccessType::SYSTEM_LISTEN); + getContext()->startServers(query.server_type); + break; + case Type::FLUSH_ASYNC_INSERT_QUEUE: + { + getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); + auto * queue = getContext()->getAsynchronousInsertQueue(); + if (!queue) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot flush asynchronous insert queue because it is not initialized"); + + queue->flushAll(); break; } - case Type::STOP_LISTEN_QUERIES: - case Type::START_LISTEN_QUERIES: - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type); case Type::STOP_THREAD_FUZZER: getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER); ThreadFuzzer::stop(); @@ -1062,6 +1084,15 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_MOVES, query.getDatabase(), query.getTable()); break; } + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: + { + if (!query.table) + required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG); + else + required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG, query.getDatabase(), query.getTable()); + break; + } case Type::STOP_FETCHES: case Type::START_FETCHES: { @@ -1149,6 +1180,11 @@ AccessRightsElements
InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_FLUSH_LOGS); break; } + case Type::FLUSH_ASYNC_INSERT_QUEUE: + { + required_access.emplace_back(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); + break; + } case Type::RESTART_DISK: { required_access.emplace_back(AccessType::SYSTEM_RESTART_DISK); @@ -1164,8 +1200,12 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_SYNC_FILE_CACHE); break; } - case Type::STOP_LISTEN_QUERIES: - case Type::START_LISTEN_QUERIES: + case Type::STOP_LISTEN: + case Type::START_LISTEN: + { + required_access.emplace_back(AccessType::SYSTEM_LISTEN); + break; + } case Type::STOP_THREAD_FUZZER: case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT: diff --git a/src/Interpreters/InterpreterUseQuery.cpp b/src/Interpreters/InterpreterUseQuery.cpp index d8a5ae57470..b71f3a9cc1c 100644 --- a/src/Interpreters/InterpreterUseQuery.cpp +++ b/src/Interpreters/InterpreterUseQuery.cpp @@ -10,7 +10,7 @@ namespace DB BlockIO InterpreterUseQuery::execute() { - const String & new_database = query_ptr->as().database; + const String & new_database = query_ptr->as().getDatabase(); getContext()->checkAccess(AccessType::SHOW_DATABASES, new_database); getContext()->getSessionContext()->setCurrentDatabase(new_database); return {}; diff --git a/src/Interpreters/JIT/CompileDAG.cpp b/src/Interpreters/JIT/CompileDAG.cpp index 6da17fb4c67..9c56341eae0 100644 --- a/src/Interpreters/JIT/CompileDAG.cpp +++ b/src/Interpreters/JIT/CompileDAG.cpp @@ -160,9 +160,7 @@ UInt128 CompileDAG::hash() const } } - UInt128 result; - hash.get128(result); - return result; + return hash.get128(); } } diff --git a/src/Interpreters/JIT/CompiledExpressionCache.h b/src/Interpreters/JIT/CompiledExpressionCache.h index 21f7c67226c..8354b33bae9 100644 --- a/src/Interpreters/JIT/CompiledExpressionCache.h +++ b/src/Interpreters/JIT/CompiledExpressionCache.h @@ -19,7 +19,7 @@ public: size_t getCompiledExpressionSize() const { return compiled_expression_size; } - virtual ~CompiledExpressionCacheEntry() {} + virtual ~CompiledExpressionCacheEntry() = default; private: diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index fb5066b2d04..1d2ebc6b456 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -18,6 +18,7 @@ class JoinSwitcher : public IJoin public: JoinSwitcher(std::shared_ptr table_join_, const Block & right_sample_block_); + std::string getName() const override { return "JoinSwitcher"; } const TableJoin & getTableJoin() const override { return *table_join; } /// Add block of data from right hand of JOIN into current join object. 
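Note on the InterpreterSystemQuery.cpp change above: the new executeCommandsAndThrowIfError helper replaces the old variadic-template version with a plain vector of callbacks. Every command is executed even if earlier ones fail, the first non-zero error code is kept, and all failure messages are concatenated into a single exception. The following is a minimal standalone sketch of that pattern only; it uses std::runtime_error in place of ClickHouse's ExecutionStatus/Exception, and the sample lambdas and messages are illustrative, not taken from the patch.

    #include <functional>
    #include <stdexcept>
    #include <string>
    #include <vector>

    /// Run every command even if earlier ones fail, then throw once with all
    /// collected messages (the real helper also preserves the first error code).
    void executeCommandsAndThrowIfError(std::vector<std::function<void()>> commands)
    {
        bool failed = false;
        std::string combined_message;

        for (auto & command : commands)
        {
            try
            {
                command();
            }
            catch (const std::exception & e)
            {
                failed = true;
                if (!combined_message.empty())
                    combined_message += '\n';
                combined_message += e.what();
            }
        }

        if (failed)
            throw std::runtime_error(combined_message);
    }

    int main()
    {
        try
        {
            executeCommandsAndThrowIfError({
                [] { /* e.g. flush one system log: succeeds */ },
                [] { throw std::runtime_error("flush of text_log failed"); }, /* hypothetical failure */
                [] { /* still executed despite the failure above */ },
            });
        }
        catch (const std::exception & e)
        {
            (void)e; /* e.what() now carries the messages of all failed commands */
        }
    }

This is why SYSTEM FLUSH LOGS can simply build one lambda per registered system log (getSystemLogs()) instead of hard-coding every log table, as the hunk above shows.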
diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index ee5c288afbb..29add31fd5d 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -337,6 +337,11 @@ std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & se LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' was not found", dictionary_name); return nullptr; } + if (dictionary->getSpecialKeyType() == DictionarySpecialKeyType::Range) + { + LOG_TRACE(&Poco::Logger::get("JoinedTables"), "Can't use dictionary join: dictionary '{}' is a range dictionary", dictionary_name); + return nullptr; + } auto dictionary_kv = std::dynamic_pointer_cast(dictionary); table_join->setStorageJoin(dictionary_kv); diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index ceef1371f16..6f0c8f1bff4 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -1034,7 +1034,7 @@ std::shared_ptr MergeJoin::loadRightBlock(size_t pos) const { auto load_func = [&]() -> std::shared_ptr { - TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->getPath(), materializeBlock(right_sample_block)); + TemporaryFileStreamLegacy input(flushed_right_blocks[pos]->getAbsolutePath(), materializeBlock(right_sample_block)); return std::make_shared(input.block_in->read()); }; diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 03a661c5b8a..98fae1d419f 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -22,6 +22,7 @@ class MergeJoin : public IJoin public: MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block); + std::string getName() const override { return "PartialMergeJoin"; } const TableJoin & getTableJoin() const override { return *table_join; } bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 24f77f7d0ba..24e9e4487ae 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -113,7 +113,7 @@ void MetricLog::metricThreadFunction() elem.current_metrics[i] = CurrentMetrics::values[i]; } - this->add(elem); + this->add(std::move(elem)); /// We will record current time into table but align it to regular time intervals to avoid time drift. /// We may drop some time points if the server is overloaded and recording took too much time. 
diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index c95f5209760..c1838fa105c 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -39,8 +39,8 @@ public: bool canOptimize(const ASTFunction & ast_function) const { /// if GROUP BY contains the same function ORDER BY shouldn't be optimized - auto hash = ast_function.getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = ast_function.getTreeHash(); + const auto key = toString(hash); if (group_by_function_hashes.count(key)) return false; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 25c52ad8925..e50f8488cac 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -113,13 +113,14 @@ QueryTreeNodePtr prepareQueryAffectedQueryTree(const std::vector & has_index_or_projection) + const StorageInMemoryMetadata::HasDependencyCallback & has_dependency) { NameSet new_updated_columns = updated_columns; ColumnDependencies dependencies; + while (!new_updated_columns.empty()) { - auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_index_or_projection); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_dependency); new_updated_columns.clear(); for (const auto & dependency : new_dependencies) { @@ -292,9 +293,14 @@ bool MutationsInterpreter::Source::materializeTTLRecalculateOnly() const return data && data->getSettings()->materialize_ttl_recalculate_only; } -bool MutationsInterpreter::Source::hasIndexOrProjection(const String & file_name) const +bool MutationsInterpreter::Source::hasSecondaryIndex(const String & name) const { - return part && part->checksums.has(file_name); + return part && part->hasSecondaryIndex(name); +} + +bool MutationsInterpreter::Source::hasProjection(const String & name) const +{ + return part && part->hasProjection(name); } static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage) @@ -533,13 +539,24 @@ void MutationsInterpreter::prepare(bool dry_run) validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); } - std::function has_index_or_projection - = [&](const String & file_name) { return source.hasIndexOrProjection(file_name); }; + StorageInMemoryMetadata::HasDependencyCallback has_dependency = + [&](const String & name, ColumnDependency::Kind kind) + { + if (kind == ColumnDependency::PROJECTION) + return source.hasProjection(name); + + if (kind == ColumnDependency::SKIP_INDEX) + return source.hasSecondaryIndex(name); + + return true; + }; if (settings.recalculate_dependencies_of_updated_columns) - dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns, has_index_or_projection); + dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns, has_dependency); + bool has_alter_delete = false; std::vector read_columns; + /// First, break a sequence of commands into stages. 
for (auto & command : commands) { @@ -558,6 +575,7 @@ void MutationsInterpreter::prepare(bool dry_run) predicate = makeASTFunction("isZeroOrNull", predicate); stages.back().filters.push_back(predicate); + has_alter_delete = true; } else if (command.type == MutationCommand::UPDATE) { @@ -692,8 +710,7 @@ void MutationsInterpreter::prepare(bool dry_run) if (it == std::cend(indices_desc)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown index: {}", command.index_name); - if (!source.hasIndexOrProjection("skp_idx_" + it->name + ".idx") - && !source.hasIndexOrProjection("skp_idx_" + it->name + ".idx2")) + if (!source.hasSecondaryIndex(it->name)) { auto query = (*it).expression_list_ast->clone(); auto syntax_result = TreeRewriter(context).analyze(query, all_columns); @@ -707,7 +724,7 @@ void MutationsInterpreter::prepare(bool dry_run) { mutation_kind.set(MutationKind::MUTATE_INDEX_PROJECTION); const auto & projection = projections_desc.get(command.projection_name); - if (!source.hasIndexOrProjection(projection.getDirectoryName())) + if (!source.hasProjection(projection.name)) { for (const auto & column : projection.required_columns) dependencies.emplace(column, ColumnDependency::PROJECTION); @@ -731,8 +748,9 @@ void MutationsInterpreter::prepare(bool dry_run) { // just recalculate ttl_infos without remove expired data auto all_columns_vec = all_columns.getNames(); - auto new_dependencies = metadata_snapshot->getColumnDependencies( - NameSet(all_columns_vec.begin(), all_columns_vec.end()), false, has_index_or_projection); + auto all_columns_set = NameSet(all_columns_vec.begin(), all_columns_vec.end()); + auto new_dependencies = metadata_snapshot->getColumnDependencies(all_columns_set, false, has_dependency); + for (const auto & dependency : new_dependencies) { if (dependency.kind == ColumnDependency::TTL_EXPRESSION) @@ -757,8 +775,8 @@ void MutationsInterpreter::prepare(bool dry_run) } auto all_columns_vec = all_columns.getNames(); - auto all_dependencies = getAllColumnDependencies( - metadata_snapshot, NameSet(all_columns_vec.begin(), all_columns_vec.end()), has_index_or_projection); + auto all_columns_set = NameSet(all_columns_vec.begin(), all_columns_vec.end()); + auto all_dependencies = getAllColumnDependencies(metadata_snapshot, all_columns_set, has_dependency); for (const auto & dependency : all_dependencies) { @@ -767,7 +785,7 @@ void MutationsInterpreter::prepare(bool dry_run) } /// Recalc only skip indices and projections of columns which could be updated by TTL. 
- auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_index_or_projection); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns, true, has_dependency); for (const auto & dependency : new_dependencies) { if (dependency.kind == ColumnDependency::SKIP_INDEX || dependency.kind == ColumnDependency::PROJECTION) @@ -861,30 +879,44 @@ void MutationsInterpreter::prepare(bool dry_run) for (const auto & index : metadata_snapshot->getSecondaryIndices()) { - if (source.hasIndexOrProjection("skp_idx_" + index.name + ".idx") || source.hasIndexOrProjection("skp_idx_" + index.name + ".idx2")) + if (!source.hasSecondaryIndex(index.name)) + continue; + + if (has_alter_delete) { - const auto & index_cols = index.expression->getRequiredColumns(); - bool changed = std::any_of( - index_cols.begin(), - index_cols.end(), - [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); - if (changed) - materialized_indices.insert(index.name); + materialized_indices.insert(index.name); + continue; } + + const auto & index_cols = index.expression->getRequiredColumns(); + bool changed = std::any_of( + index_cols.begin(), + index_cols.end(), + [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); + + if (changed) + materialized_indices.insert(index.name); } for (const auto & projection : metadata_snapshot->getProjections()) { - if (source.hasIndexOrProjection(projection.getDirectoryName())) + if (!source.hasProjection(projection.name)) + continue; + + if (has_alter_delete) { - const auto & projection_cols = projection.required_columns; - bool changed = std::any_of( - projection_cols.begin(), - projection_cols.end(), - [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); - if (changed) - materialized_projections.insert(projection.name); + materialized_projections.insert(projection.name); + continue; } + + const auto & projection_cols = projection.required_columns; + bool changed = std::any_of( + projection_cols.begin(), + projection_cols.end(), + [&](const auto & col) { return updated_columns.contains(col) || changed_columns.contains(col); }); + + if (changed) + materialized_projections.insert(projection.name); } /// Stages might be empty when we materialize skip indices or projections which don't add any diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index d783b503531..9b4caaae231 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -120,7 +120,8 @@ public: bool supportsLightweightDelete() const; bool hasLightweightDeleteMask() const; bool materializeTTLRecalculateOnly() const; - bool hasIndexOrProjection(const String & file_name) const; + bool hasSecondaryIndex(const String & name) const; + bool hasProjection(const String & name) const; void read( Stage & first_stage, diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.cpp b/src/Interpreters/MutationsNonDeterministicHelpers.cpp index 46c203e15bc..119759265ef 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.cpp +++ b/src/Interpreters/MutationsNonDeterministicHelpers.cpp @@ -63,6 +63,71 @@ public: using FirstNonDeterministicFunctionFinder = InDepthNodeVisitor; using FirstNonDeterministicFunctionData = FirstNonDeterministicFunctionMatcher::Data; +/// Executes and replaces with literals +/// non-deterministic functions in query. 
+/// Similar to ExecuteScalarSubqueriesVisitor. +class ExecuteNonDeterministicConstFunctionsMatcher +{ +public: + + struct Data + { + ContextPtr context; + std::optional max_literal_size; + }; + + static bool needChildVisit(const ASTPtr & ast, const ASTPtr & /*child*/) + { + /// Do not visit subqueries because they are executed separately. + return !ast->as(); + } + + static void visit(ASTPtr & ast, const Data & data) + { + if (auto * function = ast->as()) + visit(*function, ast, data); + } + + static void visit(ASTFunction & function, ASTPtr & ast, const Data & data) + { + if (!FunctionFactory::instance().has(function.name)) + return; + + /// It makes sense to execute functions which are deterministic + /// in scope of query because they are usually constant expressions. + auto builder = FunctionFactory::instance().get(function.name, data.context); + if (builder->isDeterministic() || !builder->isDeterministicInScopeOfQuery()) + return; + + Field field; + DataTypePtr type; + + try + { + std::tie(field, type) = evaluateConstantExpression(ast, data.context); + } + catch (...) + { + /// An exception can be thrown if the expression is not constant. + /// It's ok in that context and we just do nothing in that case. + /// It's bad pattern but it's quite hard to implement it in another way. + return; + } + + auto column = type->createColumn(); + column->insert(field); + + Block scalar{{std::move(column), type, "_constant"}}; + if (worthConvertingScalarToLiteral(scalar, data.max_literal_size)) + { + auto literal = std::make_unique(std::move(field)); + ast = addTypeConversionToAST(std::move(literal), type->getName()); + } + } +}; + +using ExecuteNonDeterministicConstFunctionsVisitor = InDepthNodeVisitor; + } FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const MutationCommand & command, ContextPtr context) @@ -97,4 +162,51 @@ FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const Muta return {}; } +ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, ContextPtr context) +{ + const auto & settings = context->getSettingsRef(); + if (!settings.mutations_execute_subqueries_on_initiator + && !settings.mutations_execute_nondeterministic_on_initiator) + return nullptr; + + auto query = alter_command.clone(); + auto & new_alter_command = *query->as(); + + if (settings.mutations_execute_subqueries_on_initiator) + { + Scalars scalars; + Scalars local_scalars; + + ExecuteScalarSubqueriesVisitor::Data data{ + WithContext{context}, + /*subquery_depth=*/ 0, + scalars, + local_scalars, + /*only_analyze=*/ false, + /*is_create_parameterized_view=*/ false, + /*replace_only_to_literals=*/ true, + settings.mutations_max_literal_size_to_replace}; + + ExecuteScalarSubqueriesVisitor visitor(data); + if (new_alter_command.update_assignments) + visitor.visit(new_alter_command.update_assignments); + if (new_alter_command.predicate) + visitor.visit(new_alter_command.predicate); + } + + if (settings.mutations_execute_nondeterministic_on_initiator) + { + ExecuteNonDeterministicConstFunctionsVisitor::Data data{ + context, settings.mutations_max_literal_size_to_replace}; + + ExecuteNonDeterministicConstFunctionsVisitor visitor(data); + if (new_alter_command.update_assignments) + visitor.visit(new_alter_command.update_assignments); + if (new_alter_command.predicate) + visitor.visit(new_alter_command.predicate); + } + + return query; +} + } diff --git a/src/Interpreters/MutationsNonDeterministicHelpers.h b/src/Interpreters/MutationsNonDeterministicHelpers.h 
index eb3a3b1c7a4..5d9aa9752f1 100644 --- a/src/Interpreters/MutationsNonDeterministicHelpers.h +++ b/src/Interpreters/MutationsNonDeterministicHelpers.h @@ -7,6 +7,7 @@ namespace DB { struct MutationCommand; +class ASTAlterCommand; struct FirstNonDeterministicFunctionResult { @@ -18,4 +19,9 @@ struct FirstNonDeterministicFunctionResult /// may also be non-deterministic in expressions of mutation command. FirstNonDeterministicFunctionResult findFirstNonDeterministicFunction(const MutationCommand & command, ContextPtr context); +/// Executes non-deterministic functions and subqueries in expressions of mutation +/// command and replaces them to the literals with a result of expressions. +/// Returns rewritten query if expressions were replaced, nullptr otherwise. +ASTPtr replaceNonDeterministicToScalars(const ASTAlterCommand & alter_command, ContextPtr context); + } diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index 6a9251cec49..dd205ae6508 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -118,35 +118,37 @@ void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction {"greaterOrEquals", "lessOrEquals"}, }; - if (!swap_relations.contains(function.name)) return; + if (!swap_relations.contains(function.name)) + return; - if (!function.arguments || function.arguments->children.size() != 2) return; + if (!function.arguments || function.arguments->children.size() != 2) + return; size_t func_id = function.arguments->children.size(); for (size_t i = 0; i < function.arguments->children.size(); i++) - { if (const auto * func = function.arguments->children[i]->as()) - { func_id = i; - } - } - if (func_id == function.arguments->children.size()) return; + if (func_id == function.arguments->children.size()) + return; size_t literal_id = 1 - func_id; const auto * literal = function.arguments->children[literal_id]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64) return; + if (!literal || literal->value.getType() != Field::Types::UInt64) + return; String comparator = literal_id > func_id ? function.name : swap_relations.at(function.name); const auto * ast_func = function.arguments->children[func_id]->as(); /// Currently we only handle single-argument functions. 
- if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) return; + if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) + return; const auto * column_id = ast_func->arguments->children.at(0)->as(); - if (!column_id) return; + if (!column_id) + return; auto pos = IdentifierSemantic::getMembership(*column_id); if (!pos) @@ -158,21 +160,30 @@ void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction return; auto data_type_and_name = data.tables[*pos].columns.tryGetByName(column_id->shortName()); - if (!data_type_and_name) return; + if (!data_type_and_name) + return; + + const auto column_type = data_type_and_name->type; + if (!column_type || (!isDateOrDate32(*column_type) && !isDateTime(*column_type) && !isDateTime64(*column_type))) + return; const auto & converter = FunctionFactory::instance().tryGet(ast_func->name, data.context); - if (!converter) return; + if (!converter) + return; ColumnsWithTypeAndName args; - args.emplace_back(data_type_and_name->type, "tmp"); + args.emplace_back(column_type, "tmp"); auto converter_base = converter->build(args); - if (!converter_base || !converter_base->hasInformationAboutPreimage()) return; + if (!converter_base || !converter_base->hasInformationAboutPreimage()) + return; - auto preimage_range = converter_base->getPreimage(*(data_type_and_name->type), literal->value); - if (!preimage_range) return; + auto preimage_range = converter_base->getPreimage(*column_type, literal->value); + if (!preimage_range) + return; const auto new_ast = generateOptimizedDateFilterAST(comparator, *data_type_and_name, *preimage_range); - if (!new_ast) return; + if (!new_ast) + return; ast = new_ast; } diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index 881fcae4de6..a97f1f405bc 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -242,7 +242,7 @@ bool PartLog::addNewParts( elem.profile_counters = part_log_entry.profile_counters; - part_log->add(elem); + part_log->add(std::move(elem)); } } catch (...) 
diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index 6606e64f689..e64ff34b11f 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -118,7 +118,10 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e if (table_element->table_join && isLeft(table_element->table_join->as()->kind)) continue; /// Skip right table optimization - if (table_element->table_join && isFull(table_element->table_join->as()->kind)) + if (table_element->table_join && ( + isFull(table_element->table_join->as()->kind) + || table_element->table_join->as()->strictness == JoinStrictness::Asof + || table_element->table_join->as()->strictness == JoinStrictness::Anti)) break; /// Skip left and right table optimization is_rewrite_tables |= tryRewritePredicatesToTable(tables_element[table_pos], tables_predicates[table_pos], diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp index 67822ecf440..c2c01b912f9 100644 --- a/src/Interpreters/PreparedSets.cpp +++ b/src/Interpreters/PreparedSets.cpp @@ -207,7 +207,7 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context) String PreparedSets::toString(const PreparedSets::Hash & key, const DataTypes & types) { WriteBufferFromOwnString buf; - buf << "__set_" << key.first << "_" << key.second; + buf << "__set_" << DB::toString(key); if (!types.empty()) { buf << "("; diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index cb240f5260a..b953b8470e1 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -127,10 +127,10 @@ class PreparedSets { public: - using Hash = std::pair; + using Hash = CityHash_v1_0_2::uint128; struct Hashing { - UInt64 operator()(const Hash & key) const { return key.first ^ key.second; } + UInt64 operator()(const Hash & key) const { return key.low64 ^ key.high64; } }; using SetsFromTuple = std::unordered_map>, Hashing>; diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index daa8d434ab6..c299572ef41 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -37,8 +37,8 @@ static bool isUnlimitedQuery(const IAST * ast) if (!ast) return false; - /// It is KILL QUERY - if (ast->as()) + /// It is KILL QUERY or an async insert flush query + if (ast->as() || ast->getQueryKind() == IAST::QueryKind::AsyncInsertFlush) return true; /// It is SELECT FROM system.processes @@ -223,7 +223,10 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); + thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index e5a61497ff2..2eea49e1267 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -393,7 +393,7 @@ public: /** Register running query. Returns refcounted object, that will remove element from list in destructor. 
* If too many running queries - wait for not more than specified (see settings) amount of time. * If timeout is passed - throw an exception. - * Don't count KILL QUERY queries. + * Don't count KILL QUERY queries or async insert flush queries */ EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds); diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index e78a07bb752..14159ad3438 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -73,12 +73,5 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(output_bytes); } -ProcessorsProfileLog::ProcessorsProfileLog(ContextPtr context_, const String & database_name_, - const String & table_name_, const String & storage_def_, - size_t flush_interval_milliseconds_) - : SystemLog(context_, database_name_, table_name_, - storage_def_, flush_interval_milliseconds_) -{ -} } diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 81d58edd913..63791c0374c 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -45,12 +45,7 @@ struct ProcessorProfileLogElement class ProcessorsProfileLog : public SystemLog { public: - ProcessorsProfileLog( - ContextPtr context_, - const String & database_name_, - const String & table_name_, - const String & storage_def_, - size_t flush_interval_milliseconds_); + using SystemLog::SystemLog; }; } diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index bf8d060bd3c..bd421ae8e33 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -86,9 +86,16 @@ static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::Mutabl columns[i++]->insert(static_cast(snapshot.current_time)); columns[i++]->insert(static_cast(snapshot.thread_id)); columns[i++]->insert(Type::GAUGE); - columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); - columns[i++]->insert(snapshot.memory_usage); + columns[i]->insert(snapshot.memory_usage); + + i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(static_cast(snapshot.current_time)); + columns[i++]->insert(static_cast(snapshot.thread_id)); + columns[i++]->insert(Type::GAUGE); + columns[i++]->insertData(MemoryTracker::PEAK_USAGE_EVENT_NAME, strlen(MemoryTracker::PEAK_USAGE_EVENT_NAME)); + columns[i]->insert(snapshot.peak_memory_usage); } void getProfileEvents( @@ -121,6 +128,7 @@ void getProfileEvents( group_snapshot.thread_id = 0; group_snapshot.current_time = time(nullptr); group_snapshot.memory_usage = thread_group->memory_tracker.get(); + group_snapshot.peak_memory_usage = thread_group->memory_tracker.getPeak(); auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); auto prev_group_snapshot = last_sent_snapshots.find(0); group_snapshot.counters = diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 7d9fc512d15..cc338530510 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -16,6 +16,7 @@ struct ProfileEventsSnapshot UInt64 thread_id; CountersIncrement counters; Int64 memory_usage; + Int64 peak_memory_usage; time_t current_time; }; diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 
ec0315c2f95..df21e82305a 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -41,6 +41,15 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"ExceptionWhileProcessing", static_cast<Int8>(EXCEPTION_WHILE_PROCESSING)} }); + auto query_cache_usage_datatype = std::make_shared<DataTypeEnum8>( + DataTypeEnum8::Values + { + {"Unknown", static_cast<Int8>(QueryCache::Usage::Unknown)}, + {"None", static_cast<Int8>(QueryCache::Usage::None)}, + {"Write", static_cast<Int8>(QueryCache::Usage::Write)}, + {"Read", static_cast<Int8>(QueryCache::Usage::Read)} + }); + auto low_cardinality_string = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()); auto array_low_cardinality_string = std::make_shared<DataTypeArray>(low_cardinality_string); @@ -126,6 +135,8 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"transaction_id", getTransactionIDDataType()}, + {"query_cache_usage", std::move(query_cache_usage_datatype)}, + {"asynchronous_read_counters", std::make_shared<DataTypeMap>(low_cardinality_string, std::make_shared<DataTypeUInt64>())}, }; } @@ -277,6 +288,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id}); + columns[i++]->insert(query_cache_usage); + if (async_read_counters) async_read_counters->dumpToMapColumn(columns[i++].get()); else diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 570d1297239..5bc80280eac 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -4,8 +4,9 @@ #include #include #include -#include +#include #include +#include #include #include #include @@ -96,6 +97,8 @@ struct QueryLogElement TransactionID tid; + QueryCache::Usage query_cache_usage = QueryCache::Usage::Unknown; + static std::string name() { return "QueryLog"; } static NamesAndTypesList getNamesAndTypes(); diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index e4a052388ac..46dcc6129bc 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -66,10 +66,11 @@ void ReplaceQueryParameterVisitor::visitChildren(ASTPtr & ast) const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) { auto search = query_parameters.find(name); - if (search != query_parameters.end()) - return search->second; - else + if (search == query_parameters.end()) throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER, "Substitution {} is not set", backQuote(name)); + + ++num_replaced_parameters; + return search->second; } void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) @@ -131,6 +132,7 @@ void ReplaceQueryParameterVisitor::visitIdentifier(ASTPtr & ast) if (ast_identifier->children.empty()) return; + bool replaced_parameter = false; auto & name_parts = ast_identifier->name_parts; for (size_t i = 0, j = 0, size = name_parts.size(); i < size; ++i) { @@ -138,9 +140,14 @@ void ReplaceQueryParameterVisitor::visitIdentifier(ASTPtr & ast) { const auto & ast_param = ast_identifier->children[j++]->as<ASTQueryParameter &>(); name_parts[i] = getParamValue(ast_param.name); + replaced_parameter = true; } } + /// Do not touch AST if there are no parameters + if (!replaced_parameter) + return; + /// FIXME: what should this mean?
if (!ast_identifier->semantic->special && name_parts.size() >= 2) ast_identifier->semantic->table = ast_identifier->name_parts.end()[-2]; diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.h b/src/Interpreters/ReplaceQueryParameterVisitor.h index dd785cd768e..7d5da7ea85b 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -20,8 +20,12 @@ public: void visit(ASTPtr & ast); + size_t getNumberOfReplacedParameters() const { return num_replaced_parameters; } + private: const NameToNameMap & query_parameters; + size_t num_replaced_parameters = 0; + const String & getParamValue(const String & name); void visitIdentifier(ASTPtr & ast); void visitQueryParameter(ASTPtr & ast); diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index e6895ed243b..c91329c869c 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -51,6 +51,8 @@ struct SelectQueryOptions bool settings_limit_offset_done = false; bool is_explain = false; /// The value is true if it's explain statement. bool is_create_parameterized_view = false; + /// Bypass setting constraints for some internal queries such as projection ASTs. + bool ignore_setting_constraints = false; /// These two fields are used to evaluate shardNum() and shardCount() function when /// prefer_localhost_replica == 1 and local instance is selected. They are needed because local @@ -141,6 +143,12 @@ struct SelectQueryOptions return *this; } + SelectQueryOptions & ignoreSettingConstraints(bool value = true) + { + ignore_setting_constraints = value; + return *this; + } + SelectQueryOptions & setInternal(bool value = false) { is_internal = value; diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 0fbcfc9e6a1..1b78ff4d2b6 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -24,6 +24,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int INVALID_SETTING_VALUE; +} + namespace { @@ -52,7 +57,11 @@ ServerAsynchronousMetrics::ServerAsynchronousMetrics( : AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_) , WithContext(global_context_) , heavy_metric_update_period(heavy_metrics_update_period_seconds) -{} +{ + /// sanity check + if (update_period_seconds == 0 || heavy_metrics_update_period_seconds == 0) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Setting asynchronous_metrics_update_period_s and asynchronous_heavy_metrics_update_period_s must not be zero"); +} void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time) { @@ -92,6 +101,12 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values " The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."}; } + if (auto query_cache = getContext()->getQueryCache()) + { + new_values["QueryCacheBytes"] = { query_cache->weight(), "Total size of the query cache in bytes." }; + new_values["QueryCacheEntries"] = { query_cache->count(), "Total number of entries in the query cache." 
}; + } + { auto caches = FileCacheFactory::instance().getAll(); size_t total_bytes = 0; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 64f7b4fc934..e0b5db44593 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -3,11 +3,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -200,7 +202,6 @@ private: LOG_TEST(log, "Schedule closing session with session_id: {}, user_id: {}", session.key.second, session.key.first); - } void cleanThread() @@ -240,7 +241,7 @@ private: if (session != sessions.end() && session->second->close_cycle <= current_cycle) { - if (!session->second.unique()) + if (session->second.use_count() != 1) { LOG_TEST(log, "Delay closing session with session_id: {}, user_id: {}", key.second, key.first); @@ -298,8 +299,12 @@ Session::~Session() if (notified_session_log_about_login) { + LOG_DEBUG(log, "{} Logout, user_id: {}", toString(auth_id), toString(*user_id)); if (auto session_log = getSessionLog()) + { + /// TODO: We have to ensure that the same info is added to the session log on a LoginSuccess event and on the corresponding Logout event. session_log->addLogOut(auth_id, user, getClientInfo()); + } } } @@ -316,6 +321,7 @@ AuthenticationType Session::getAuthenticationTypeOrLogInFailure(const String & u } catch (const Exception & e) { + LOG_ERROR(log, "{} Authentication failed with error: {}", toString(auth_id), e.what()); if (auto session_log = getSessionLog()) session_log->addLoginFailure(auth_id, getClientInfo(), user_name, e); @@ -333,6 +339,9 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So if (session_context) throw Exception(ErrorCodes::LOGICAL_ERROR, "If there is a session context it must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before authentication finish"); + auto address = address_; if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL)) address = Poco::Net::SocketAddress{"127.0.0.1", 0}; @@ -368,17 +377,117 @@ void Session::onAuthenticationFailure(const std::optional & user_name, c } } -ClientInfo & Session::getClientInfo() -{ - /// FIXME it may produce different info for LoginSuccess and the corresponding Logout entries in the session log - return session_context ? session_context->getClientInfo() : *prepared_client_info; -} - const ClientInfo & Session::getClientInfo() const { return session_context ? 
session_context->getClientInfo() : *prepared_client_info; } +void Session::setClientInfo(const ClientInfo & client_info) +{ + if (session_context) + session_context->setClientInfo(client_info); + else + prepared_client_info = client_info; +} + +void Session::setClientName(const String & client_name) +{ + if (session_context) + session_context->setClientName(client_name); + else + prepared_client_info->client_name = client_name; +} + +void Session::setClientInterface(ClientInfo::Interface interface) +{ + if (session_context) + session_context->setClientInterface(interface); + else + prepared_client_info->interface = interface; +} + +void Session::setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version) +{ + if (session_context) + { + session_context->setClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); + } + else + { + prepared_client_info->client_version_major = client_version_major; + prepared_client_info->client_version_minor = client_version_minor; + prepared_client_info->client_version_patch = client_version_patch; + prepared_client_info->client_tcp_protocol_version = client_tcp_protocol_version; + } +} + +void Session::setClientConnectionId(uint32_t connection_id) +{ + if (session_context) + session_context->setClientConnectionId(connection_id); + else + prepared_client_info->connection_id = connection_id; +} + +void Session::setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer) +{ + if (session_context) + { + session_context->setHttpClientInfo(http_method, http_user_agent, http_referer); + } + else + { + prepared_client_info->http_method = http_method; + prepared_client_info->http_user_agent = http_user_agent; + prepared_client_info->http_referer = http_referer; + } +} + +void Session::setForwardedFor(const String & forwarded_for) +{ + if (session_context) + session_context->setForwardedFor(forwarded_for); + else + prepared_client_info->forwarded_for = forwarded_for; +} + +void Session::setQuotaClientKey(const String & quota_key) +{ + if (session_context) + session_context->setQuotaClientKey(quota_key); + else + prepared_client_info->quota_key = quota_key; +} + +void Session::setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version) +{ + if (session_context) + { + session_context->setConnectionClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); + } + else + { + prepared_client_info->connection_client_version_major = client_version_major; + prepared_client_info->connection_client_version_minor = client_version_minor; + prepared_client_info->connection_client_version_patch = client_version_patch; + prepared_client_info->connection_tcp_protocol_version = client_tcp_protocol_version; + } +} + +const OpenTelemetry::TracingContext & Session::getClientTraceContext() const +{ + if (session_context) + return session_context->getClientTraceContext(); + return prepared_client_info->client_trace_context; +} + +OpenTelemetry::TracingContext & Session::getClientTraceContext() +{ + if (session_context) + return session_context->getClientTraceContext(); + return prepared_client_info->client_trace_context; +} + ContextMutablePtr Session::makeSessionContext() { if (session_context) @@ -387,6 +496,8 @@ ContextMutablePtr Session::makeSessionContext() 
throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before making session"); LOG_DEBUG(log, "{} Creating session context with user_id: {}", toString(auth_id), toString(*user_id)); @@ -396,18 +507,23 @@ ContextMutablePtr Session::makeSessionContext() new_session_context->makeSessionContext(); /// Copy prepared client info to the new session context. - auto & res_client_info = new_session_context->getClientInfo(); - res_client_info = std::move(prepared_client_info).value(); + new_session_context->setClientInfo(*prepared_client_info); prepared_client_info.reset(); /// Set user information for the new context: current profiles, roles, access rights. - if (user_id) - new_session_context->setUser(*user_id); + new_session_context->setUser(*user_id); /// Session context is ready. session_context = new_session_context; user = session_context->getUser(); + session_tracker_handle = session_context->getSessionTracker().trackSession( + *user_id, + {}, + session_context->getSettingsRef().max_sessions_for_user); + + recordLoginSucess(session_context); + return session_context; } @@ -419,6 +535,8 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); + if (session_tracker_handle) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session tracker handle was created before making session"); LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}", toString(auth_id), session_name_, toString(*user_id)); @@ -436,13 +554,26 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: /// Copy prepared client info to the session context, no matter it's been just created or not. /// If we continue using a previously created session context found by session ID /// it's necessary to replace the client info in it anyway, because it contains actual connection information (client address, etc.) - auto & res_client_info = new_session_context->getClientInfo(); - res_client_info = std::move(prepared_client_info).value(); + new_session_context->setClientInfo(*prepared_client_info); prepared_client_info.reset(); + auto access = new_session_context->getAccess(); + UInt64 max_sessions_for_user = 0; /// Set user information for the new context: current profiles, roles, access rights. - if (user_id && !new_session_context->getAccess()->tryGetUser()) + if (!access->tryGetUser()) + { new_session_context->setUser(*user_id); + max_sessions_for_user = new_session_context->getSettingsRef().max_sessions_for_user; + } + else + { + // Always get setting from profile + // profile can be changed by ALTER PROFILE during single session + auto settings = access->getDefaultSettings(); + const Field * max_session_for_user_field = settings.tryGet("max_sessions_for_user"); + if (max_session_for_user_field) + max_sessions_for_user = max_session_for_user_field->safeGet(); + } /// Session context is ready. 
session_context = std::move(new_session_context); @@ -450,6 +581,13 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: named_session_created = new_named_session_created; user = session_context->getUser(); + session_tracker_handle = session_context->getSessionTracker().trackSession( + *user_id, + { session_name_ }, + max_sessions_for_user); + + recordLoginSucess(session_context); + return session_context; } @@ -484,7 +622,7 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t if (auto query_context_user = query_context->getAccess()->tryGetUser()) { - LOG_DEBUG(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}", + LOG_TRACE(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}", toString(auth_id), from_session_context ? "session" : "global", toString(*user_id), @@ -492,32 +630,28 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t } /// Copy the specified client info to the new query context. - auto & res_client_info = query_context->getClientInfo(); if (client_info_to_move) - res_client_info = std::move(*client_info_to_move); + query_context->setClientInfo(*client_info_to_move); else if (client_info_to_copy && (client_info_to_copy != &getClientInfo())) - res_client_info = *client_info_to_copy; + query_context->setClientInfo(*client_info_to_copy); /// Copy current user's name and address if it was authenticated after query_client_info was initialized. if (prepared_client_info && !prepared_client_info->current_user.empty()) { - res_client_info.current_user = prepared_client_info->current_user; - res_client_info.current_address = prepared_client_info->current_address; + query_context->setCurrentUserName(prepared_client_info->current_user); + query_context->setCurrentAddress(prepared_client_info->current_address); } /// Set parameters of initial query. - if (res_client_info.query_kind == ClientInfo::QueryKind::NO_QUERY) - res_client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY; + if (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::NO_QUERY) + query_context->setQueryKind(ClientInfo::QueryKind::INITIAL_QUERY); - if (res_client_info.query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (query_context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) { - res_client_info.initial_user = res_client_info.current_user; - res_client_info.initial_address = res_client_info.current_address; + query_context->setInitialUserName(query_context->getClientInfo().current_user); + query_context->setInitialAddress(query_context->getClientInfo().current_address); } - /// Sets that row policies of the initial user should be used too. - query_context->enableRowPoliciesOfInitialUser(); - /// Set user information for the new context: current profiles, roles, access rights. if (user_id && !query_context->getAccess()->tryGetUser()) query_context->setUser(*user_id); @@ -527,24 +661,38 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t if (user_id) user = query_context->getUser(); - if (!notified_session_log_about_login) - { - if (auto session_log = getSessionLog()) - { - session_log->addLoginSuccess( - auth_id, - named_session ? 
std::optional(named_session->key.second) : std::nullopt, - *query_context, - user); - - notified_session_log_about_login = true; - } - } + /// Interserver does not create session context + recordLoginSucess(query_context); return query_context; } +void Session::recordLoginSucess(ContextPtr login_context) const +{ + if (notified_session_log_about_login) + return; + + if (!login_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session or query context must be created"); + + if (auto session_log = getSessionLog()) + { + const auto & settings = login_context->getSettingsRef(); + const auto access = login_context->getAccess(); + + session_log->addLoginSuccess(auth_id, + named_session ? named_session->key.second : "", + settings, + access, + getClientInfo(), + user); + } + + notified_session_log_about_login = true; +} + + void Session::releaseSessionID() { if (!named_session) @@ -566,4 +714,3 @@ void Session::closeSession(const String & session_id) } } - diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index d7c06a60464..81ef987b428 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -54,10 +55,23 @@ public: /// Writes a row about login failure into session log (if enabled) void onAuthenticationFailure(const std::optional & user_name, const Poco::Net::SocketAddress & address_, const Exception & e); - /// Returns a reference to session ClientInfo. - ClientInfo & getClientInfo(); + /// Returns a reference to the session's ClientInfo. const ClientInfo & getClientInfo() const; + /// Modify the session's ClientInfo. + void setClientInfo(const ClientInfo & client_info); + void setClientName(const String & client_name); + void setClientInterface(ClientInfo::Interface interface); + void setClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); + void setClientConnectionId(uint32_t connection_id); + void setHttpClientInfo(ClientInfo::HTTPMethod http_method, const String & http_user_agent, const String & http_referer); + void setForwardedFor(const String & forwarded_for); + void setQuotaClientKey(const String & quota_key); + void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version); + + const OpenTelemetry::TracingContext & getClientTraceContext() const; + OpenTelemetry::TracingContext & getClientTraceContext(); + /// Makes a session context, can be used one or zero times. /// The function also assigns an user to this context. 
ContextMutablePtr makeSessionContext(); @@ -83,6 +97,8 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; + void recordLoginSucess(ContextPtr login_context) const; + mutable bool notified_session_log_about_login = false; const UUID auth_id; @@ -100,6 +116,8 @@ private: std::shared_ptr named_session; bool named_session_created = false; + SessionTracker::SessionTrackerHandle session_tracker_handle; + Poco::Logger * log = nullptr; }; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index c930013e52b..b847eaf9824 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -199,12 +199,13 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length()); } -void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context, const UserPtr & login_user) +void SessionLog::addLoginSuccess(const UUID & auth_id, + const String & session_id, + const Settings & settings, + const ContextAccessPtr & access, + const ClientInfo & client_info, + const UserPtr & login_user) { - const auto access = login_context.getAccess(); - const auto & settings = login_context.getSettingsRef(); - const auto & client_info = login_context.getClientInfo(); - DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; @@ -215,8 +216,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses } log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : ""; - if (session_id) - log_entry.session_id = *session_id; + log_entry.session_id = session_id; if (const auto roles_info = access->getRolesInfo()) log_entry.roles = roles_info->getCurrentRolesNames(); @@ -227,7 +227,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses for (const auto & s : settings.allChanged()) log_entry.settings.emplace_back(s.getName(), s.getValueString()); - add(log_entry); + add(std::move(log_entry)); } void SessionLog::addLoginFailure( @@ -243,7 +243,7 @@ void SessionLog::addLoginFailure( log_entry.client_info = info; log_entry.user_identified_with = AuthenticationType::NO_PASSWORD; - add(log_entry); + add(std::move(log_entry)); } void SessionLog::addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info) @@ -257,7 +257,7 @@ void SessionLog::addLogOut(const UUID & auth_id, const UserPtr & login_user, con log_entry.external_auth_server = login_user ? login_user->auth_data.getLDAPServerName() : ""; log_entry.client_info = client_info; - add(log_entry); + add(std::move(log_entry)); } } diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 1282ac09c4d..8757bc12270 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -20,6 +20,7 @@ enum SessionLogElementType : int8_t class ContextAccess; struct User; using UserPtr = std::shared_ptr; +using ContextAccessPtr = std::shared_ptr; /** A struct which will be inserted as row into session_log table. 
* @@ -72,7 +73,13 @@ class SessionLog : public SystemLog using SystemLog::SystemLog; public: - void addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context, const UserPtr & login_user); + void addLoginSuccess(const UUID & auth_id, + const String & session_id, + const Settings & settings, + const ContextAccessPtr & access, + const ClientInfo & client_info, + const UserPtr & login_user); + void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const std::optional & user, const Exception & reason); void addLogOut(const UUID & auth_id, const UserPtr & login_user, const ClientInfo & client_info); }; diff --git a/src/Interpreters/SessionTracker.cpp b/src/Interpreters/SessionTracker.cpp new file mode 100644 index 00000000000..4636766e288 --- /dev/null +++ b/src/Interpreters/SessionTracker.cpp @@ -0,0 +1,62 @@ +#include "SessionTracker.h" + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int USER_SESSION_LIMIT_EXCEEDED; +} + +SessionTracker::Session::Session(SessionTracker & tracker_, + const UUID& user_id_, + SessionInfos::const_iterator session_info_iter_) noexcept + : tracker(tracker_), user_id(user_id_), session_info_iter(session_info_iter_) +{ +} + +SessionTracker::Session::~Session() +{ + tracker.stopTracking(user_id, session_info_iter); +} + +SessionTracker::SessionTrackerHandle +SessionTracker::trackSession(const UUID & user_id, + const SessionInfo & session_info, + size_t max_sessions_for_user) +{ + std::lock_guard lock(mutex); + + auto sessions_for_user_iter = sessions_for_user.find(user_id); + if (sessions_for_user_iter == sessions_for_user.end()) + sessions_for_user_iter = sessions_for_user.emplace(user_id, SessionInfos()).first; + + SessionInfos & session_infos = sessions_for_user_iter->second; + if (max_sessions_for_user && session_infos.size() >= max_sessions_for_user) + { + throw Exception(ErrorCodes::USER_SESSION_LIMIT_EXCEEDED, + "User {} has overflown session count {}", + toString(user_id), + max_sessions_for_user); + } + + session_infos.emplace_front(session_info); + + return std::make_unique(*this, user_id, session_infos.begin()); +} + +void SessionTracker::stopTracking(const UUID& user_id, SessionInfos::const_iterator session_info_iter) +{ + std::lock_guard lock(mutex); + + auto sessions_for_user_iter = sessions_for_user.find(user_id); + chassert(sessions_for_user_iter != sessions_for_user.end()); + + sessions_for_user_iter->second.erase(session_info_iter); + if (sessions_for_user_iter->second.empty()) + sessions_for_user.erase(sessions_for_user_iter); +} + +} diff --git a/src/Interpreters/SessionTracker.h b/src/Interpreters/SessionTracker.h new file mode 100644 index 00000000000..0827213aeed --- /dev/null +++ b/src/Interpreters/SessionTracker.h @@ -0,0 +1,60 @@ +#pragma once + +#include "ClientInfo.h" + +#include +#include +#include +#include + +namespace DB +{ + +struct SessionInfo +{ + const String session_id; +}; + +using SessionInfos = std::list; + +using SessionsForUser = std::unordered_map; + +class SessionTracker; + +class SessionTracker +{ +public: + class Session : boost::noncopyable + { + public: + explicit Session(SessionTracker & tracker_, + const UUID & user_id_, + SessionInfos::const_iterator session_info_iter_) noexcept; + + ~Session(); + + private: + friend class SessionTracker; + + SessionTracker & tracker; + const UUID user_id; + const SessionInfos::const_iterator session_info_iter; + }; + + using SessionTrackerHandle = std::unique_ptr; + + SessionTrackerHandle trackSession(const UUID 
& user_id, + const SessionInfo & session_info, + size_t max_sessions_for_user); + +private: + /// disallow manual messing with session tracking + friend class Session; + + std::mutex mutex; + SessionsForUser sessions_for_user TSA_GUARDED_BY(mutex); + + void stopTracking(const UUID& user_id, SessionInfos::const_iterator session_info_iter); +}; + +} diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index e09a66a38e6..c9f48ee1be9 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -39,7 +39,7 @@ namespace TemporaryFileOnDiskHolder flushToFile(const DiskPtr & disk, const Block & header, QueryPipelineBuilder pipeline, const String & codec) { auto tmp_file = std::make_unique(disk, CurrentMetrics::TemporaryFilesForJoin); - auto write_stat = TemporaryFileStreamLegacy::write(tmp_file->getPath(), header, std::move(pipeline), codec); + auto write_stat = TemporaryFileStreamLegacy::write(tmp_file->getAbsolutePath(), header, std::move(pipeline), codec); ProfileEvents::increment(ProfileEvents::ExternalProcessingCompressedBytesTotal, write_stat.compressed_bytes); ProfileEvents::increment(ProfileEvents::ExternalProcessingUncompressedBytesTotal, write_stat.uncompressed_bytes); @@ -267,7 +267,7 @@ SortedBlocksWriter::SortedFiles SortedBlocksWriter::finishMerge(std::function(file->getPath(), materializeBlock(sample_block))); + return Pipe(std::make_shared(file->getAbsolutePath(), materializeBlock(sample_block))); } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 3fd0297f5b8..23a5a96584c 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -48,6 +48,11 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +namespace ActionLocks +{ + extern const StorageActionBlockType PartsMerge; +} + namespace { class StorageWithComment : public IAST @@ -101,7 +106,6 @@ namespace namespace { -constexpr size_t DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS = 7500; constexpr size_t DEFAULT_METRIC_LOG_COLLECT_INTERVAL_MILLISECONDS = 1000; /// Creates a system log with MergeTree engine using parameters from config @@ -124,18 +128,24 @@ std::shared_ptr createSystemLog( LOG_DEBUG(&Poco::Logger::get("SystemLog"), "Creating {}.{} from {}", default_database_name, default_table_name, config_prefix); - String database = config.getString(config_prefix + ".database", default_database_name); - String table = config.getString(config_prefix + ".table", default_table_name); + SystemLogSettings log_settings; - if (database != default_database_name) + log_settings.queue_settings.database = config.getString(config_prefix + ".database", default_database_name); + log_settings.queue_settings.table = config.getString(config_prefix + ".table", default_table_name); + + if (log_settings.queue_settings.database != default_database_name) { /// System tables must be loaded before other tables, but loading order is undefined for all databases except `system` - LOG_ERROR(&Poco::Logger::get("SystemLog"), "Custom database name for a system table specified in config." - " Table `{}` will be created in `system` database instead of `{}`", table, database); - database = default_database_name; + LOG_ERROR( + &Poco::Logger::get("SystemLog"), + "Custom database name for a system table specified in config." 
+ " Table `{}` will be created in `system` database instead of `{}`", + log_settings.queue_settings.table, + log_settings.queue_settings.database); + + log_settings.queue_settings.database = default_database_name; } - String engine; if (config.has(config_prefix + ".engine")) { if (config.has(config_prefix + ".partition_by")) @@ -159,26 +169,26 @@ std::shared_ptr createSystemLog( "If 'engine' is specified for system table, SETTINGS parameters should " "be specified directly inside 'engine' and 'settings' setting doesn't make sense"); - engine = config.getString(config_prefix + ".engine"); + log_settings.engine = config.getString(config_prefix + ".engine"); } else { /// ENGINE expr is necessary. - engine = "ENGINE = MergeTree"; + log_settings.engine = "ENGINE = MergeTree"; /// PARTITION expr is not necessary. String partition_by = config.getString(config_prefix + ".partition_by", "toYYYYMM(event_date)"); if (!partition_by.empty()) - engine += " PARTITION BY (" + partition_by + ")"; + log_settings.engine += " PARTITION BY (" + partition_by + ")"; /// TTL expr is not necessary. String ttl = config.getString(config_prefix + ".ttl", ""); if (!ttl.empty()) - engine += " TTL " + ttl; + log_settings.engine += " TTL " + ttl; /// ORDER BY expr is necessary. String order_by = config.getString(config_prefix + ".order_by", TSystemLog::getDefaultOrderBy()); - engine += " ORDER BY (" + order_by + ")"; + log_settings.engine += " ORDER BY (" + order_by + ")"; /// SETTINGS expr is not necessary. /// https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#settings @@ -188,24 +198,52 @@ std::shared_ptr createSystemLog( String settings = config.getString(config_prefix + ".settings", ""); if (!storage_policy.empty() || !settings.empty()) { - engine += " SETTINGS"; + log_settings.engine += " SETTINGS"; /// If 'storage_policy' is repeated, the 'settings' configuration is preferred. if (!storage_policy.empty()) - engine += " storage_policy = " + quoteString(storage_policy); + log_settings.engine += " storage_policy = " + quoteString(storage_policy); if (!settings.empty()) - engine += (storage_policy.empty() ? " " : ", ") + settings; + log_settings.engine += (storage_policy.empty() ? " " : ", ") + settings; } } /// Validate engine definition syntax to prevent some configuration errors. 
ParserStorageWithComment storage_parser; - parseQuery(storage_parser, engine.data(), engine.data() + engine.size(), + parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(), "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - size_t flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", - DEFAULT_SYSTEM_LOG_FLUSH_INTERVAL_MILLISECONDS); + log_settings.queue_settings.flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds", + TSystemLog::getDefaultFlushIntervalMilliseconds()); - return std::make_shared(context, database, table, engine, flush_interval_milliseconds); + log_settings.queue_settings.max_size_rows = config.getUInt64(config_prefix + ".max_size_rows", + TSystemLog::getDefaultMaxSize()); + + if (log_settings.queue_settings.max_size_rows < 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{0}.max_size_rows {1} should be 1 at least", + config_prefix, + log_settings.queue_settings.max_size_rows); + + log_settings.queue_settings.reserved_size_rows = config.getUInt64(config_prefix + ".reserved_size_rows", + TSystemLog::getDefaultReservedSize()); + + if (log_settings.queue_settings.max_size_rows < log_settings.queue_settings.reserved_size_rows) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "{0}.max_size_rows {1} should be greater or equal to {0}.reserved_size_rows {2}", + config_prefix, + log_settings.queue_settings.max_size_rows, + log_settings.queue_settings.reserved_size_rows); + } + + log_settings.queue_settings.buffer_size_rows_flush_threshold = config.getUInt64(config_prefix + ".buffer_size_rows_flush_threshold", + log_settings.queue_settings.max_size_rows / 2); + + log_settings.queue_settings.notify_flush_on_crash = config.getBool(config_prefix + ".flush_on_crash", + TSystemLog::shouldNotifyFlushOnCrash()); + + log_settings.queue_settings.turn_off_logger = TSystemLog::shouldTurnOffLogger(); + + return std::make_shared(context, log_settings); } @@ -325,22 +363,25 @@ void SystemLogs::shutdown() log->shutdown(); } +void SystemLogs::handleCrash() +{ + for (auto & log : logs) + log->handleCrash(); +} template SystemLog::SystemLog( ContextPtr context_, - const String & database_name_, - const String & table_name_, - const String & storage_def_, - size_t flush_interval_milliseconds_) - : WithContext(context_) - , table_id(database_name_, table_name_) - , storage_def(storage_def_) + const SystemLogSettings & settings_, + std::shared_ptr> queue_) + : Base(settings_.queue_settings, queue_) + , WithContext(context_) + , log(&Poco::Logger::get("SystemLog (" + settings_.queue_settings.database + "." + settings_.queue_settings.table + ")")) + , table_id(settings_.queue_settings.database, settings_.queue_settings.table) + , storage_def(settings_.engine) , create_query(serializeAST(*getCreateTableQuery())) - , flush_interval_milliseconds(flush_interval_milliseconds_) { - assert(database_name_ == DatabaseCatalog::SYSTEM_DATABASE); - log = &Poco::Logger::get("SystemLog (" + database_name_ + "." 
+ table_name_ + ")"); + assert(settings_.queue_settings.database == DatabaseCatalog::SYSTEM_DATABASE); } template @@ -353,6 +394,26 @@ void SystemLog::shutdown() table->flushAndShutdown(); } +template +void SystemLog::stopFlushThread() +{ + { + std::lock_guard lock(thread_mutex); + + if (!saving_thread || !saving_thread->joinable()) + return; + + if (is_shutdown) + return; + + is_shutdown = true; + queue->shutdown(); + } + + saving_thread->join(); +} + + template void SystemLog::savingThreadFunction() { @@ -370,27 +431,7 @@ void SystemLog::savingThreadFunction() // Should we prepare table even if there are no new messages. bool should_prepare_tables_anyway = false; - { - std::unique_lock lock(mutex); - flush_event.wait_for(lock, - std::chrono::milliseconds(flush_interval_milliseconds), - [&] () - { - return requested_flush_up_to > flushed_up_to || is_shutdown || is_force_prepare_tables; - } - ); - - queue_front_index += queue.size(); - to_flush_end = queue_front_index; - // Swap with existing array from previous flush, to save memory - // allocations. - to_flush.resize(0); - queue.swap(to_flush); - - should_prepare_tables_anyway = is_force_prepare_tables; - - exit_this_thread = is_shutdown; - } + to_flush_end = queue->pop(to_flush, should_prepare_tables_anyway, exit_this_thread); if (to_flush.empty()) { @@ -399,9 +440,7 @@ void SystemLog::savingThreadFunction() prepareTable(); LOG_TRACE(log, "Table created (force)"); - std::lock_guard lock(mutex); - is_force_prepare_tables = false; - flush_event.notify_all(); + queue->confirm(to_flush_end); } } else @@ -473,12 +512,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, tryLogCurrentException(__PRETTY_FUNCTION__); } - { - std::lock_guard lock(mutex); - flushed_up_to = to_flush_end; - is_force_prepare_tables = false; - flush_event.notify_all(); - } + queue->confirm(to_flush_end); LOG_TRACE(log, "Flushed system log up to offset {}", to_flush_end); } @@ -532,6 +566,10 @@ void SystemLog::prepareTable() rename->elements.emplace_back(std::move(elem)); + ActionLock merges_lock; + if (DatabaseCatalog::instance().getDatabase(table_id.database_name)->getUUID() == UUIDHelpers::Nil) + merges_lock = table->getActionLock(ActionLocks::PartsMerge); + auto query_context = Context::createCopy(context); /// As this operation is performed automatically we don't want it to fail because of user dependencies on log tables query_context->setSetting("check_table_dependencies", Field{false}); @@ -618,7 +656,6 @@ ASTPtr SystemLog::getCreateTableQuery() return create; } - #define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class SystemLog; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 84b70c67e2a..cf287ad7775 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -58,6 +58,7 @@ struct SystemLogs ~SystemLogs(); void shutdown(); + void handleCrash(); std::shared_ptr query_log; /// Used to log queries. std::shared_ptr query_thread_log; /// Used to log query threads. @@ -87,6 +88,12 @@ struct SystemLogs std::vector logs; }; +struct SystemLogSettings +{ + SystemLogQueueSettings queue_settings; + + String engine; +}; template class SystemLog : public SystemLogBase, private boost::noncopyable, WithContext @@ -103,37 +110,33 @@ public: * where N - is a minimal number from 1, for that table with corresponding name doesn't exist yet; * and new table get created - as if previous table was not exist. 
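The queue sizing settings read above come with sanity checks: max_size_rows must be at least 1, reserved_size_rows must not exceed it, and the flush threshold defaults to half of the maximum. A minimal sketch of those checks under the same assumptions, using a plain exception instead of ClickHouse's Exception type:

#include <cstddef>
#include <stdexcept>

struct QueueSizing
{
    size_t max_size_rows;
    size_t reserved_size_rows;
    size_t buffer_size_rows_flush_threshold;
};

QueueSizing validateQueueSizing(size_t max_size_rows, size_t reserved_size_rows)
{
    if (max_size_rows < 1)
        throw std::invalid_argument("max_size_rows should be at least 1");

    if (max_size_rows < reserved_size_rows)
        throw std::invalid_argument("max_size_rows should be greater than or equal to reserved_size_rows");

    /// Flushing starts once the in-memory buffer reaches half of the hard limit (the default threshold).
    return {max_size_rows, reserved_size_rows, max_size_rows / 2};
}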
*/ - SystemLog( - ContextPtr context_, - const String & database_name_, - const String & table_name_, - const String & storage_def_, - size_t flush_interval_milliseconds_); + SystemLog(ContextPtr context_, + const SystemLogSettings & settings_, + std::shared_ptr> queue_ = nullptr); + + /** Append a record into log. + * Writing to table will be done asynchronously and in case of failure, record could be lost. + */ void shutdown() override; + void stopFlushThread() override; + protected: - using ISystemLog::mutex; + Poco::Logger * log; + using ISystemLog::is_shutdown; - using ISystemLog::flush_event; - using ISystemLog::stopFlushThread; - using Base::log; + using ISystemLog::saving_thread; + using ISystemLog::thread_mutex; using Base::queue; - using Base::queue_front_index; - using Base::is_force_prepare_tables; - using Base::requested_flush_up_to; - using Base::flushed_up_to; - using Base::logged_queue_full_at_index; private: - /* Saving thread data */ const StorageID table_id; const String storage_def; String create_query; String old_create_query; bool is_prepared = false; - const size_t flush_interval_milliseconds; /** Creates new table if it does not exist. * Renames old table if its structure is not suitable. diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index ba3befab59b..af3b027b2db 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -189,7 +189,6 @@ private: template void inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right, bool strict); - NamesAndTypesList correctedColumnsAddedByJoin() const; void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix); @@ -223,10 +222,10 @@ public: { /// When join_algorithm = 'default' (not specified by user) we use hash or direct algorithm. /// It's behaviour that was initially supported by clickhouse. 
- bool is_enbaled_by_default = val == JoinAlgorithm::DEFAULT + bool is_enabled_by_default = val == JoinAlgorithm::DEFAULT || val == JoinAlgorithm::HASH || val == JoinAlgorithm::DIRECT; - if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enbaled_by_default) + if (join_algorithm.isSet(JoinAlgorithm::DEFAULT) && is_enabled_by_default) return true; return join_algorithm.isSet(val); } @@ -331,7 +330,7 @@ public: const ColumnsWithTypeAndName & right_sample_columns); void setAsofInequality(ASOFJoinInequality inequality) { asof_inequality = inequality; } - ASOFJoinInequality getAsofInequality() { return asof_inequality; } + ASOFJoinInequality getAsofInequality() const { return asof_inequality; } ASTPtr leftKeysList() const; ASTPtr rightKeysList() const; /// For ON syntax only @@ -371,6 +370,8 @@ public: bool isSpecialStorage() const { return !right_storage_name.empty() || right_storage_join || right_kv_storage; } std::shared_ptr getStorageKeyValue() { return right_kv_storage; } + + NamesAndTypesList correctedColumnsAddedByJoin() const; }; } diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 69fef21dbab..0aa2a0b9f4a 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -235,9 +235,9 @@ TemporaryFileStream::TemporaryFileStream(TemporaryFileOnDiskHolder file_, const : parent(parent_) , header(header_) , file(std::move(file_)) - , out_writer(std::make_unique(std::make_unique(file->getPath()), header)) + , out_writer(std::make_unique(std::make_unique(file->getAbsolutePath()), header)) { - LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getPath()); + LOG_TEST(&Poco::Logger::get("TemporaryFileStream"), "Writing to temporary file {}", file->getAbsolutePath()); } TemporaryFileStream::TemporaryFileStream(FileSegmentsHolderPtr segments_, const Block & header_, TemporaryDataOnDisk * parent_) @@ -365,7 +365,7 @@ void TemporaryFileStream::release() String TemporaryFileStream::getPath() const { if (file) - return file->getPath(); + return file->getAbsolutePath(); if (segment_holder && !segment_holder->empty()) return segment_holder->front().getPathInLocalCache(); diff --git a/src/Interpreters/TextLog.cpp b/src/Interpreters/TextLog.cpp index 45d5a7b2344..3951a41f0c5 100644 --- a/src/Interpreters/TextLog.cpp +++ b/src/Interpreters/TextLog.cpp @@ -80,15 +80,10 @@ void TextLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(message_format_string); } -TextLog::TextLog(ContextPtr context_, const String & database_name_, - const String & table_name_, const String & storage_def_, - size_t flush_interval_milliseconds_) - : SystemLog(context_, database_name_, table_name_, - storage_def_, flush_interval_milliseconds_) +TextLog::TextLog(ContextPtr context_, + const SystemLogSettings & settings) + : SystemLog(context_, settings, getLogQueue(settings.queue_settings)) { - // SystemLog methods may write text logs, so we disable logging for the text - // log table to avoid recursion. 
- log->setLevel(0); } } diff --git a/src/Interpreters/TextLog.h b/src/Interpreters/TextLog.h index 6efc1c906d4..4bfed5327f3 100644 --- a/src/Interpreters/TextLog.h +++ b/src/Interpreters/TextLog.h @@ -40,12 +40,17 @@ struct TextLogElement class TextLog : public SystemLog { public: - TextLog( - ContextPtr context_, - const String & database_name_, - const String & table_name_, - const String & storage_def_, - size_t flush_interval_milliseconds_); + using Queue = SystemLogQueue; + + explicit TextLog(ContextPtr context_, const SystemLogSettings & settings); + + static std::shared_ptr getLogQueue(const SystemLogQueueSettings & settings) + { + static std::shared_ptr queue = std::make_shared(settings); + return queue; + } + + static consteval bool shouldTurnOffLogger() { return true; } }; } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 5acfe500b1d..6ee8ec987db 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -83,6 +83,8 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex const Settings & settings = storage_context->getSettingsRef(); group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) @@ -380,12 +382,10 @@ void ThreadStatus::finalizePerformanceCounters() updatePerformanceCounters(); // We want to close perf file descriptors if the perf events were enabled for - // one query. What this code does in practice is less clear -- e.g., if I run - // 'select 1 settings metrics_perf_events_enabled = 1', I still get - // query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*. + // one query. 
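TextLog::getLogQueue above creates one process-wide queue on first use and hands out shared ownership of it, so the TextLog system table and any code that pushes text-log records can operate on a single buffer. A sketch of that function-local-static sharing pattern with placeholder types (the real SystemLogQueue is templated on the element type and takes queue settings):

#include <memory>
#include <mutex>
#include <string>
#include <vector>

/// Placeholder for the real, templated SystemLogQueue.
struct LogQueue
{
    std::mutex mutex;
    std::vector<std::string> records;
};

/// One queue per process, created lazily; function-local statics are
/// initialized in a thread-safe way since C++11. Returning shared_ptr lets
/// every consumer keep the queue alive independently of static destruction order.
std::shared_ptr<LogQueue> getLogQueue()
{
    static std::shared_ptr<LogQueue> queue = std::make_shared<LogQueue>();
    return queue;
}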
bool close_perf_descriptors = true; - if (auto query_context_ptr = query_context.lock()) - close_perf_descriptors = !query_context_ptr->getSettingsRef().metrics_perf_events_enabled; + if (auto global_context_ptr = global_context.lock()) + close_perf_descriptors = !global_context_ptr->getSettingsRef().metrics_perf_events_enabled; try { @@ -408,7 +408,7 @@ void ThreadStatus::finalizePerformanceCounters() if (settings.log_queries && settings.log_query_threads) { const auto now = std::chrono::system_clock::now(); - Int64 query_duration_ms = std::chrono::duration_cast(now - query_start_time.point).count(); + Int64 query_duration_ms = std::chrono::duration_cast(now - query_start_time.point).count(); if (query_duration_ms >= settings.log_queries_min_query_duration_ms.totalMilliseconds()) { if (auto thread_log = global_context_ptr->getQueryThreadLog()) @@ -513,12 +513,12 @@ void ThreadStatus::logToQueryThreadLog(QueryThreadLog & thread_log, const String } } - thread_log.add(elem); + thread_log.add(std::move(elem)); } static String getCleanQueryAst(const ASTPtr q, ContextPtr context) { - String res = serializeAST(*q, true); + String res = serializeAST(*q); if (auto * masker = SensitiveDataMasker::getInstance()) masker->wipeSensitiveData(res); @@ -573,7 +573,7 @@ void ThreadStatus::logToQueryViewsLog(const ViewRuntimeData & vinfo) element.stack_trace = getExceptionStackTraceString(vinfo.exception); } - views_log->add(element); + views_log->add(std::move(element)); } void CurrentThread::attachToGroup(const ThreadGroupPtr & thread_group) diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index cb00e37df69..30fbe26d038 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -112,6 +112,9 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); + UInt64 ptr; + readPODBinary(ptr, in); + ProfileEvents::Event event; readPODBinary(event, in); @@ -127,8 +130,9 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; - trace_log->add(element); + + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment}; + trace_log->add(std::move(element)); } } } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 0408ebe504b..cd5f965a679 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -38,6 +38,7 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", std::make_shared()}, + {"ptr", std::make_shared()}, {"event", std::make_shared(std::make_shared())}, {"increment", std::make_shared()}, }; @@ -57,6 +58,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); + columns[i++]->insert(ptr); String event_name; if (event != ProfileEvents::end()) diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index c481f033a72..71aec0b50c4 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -27,8 +27,10 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array 
trace{}; - /// Allocation size in bytes for TraceType::Memory. + /// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample. Int64 size{}; + /// Allocation ptr for TraceType::MemorySample. + UInt64 ptr{}; /// ProfileEvent for TraceType::ProfileEvent. ProfileEvents::Event event{ProfileEvents::end()}; /// Increment of profile event for TraceType::ProfileEvent. diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 6257e617d4a..66b23f09ba0 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -34,7 +34,7 @@ try elem.tid = tid; elem.csn = csn; elem.fillCommonFields(nullptr); - system_log->add(elem); + system_log->add(std::move(elem)); } catch (...) { @@ -409,7 +409,7 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool { std::bernoulli_distribution fault(fault_probability_before_commit); if (fault(thread_local_rng)) - throw Coordination::Exception("Fault injected (before commit)", Coordination::Error::ZCONNECTIONLOSS); + throw Coordination::Exception::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (before commit)"); } /// Commit point @@ -419,7 +419,7 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool { std::bernoulli_distribution fault(fault_probability_after_commit); if (fault(thread_local_rng)) - throw Coordination::Exception("Fault injected (after commit)", Coordination::Error::ZCONNECTIONLOSS); + throw Coordination::Exception::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (after commit)"); } } catch (const Coordination::Exception & e) @@ -482,7 +482,7 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN bool removed = running_list.erase(txn->tid.getHash()); if (!removed) { - LOG_ERROR(log , "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); + LOG_ERROR(log, "It's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); abort(); } } diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp index b62cd4672d8..90f5022a444 100644 --- a/src/Interpreters/TransactionsInfoLog.cpp +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -101,7 +101,7 @@ try elem.type = type; elem.tid = tid; elem.fillCommonFields(&context); - system_log->add(elem); + system_log->add(std::move(elem)); } catch (...) 
{ diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index aeb912ddfbb..130ce2194fd 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -249,13 +249,6 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt if (first_table || !data.join_using_columns.contains(column.name)) { std::string column_name = column.name; - - /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - if (!data.parameter_values.empty()) - column_name - = StorageView::replaceQueryParameterWithValue(column_name, data.parameter_values, data.parameter_types); - addIdentifier(columns, table.table, column_name); } } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 6c804ad6c90..73e45fc7ea0 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -28,15 +28,11 @@ public: const TablesWithColumns & tables; std::unordered_set join_using_columns; bool has_columns; - NameToNameMap parameter_values; - NameToNameMap parameter_types; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}, const NameToNameMap & parameter_types_ = {}) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) - , parameter_values(parameter_values_) - , parameter_types(parameter_types_) {} bool hasColumn(const String & name) const { return source_columns.count(name); } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index fd4d2c9d846..9612a4d6c3f 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -289,13 +289,6 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) elems = std::move(unique_elems); } -/// Optimize duplicate ORDER BY -void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context) -{ - DuplicateOrderByVisitor::Data order_by_data{context}; - DuplicateOrderByVisitor(order_by_data).visit(query); -} - /// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select) { @@ -379,41 +372,6 @@ std::unordered_set getDistinctNames(const ASTSelectQuery & select) return names; } -/// Remove DISTINCT from query if columns are known as DISTINCT from subquery -void optimizeDuplicateDistinct(ASTSelectQuery & select) -{ - if (!select.select() || select.select()->children.empty()) - return; - - const ASTSelectQuery * subselect = getSimpleSubselect(select); - if (!subselect) - return; - - std::unordered_set distinct_names = getDistinctNames(*subselect); - std::unordered_set selected_names; - - /// Check source column names from select list (ignore aliases and table names) - for (const auto & id : select.select()->children) - { - const auto * identifier = id->as(); - if (!identifier) - return; - - const String & name = identifier->shortName(); - if (!distinct_names.contains(name)) - return; /// Not a distinct column, keep DISTINCT for it. 
- - selected_names.emplace(name); - } - - /// select columns list != distinct columns list - /// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...)) -- cannot remove DISTINCT - if (selected_names.size() != distinct_names.size()) - return; - - select.distinct = false; -} - /// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression, /// has a single argument and not an aggregate functions. void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context, @@ -451,8 +409,8 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context { for (auto & elem : set->children) { - auto hash = elem->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = elem->getTreeHash(); + const auto key = toString(hash); group_by_hashes.insert(key); } } @@ -461,8 +419,8 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context { for (auto & elem : group_by->children) { - auto hash = elem->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); + const auto hash = elem->getTreeHash(); + const auto key = toString(hash); group_by_hashes.insert(key); } } @@ -830,17 +788,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, && !select_query->group_by_with_cube) optimizeAggregateFunctionsOfGroupByKeys(select_query, query); - /// Remove duplicate ORDER BY and DISTINCT from subqueries. - if (settings.optimize_duplicate_order_by_and_distinct) - { - optimizeDuplicateOrderBy(query, context); - - /// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge - /// TODO: disable Distributed/remote() tables only - if (!settings.distributed_group_by_no_merge) - optimizeDuplicateDistinct(*select_query); - } - /// Remove functions from ORDER BY if its argument is also in ORDER BY if (settings.optimize_redundant_functions_in_order_by) optimizeRedundantFunctionsInOrderBy(select_query, context); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index cda5ceeb164..5b384a5f74b 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -110,6 +110,9 @@ using CustomizeCountDistinctVisitor = InDepthNodeVisitor>, true>; +char countdistinctif[] = "countdistinctif"; +using CustomizeCountDistinctIfVisitor = InDepthNodeVisitor>, true>; + char in[] = "in"; using CustomizeInVisitor = InDepthNodeVisitor>, true>; @@ -299,11 +302,10 @@ using ReplacePositionalArgumentsVisitor = InDepthNodeVisitorsetExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list); } -std::vector getAggregates(ASTPtr & query, const ASTSelectQuery & select_query) +ASTs getAggregates(ASTPtr & query, const ASTSelectQuery & select_query) { /// There can not be aggregate functions inside the WHERE and PREWHERE. if (select_query.where()) @@ -743,11 +745,12 @@ std::vector getAggregates(ASTPtr & query, const ASTSelectQu GetAggregatesVisitor(data).visit(query); /// There can not be other aggregate functions within the aggregate functions. 
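Earlier in this hunk series, optimizeMonotonousFunctionsInOrderBy switches from hand-concatenating the two halves of the tree hash to a single toString(hash) rendering when building its GROUP BY key set. A standalone sketch of building such deduplication keys (hypothetical 128-bit hash type; the real rendering differs):

#include <cstdint>
#include <string>
#include <unordered_set>

/// Stand-in for the 128-bit result of IAST::getTreeHash().
struct TreeHash
{
    uint64_t low = 0;
    uint64_t high = 0;
};

/// One canonical rendering used everywhere, so keys built in different places
/// compare equal exactly when the hashes are equal. The separator avoids
/// ambiguity between e.g. {12, 3} and {1, 23}.
std::string toString(const TreeHash & hash)
{
    return std::to_string(hash.high) + "_" + std::to_string(hash.low);
}

int main()
{
    std::unordered_set<std::string> group_by_hashes;
    group_by_hashes.insert(toString(TreeHash{1, 2}));
    group_by_hashes.insert(toString(TreeHash{1, 2})); // duplicate, the set keeps one entry
    return group_by_hashes.size() == 1 ? 0 : 1;
}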
- for (const ASTFunction * node : data.aggregates) + for (const ASTPtr & ast : data.aggregates) { - if (node->arguments) + const ASTFunction & node = typeid_cast(*ast); + if (node.arguments) { - for (auto & arg : node->arguments->children) + for (auto & arg : node.arguments->children) { assertNoAggregates(arg, "inside another aggregate function"); // We also can't have window functions inside aggregate functions, @@ -759,7 +762,7 @@ std::vector getAggregates(ASTPtr & query, const ASTSelectQu return data.aggregates; } -std::vector getWindowFunctions(ASTPtr & query, const ASTSelectQuery & select_query) +ASTs getWindowFunctions(ASTPtr & query, const ASTSelectQuery & select_query) { /// There can not be window functions inside the WHERE, PREWHERE and HAVING if (select_query.having()) @@ -777,20 +780,16 @@ std::vector getWindowFunctions(ASTPtr & query, const ASTSel /// Window functions cannot be inside aggregates or other window functions. /// Aggregate functions can be inside window functions because they are /// calculated earlier. - for (const ASTFunction * node : data.window_functions) + for (const ASTPtr & ast : data.window_functions) { - if (node->arguments) - { - for (auto & arg : node->arguments->children) - { - assertNoWindows(arg, "inside another window function"); - } - } + const ASTFunction & node = typeid_cast(*ast); - if (node->window_definition) - { - assertNoWindows(node->window_definition, "inside window definition"); - } + if (node.arguments) + for (auto & arg : node.arguments->children) + assertNoWindows(arg, "inside another window function"); + + if (node.window_definition) + assertNoWindows(node.window_definition, "inside window definition"); } return data.window_functions; @@ -1160,10 +1159,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( const SelectQueryOptions & select_options, const TablesWithColumns & tables_with_columns, const Names & required_result_columns, - std::shared_ptr table_join, - bool is_parameterized_view, - const NameToNameMap parameter_values, - const NameToNameMap parameter_types) const + std::shared_ptr table_join) const { auto * select_query = query->as(); if (!select_query) @@ -1201,7 +1197,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values, parameter_types); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); /// Optimizes logical expressions. 
LogicalExpressionsOptimizer(select_query, tables_with_columns, settings.optimize_min_equality_disjunction_chain_length.value).perform(); @@ -1259,15 +1255,6 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.window_function_asts = getWindowFunctions(query, *select_query); result.expressions_with_window_function = getExpressionsWithWindowFunctions(query); - /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - /// the replaced column names will be used in the next steps - if (is_parameterized_view) - { - for (auto & column : result.source_columns) - column.name = StorageView::replaceQueryParameterWithValue(column.name, parameter_values, parameter_types); - } - result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key); result.required_source_columns_before_expanding_alias_columns = result.required_source_columns.getNames(); @@ -1287,6 +1274,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( bool is_changed = replaceAliasColumnsInQuery(query, result.storage_snapshot->metadata->getColumns(), result.array_join_result_to_source, getContext(), excluded_nodes); + /// If query is changed, we need to redo some work to correct name resolution. if (is_changed) { @@ -1356,8 +1344,8 @@ TreeRewriterResultPtr TreeRewriter::analyze( GetAggregatesVisitor(data).visit(query); /// There can not be other aggregate functions within the aggregate functions. - for (const ASTFunction * node : data.aggregates) - for (auto & arg : node->arguments->children) + for (const ASTPtr & node : data.aggregates) + for (auto & arg : typeid_cast(*node).arguments->children) assertNoAggregates(arg, "inside another aggregate function"); result.aggregates = data.aggregates; } @@ -1383,6 +1371,12 @@ void TreeRewriter::normalize( CustomizeIfDistinctVisitor::Data data_distinct_if{"DistinctIf"}; CustomizeIfDistinctVisitor(data_distinct_if).visit(query); + if (settings.rewrite_count_distinct_if_with_count_distinct_implementation) + { + CustomizeCountDistinctIfVisitor::Data data_count_distinct_if{settings.count_distinct_implementation.toString() + "If"}; + CustomizeCountDistinctIfVisitor(data_count_distinct_if).visit(query); + } + ExistsExpressionVisitor::Data exists; ExistsExpressionVisitor(exists).visit(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index b94043b8983..89cc13da294 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -40,11 +40,10 @@ struct TreeRewriterResult NameSet expanded_aliases; Aliases aliases; - std::vector aggregates; - std::vector window_function_asts; - - std::vector expressions_with_window_function; + ASTs aggregates; + ASTs window_function_asts; + ASTs expressions_with_window_function; /// Which column is needed to be ARRAY-JOIN'ed to get the specified. /// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v". 
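The normalization step added above, guarded by rewrite_count_distinct_if_with_count_distinct_implementation, renames countDistinctIf calls to the configured count-distinct implementation plus an If suffix, e.g. countDistinctIf(x, cond) becomes uniqExactIf(x, cond) when count_distinct_implementation is 'uniqExact'. A tiny self-contained sketch of that renaming rule (simplified; the real visitor walks the AST):

#include <algorithm>
#include <cctype>
#include <optional>
#include <string>

/// Mirrors the visitor's matching: function names are compared case-insensitively
/// against "countdistinctif"; matches are renamed to "<implementation>If".
std::optional<std::string> rewriteCountDistinctIf(
    const std::string & function_name,
    const std::string & count_distinct_implementation)  // e.g. "uniqExact"
{
    std::string lowered = function_name;
    std::transform(lowered.begin(), lowered.end(), lowered.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    if (lowered != "countdistinctif")
        return std::nullopt;  // not a match, the call is left as written

    return count_distinct_implementation + "If";  // e.g. "uniqExactIf"
}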
@@ -129,10 +128,7 @@ public: const SelectQueryOptions & select_options = {}, const std::vector & tables_with_columns = {}, const Names & required_result_columns = {}, - std::shared_ptr table_join = {}, - bool is_parameterized_view = false, - const NameToNameMap parameter_values = {}, - const NameToNameMap parameter_types = {}) const; + std::shared_ptr table_join = {}) const; private: static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 7ed7788cf1d..8a7a5024d69 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -91,34 +91,30 @@ void WindowFrame::toString(WriteBuffer & buf) const void WindowFrame::checkValid() const { // Check the validity of offsets. - if (type == WindowFrame::FrameType::ROWS - || type == WindowFrame::FrameType::GROUPS) + if (begin_type == BoundaryType::Offset + && !((begin_offset.getType() == Field::Types::UInt64 + || begin_offset.getType() == Field::Types::Int64) + && begin_offset.get() >= 0 + && begin_offset.get() < INT_MAX)) { - if (begin_type == BoundaryType::Offset - && !((begin_offset.getType() == Field::Types::UInt64 - || begin_offset.getType() == Field::Types::Int64) - && begin_offset.get() >= 0 - && begin_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), begin_offset), - begin_offset.getType()); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), begin_offset), + begin_offset.getType()); + } - if (end_type == BoundaryType::Offset - && !((end_offset.getType() == Field::Types::UInt64 - || end_offset.getType() == Field::Types::Int64) - && end_offset.get() >= 0 - && end_offset.get() < INT_MAX)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", - type, - applyVisitor(FieldVisitorToString(), end_offset), - end_offset.getType()); - } + if (end_type == BoundaryType::Offset + && !((end_offset.getType() == Field::Types::UInt64 + || end_offset.getType() == Field::Types::Int64) + && end_offset.get() >= 0 + && end_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given", + type, + applyVisitor(FieldVisitorToString(), end_offset), + end_offset.getType()); } // Check relative positioning of offsets. 
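The WindowFrame::checkValid refactor just above applies one uniform rule to every frame type: an Offset boundary must carry a nonnegative integer that fits into 32 bits. A self-contained sketch of that check, with std::variant standing in for Field:

#include <climits>
#include <cstdint>
#include <variant>

/// Stand-in for the subset of Field types a frame offset may carry.
using Offset = std::variant<uint64_t, int64_t, double>;

/// True when the offset is an integer in [0, INT_MAX), mirroring the check above.
bool isValidFrameOffset(const Offset & offset)
{
    if (const auto * u = std::get_if<uint64_t>(&offset))
        return *u < static_cast<uint64_t>(INT_MAX);
    if (const auto * i = std::get_if<int64_t>(&offset))
        return *i >= 0 && *i < INT_MAX;
    return false;  // any other Field type is rejected
}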
diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index 48f4d510af7..2231a58c6a9 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -73,6 +73,7 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() {"Create", static_cast(Coordination::OpNum::Create)}, {"Remove", static_cast(Coordination::OpNum::Remove)}, {"Exists", static_cast(Coordination::OpNum::Exists)}, + {"Reconfig", static_cast(Coordination::OpNum::Reconfig)}, {"Get", static_cast(Coordination::OpNum::Get)}, {"Set", static_cast(Coordination::OpNum::Set)}, {"GetACL", static_cast(Coordination::OpNum::GetACL)}, diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 89c73117960..0e810748ab1 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,7 @@ namespace DB namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; extern const int TYPE_MISMATCH; extern const int UNEXPECTED_DATA_AFTER_PARSED_VALUE; } @@ -565,4 +567,39 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t return converted; } +template +static bool decimalEqualsFloat(Field field, Float64 float_value) +{ + auto decimal_field = field.get>(); + auto decimal_to_float = DecimalUtils::convertTo(decimal_field.getValue(), decimal_field.getScale()); + return decimal_to_float == float_value; +} + +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type) +{ + Field result_value = convertFieldToType(from_value, to_type); + + if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) + { + bool is_equal = applyVisitor(FieldVisitorAccurateEquals{}, from_value, result_value); + return is_equal ? result_value : std::optional{}; + } + + if (from_value.getType() == Field::Types::Float64 && Field::isDecimal(result_value.getType())) + { + /// Convert back to Float64 and compare + if (result_value.getType() == Field::Types::Decimal32) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal64) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal128) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + if (result_value.getType() == Field::Types::Decimal256) + return decimalEqualsFloat(result_value, from_value.get()) ? result_value : std::optional{}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown decimal type {}", result_value.getTypeName()); + } + + return result_value; +} + } diff --git a/src/Interpreters/convertFieldToType.h b/src/Interpreters/convertFieldToType.h index 91b631d0e12..7f49ea5479d 100644 --- a/src/Interpreters/convertFieldToType.h +++ b/src/Interpreters/convertFieldToType.h @@ -20,4 +20,8 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co /// Does the same, but throws ARGUMENT_OUT_OF_BOUND if value does not fall into the range. Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint = nullptr); +/// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. 
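For Decimal results, convertFieldToTypeStrict above only accepts the conversion when the converted value compares equal to the original (for Float64 sources it converts the Decimal back to Float64 and compares), returning std::nullopt otherwise. A minimal standalone sketch of that round-trip idea, with a scaled 64-bit integer standing in for Decimal:

#include <cmath>
#include <cstdint>
#include <optional>

/// Convert a double to a fixed-point value with the given decimal scale,
/// but only if converting back yields exactly the same double.
std::optional<int64_t> convertToDecimalStrict(double value, unsigned scale)
{
    const double factor = std::pow(10.0, scale);
    const int64_t candidate = static_cast<int64_t>(std::round(value * factor));

    /// Round-trip comparison: accept the conversion only when no precision was lost.
    if (static_cast<double>(candidate) / factor == value)
        return candidate;
    return std::nullopt;
}

/// convertToDecimalStrict(1.25, 2)  -> 125
/// convertToDecimalStrict(1.255, 2) -> std::nullopt (1.255 does not round-trip at scale 2)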
+/// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type); + } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 5a333172b14..921cd5ff553 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -1,27 +1,24 @@ #include #include -#include #include #include #include #include #include -#include #include #include #include #include #include #include -#include #include #include #include #include -#include #include + namespace DB { @@ -94,18 +91,18 @@ std::pair> evaluateConstantExpression(co if (!result_column) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Element of set in IN, VALUES or LIMIT or aggregate function parameter " + "Element of set in IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument " "is not a constant expression (result column not found): {}", result_name); if (result_column->empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: empty result column after evaluation " - "of constant expression for IN, VALUES or LIMIT or aggregate function parameter"); + "of constant expression for IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument"); /// Expressions like rand() or now() are not constant if (!isColumnConst(*result_column)) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Element of set in IN, VALUES or LIMIT or aggregate function parameter " + "Element of set in IN, VALUES, or LIMIT, or aggregate function parameter, or a table function argument " "is not a constant expression (result column is not const): {}", result_name); return std::make_pair((*result_column)[0], result_type); diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 04c1d545207..750affdfe71 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -49,7 +49,7 @@ static ZooKeeperRetriesInfo getRetriesInfo() ); } -bool isSupportedAlterType(int type) +bool isSupportedAlterTypeForOnClusterDDLQuery(int type) { assert(type != ASTAlterCommand::NO_TYPE); static const std::unordered_set unsupported_alter_types{ @@ -90,7 +90,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, { for (const auto & command : query_alter->command_list->children) { - if (!isSupportedAlterType(command->as().type)) + if (!isSupportedAlterTypeForOnClusterDDLQuery(command->as().type)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query"); } } @@ -559,7 +559,7 @@ Strings DDLQueryStatusSource::getChildrenAllowNoNode(const std::shared_ptrtryGetChildren(node_path, res); if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw Coordination::Exception(code, node_path); + throw Coordination::Exception::fromPath(code, node_path); return res; } diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index 40db13d7ef5..7daf9babf9f 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -21,7 +21,7 @@ class Cluster; using ClusterPtr = std::shared_ptr; /// Returns true if provided ALTER type can be executed ON CLUSTER -bool isSupportedAlterType(int type); +bool 
isSupportedAlterTypeForOnClusterDDLQuery(int type); struct DDLQueryOnClusterParams { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 694226af6b0..bac6807b682 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -75,6 +76,7 @@ #include #include +#include namespace ProfileEvents { @@ -155,7 +157,6 @@ static void logQuery(const String & query, ContextPtr context, bool internal, Qu } } - /// Call this inside catch block. static void setExceptionStackTrace(QueryLogElement & elem) { @@ -208,7 +209,337 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er LOG_INFO(&Poco::Logger::get("executeQuery"), message); } -static void onExceptionBeforeStart( +static void +addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) +{ + const auto time_now = std::chrono::system_clock::now(); + UInt64 elapsed_microseconds = info.elapsed_microseconds; + element.event_time = timeInSeconds(time_now); + element.event_time_microseconds = timeInMicroseconds(time_now); + element.query_duration_ms = elapsed_microseconds / 1000; + + ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, elapsed_microseconds); + if (query_ast->as() || query_ast->as()) + { + ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, elapsed_microseconds); + } + else if (query_ast->as()) + { + ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, elapsed_microseconds); + } + else + { + ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, elapsed_microseconds); + } + + element.read_rows = info.read_rows; + element.read_bytes = info.read_bytes; + + element.written_rows = info.written_rows; + element.written_bytes = info.written_bytes; + + element.memory_usage = info.peak_memory_usage > 0 ? 
info.peak_memory_usage : 0; + + element.thread_ids = info.thread_ids; + element.profile_counters = info.profile_counters; + + /// We need to refresh the access info since dependent views might have added extra information, either during + /// creation of the view (PushingToViews chain) or while executing its internal SELECT + const auto & access_info = context_ptr->getQueryAccessInfo(); + element.query_databases.insert(access_info.databases.begin(), access_info.databases.end()); + element.query_tables.insert(access_info.tables.begin(), access_info.tables.end()); + element.query_columns.insert(access_info.columns.begin(), access_info.columns.end()); + element.query_partitions.insert(access_info.partitions.begin(), access_info.partitions.end()); + element.query_projections.insert(access_info.projections.begin(), access_info.projections.end()); + element.query_views.insert(access_info.views.begin(), access_info.views.end()); + + const auto & factories_info = context_ptr->getQueryFactoriesInfo(); + element.used_aggregate_functions = factories_info.aggregate_functions; + element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators; + element.used_database_engines = factories_info.database_engines; + element.used_data_type_families = factories_info.data_type_families; + element.used_dictionaries = factories_info.dictionaries; + element.used_formats = factories_info.formats; + element.used_functions = factories_info.functions; + element.used_storages = factories_info.storages; + element.used_table_functions = factories_info.table_functions; + + element.async_read_counters = context_ptr->getAsyncReadCounters(); +} + + +QueryLogElement logQueryStart( + const std::chrono::time_point & query_start_time, + const ContextMutablePtr & context, + const String & query_for_logging, + const ASTPtr & query_ast, + const QueryPipeline & pipeline, + const std::unique_ptr & interpreter, + bool internal, + const String & query_database, + const String & query_table, + bool async_insert) +{ + const Settings & settings = context->getSettingsRef(); + + QueryLogElement elem; + + elem.type = QueryLogElementType::QUERY_START; + elem.event_time = timeInSeconds(query_start_time); + elem.event_time_microseconds = timeInMicroseconds(query_start_time); + elem.query_start_time = timeInSeconds(query_start_time); + elem.query_start_time_microseconds = timeInMicroseconds(query_start_time); + + elem.current_database = context->getCurrentDatabase(); + elem.query = query_for_logging; + if (settings.log_formatted_queries) + elem.formatted_query = queryToString(query_ast); + elem.normalized_query_hash = normalizedQueryHash(query_for_logging); + elem.query_kind = query_ast->getQueryKind(); + + elem.client_info = context->getClientInfo(); + + if (auto txn = context->getCurrentTransaction()) + elem.tid = txn->tid; + + bool log_queries = settings.log_queries && !internal; + + /// Log into system table start of query execution, if need. + if (log_queries) + { + /// This check is not obvious, but without it 01220_scalar_optimization_in_alter fails. 
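The logging refactor above splits query_log handling into free functions: logQueryStart builds the QUERY_START element, and the logQueryFinish / logQueryException functions that follow fill in runtime counters on success or exception details on failure. A rough sketch of the intended call sequence around query execution (simplified signatures, not the actual ones in this file):

#include <string>

struct QueryLogElement { std::string query; bool finished = false; };

/// Simplified stand-ins for the three logging stages.
QueryLogElement logQueryStart(const std::string & query) { return {query, false}; }
void logQueryFinish(QueryLogElement & elem) { elem.finished = true; /* counters, processors profile, query span */ }
void logQueryException(QueryLogElement & elem) { (void)elem; /* exception code/message, stack trace if enabled */ }

template <typename Execute>
void runLoggedQuery(const std::string & query, Execute && execute)
{
    QueryLogElement elem = logQueryStart(query);
    try
    {
        execute();             // run the pipeline
        logQueryFinish(elem);  // success path
    }
    catch (...)
    {
        logQueryException(elem);  // failure path
        throw;
    }
}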
+ if (pipeline.initialized()) + { + const auto & info = context->getQueryAccessInfo(); + elem.query_databases = info.databases; + elem.query_tables = info.tables; + elem.query_columns = info.columns; + elem.query_partitions = info.partitions; + elem.query_projections = info.projections; + elem.query_views = info.views; + } + + if (async_insert) + InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); + else if (interpreter) + interpreter->extendQueryLogElem(elem, query_ast, context, query_database, query_table); + + if (settings.log_query_settings) + elem.query_settings = std::make_shared(context->getSettingsRef()); + + elem.log_comment = settings.log_comment; + if (elem.log_comment.size() > settings.max_query_size) + elem.log_comment.resize(settings.max_query_size); + + if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + } + + return elem; +} + +void logQueryFinish( + QueryLogElement & elem, + const ContextMutablePtr & context, + const ASTPtr & query_ast, + const QueryPipeline & query_pipeline, + bool pulling_pipeline, + std::shared_ptr query_span, + QueryCache::Usage query_cache_usage, + bool internal) +{ + const Settings & settings = context->getSettingsRef(); + auto log_queries = settings.log_queries && !internal; + auto log_queries_min_type = settings.log_queries_min_type; + auto log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(); + auto log_processors_profiles = settings.log_processors_profiles; + + QueryStatusPtr process_list_elem = context->getProcessListElement(); + if (process_list_elem) + { + /// Update performance counters before logging to query_log + CurrentThread::finalizePerformanceCounters(); + + QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); + elem.type = QueryLogElementType::QUERY_FINISH; + + addStatusInfoToQueryLogElement(elem, info, query_ast, context); + + if (pulling_pipeline) + { + query_pipeline.tryGetResultRowsAndBytes(elem.result_rows, elem.result_bytes); + } + else /// will be used only for ordinary INSERT queries + { + auto progress_out = process_list_elem->getProgressOut(); + elem.result_rows = progress_out.written_rows; + elem.result_bytes = progress_out.written_bytes; + } + + auto progress_callback = context->getProgressCallback(); + if (progress_callback) + { + Progress p; + p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}}); + progress_callback(p); + } + + if (elem.read_rows != 0) + { + double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; + double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; + LOG_DEBUG( + &Poco::Logger::get("executeQuery"), + "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", + elem.read_rows, + ReadableSize(elem.read_bytes), + elapsed_seconds, + rows_per_second, + ReadableSize(elem.read_bytes / elapsed_seconds)); + } + + elem.query_cache_usage = query_cache_usage; + + if (log_queries && elem.type >= log_queries_min_type + && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + if (log_processors_profiles) + { + if (auto processors_profile_log = context->getProcessorsProfileLog()) + { + ProcessorProfileLogElement processor_elem; + processor_elem.event_time = elem.event_time; + 
processor_elem.event_time_microseconds = elem.event_time_microseconds; + processor_elem.initial_query_id = elem.client_info.initial_query_id; + processor_elem.query_id = elem.client_info.current_query_id; + + auto get_proc_id = [](const IProcessor & proc) -> UInt64 { return reinterpret_cast(&proc); }; + + for (const auto & processor : query_pipeline.getProcessors()) + { + std::vector parents; + for (const auto & port : processor->getOutputs()) + { + if (!port.isConnected()) + continue; + const IProcessor & next = port.getInputPort().getProcessor(); + parents.push_back(get_proc_id(next)); + } + + processor_elem.id = get_proc_id(*processor); + processor_elem.parent_ids = std::move(parents); + + processor_elem.plan_step = reinterpret_cast(processor->getQueryPlanStep()); + processor_elem.plan_group = processor->getQueryPlanStepGroup(); + + processor_elem.processor_name = processor->getName(); + + /// NOTE: convert this to UInt64 + processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); + processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); + processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); + + auto stats = processor->getProcessorDataStats(); + processor_elem.input_rows = stats.input_rows; + processor_elem.input_bytes = stats.input_bytes; + processor_elem.output_rows = stats.output_rows; + processor_elem.output_bytes = stats.output_bytes; + + processors_profile_log->add(processor_elem); + } + } + } + } + + if (query_span) + { + query_span->addAttribute("db.statement", elem.query); + query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); + query_span->addAttribute("clickhouse.query_status", "QueryFinish"); + query_span->addAttributeIfNotEmpty("clickhouse.tracestate", OpenTelemetry::CurrentContext().tracestate); + query_span->addAttributeIfNotZero("clickhouse.read_rows", elem.read_rows); + query_span->addAttributeIfNotZero("clickhouse.read_bytes", elem.read_bytes); + query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows); + query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes); + query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage); + query_span->finish(); + } +} + +void logQueryException( + QueryLogElement & elem, + const ContextMutablePtr & context, + const Stopwatch & start_watch, + const ASTPtr & query_ast, + std::shared_ptr query_span, + bool internal, + bool log_error) +{ + const Settings & settings = context->getSettingsRef(); + auto log_queries = settings.log_queries && !internal; + auto log_queries_min_type = settings.log_queries_min_type; + auto log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(); + + elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; + elem.exception_code = getCurrentExceptionCode(); + auto exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); + elem.exception = std::move(exception_message.text); + elem.exception_format_string = exception_message.format_string; + + QueryStatusPtr process_list_elem = context->getProcessListElement(); + + /// Update performance counters before logging to query_log + CurrentThread::finalizePerformanceCounters(); + const auto time_now = std::chrono::system_clock::now(); + elem.event_time = timeInSeconds(time_now); + elem.event_time_microseconds = timeInMicroseconds(time_now); + + if (process_list_elem) + { + QueryStatusInfo info = 
process_list_elem->getInfo(true, settings.log_profile_events, false); + addStatusInfoToQueryLogElement(elem, info, query_ast, context); + } + else + { + elem.query_duration_ms = start_watch.elapsedMilliseconds(); + } + + elem.query_cache_usage = QueryCache::Usage::None; + + if (settings.calculate_text_stack_trace && log_error) + setExceptionStackTrace(elem); + logException(context, elem, log_error); + + /// In case of exception we log internal queries also + if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + + ProfileEvents::increment(ProfileEvents::FailedQuery); + if (query_ast->as() || query_ast->as()) + ProfileEvents::increment(ProfileEvents::FailedSelectQuery); + else if (query_ast->as()) + ProfileEvents::increment(ProfileEvents::FailedInsertQuery); + + if (query_span) + { + query_span->addAttribute("db.statement", elem.query); + query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); + query_span->addAttribute("clickhouse.exception", elem.exception); + query_span->addAttribute("clickhouse.exception_code", elem.exception_code); + query_span->finish(); + } +} + +void logExceptionBeforeStart( const String & query_for_logging, ContextPtr context, ASTPtr ast, @@ -331,7 +662,7 @@ static std::tuple executeQueryImpl( /// the value passed by the client Stopwatch start_watch{CLOCK_MONOTONIC}; - auto & client_info = context->getClientInfo(); + const auto & client_info = context->getClientInfo(); if (!internal) { @@ -343,8 +674,7 @@ static std::tuple executeQueryImpl( // On the other hand, if it's initialized then take it as the start of the query if (client_info.initial_query_start_time == 0) { - client_info.initial_query_start_time = timeInSeconds(query_start_time); - client_info.initial_query_start_time_microseconds = timeInMicroseconds(query_start_time); + context->setInitialQueryStartTime(query_start_time); } else { @@ -378,10 +708,14 @@ static std::tuple executeQueryImpl( /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } + else if (settings.dialect == Dialect::prql && !internal) + { + ParserPRQLQuery parser(max_query_size, settings.max_parser_depth); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } else { ParserQuery parser(end, settings.allow_settings_after_format_in_insert); - /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); } @@ -395,11 +729,16 @@ static std::tuple executeQueryImpl( is_create_parameterized_view = create_query->isParameterizedView(); /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
- if (!is_create_parameterized_view && context->hasQueryParameters()) + /// Even if we don't have parameters in query_context, check that AST doesn't have unknown parameters + bool probably_has_params = find_first_symbols<'{'>(begin, end) != end; + if (!is_create_parameterized_view && probably_has_params) { ReplaceQueryParameterVisitor visitor(context->getQueryParameters()); visitor.visit(ast); - query = serializeAST(*ast); + if (visitor.getNumberOfReplacedParameters()) + query = serializeAST(*ast); + else + query.assign(begin, query_end); } else { @@ -431,7 +770,7 @@ static std::tuple executeQueryImpl( logQuery(query_for_logging, context, internal, stage); if (!internal) - onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); + logExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } @@ -647,7 +986,7 @@ static std::tuple executeQueryImpl( QueryCachePtr query_cache = context->getQueryCache(); const bool can_use_query_cache = query_cache != nullptr && settings.use_query_cache && !internal && (ast->as() || ast->as()); - bool write_into_query_cache = false; + QueryCache::Usage query_cache_usage = QueryCache::Usage::None; if (!async_insert) { @@ -664,6 +1003,7 @@ static std::tuple executeQueryImpl( QueryPipeline pipeline; pipeline.readFromQueryCache(reader.getSource(), reader.getSourceTotals(), reader.getSourceExtremes()); res.pipeline = std::move(pipeline); + query_cache_usage = QueryCache::Usage::Read; return true; } } @@ -699,6 +1039,11 @@ static std::tuple executeQueryImpl( } + // InterpreterSelectQueryAnalyzer does not build QueryPlan in the constructor. + // We need to force to build it here to check if we need to ignore quota. + if (auto * interpreter_with_analyzer = dynamic_cast(interpreter.get())) + interpreter_with_analyzer->getQueryPlan(); + if (!interpreter->ignoreQuota() && !quota_checked) { quota = context->getQuota(); @@ -767,7 +1112,7 @@ static std::tuple executeQueryImpl( settings.query_cache_max_size_in_bytes, settings.query_cache_max_entries)); res.pipeline.writeResultIntoQueryCache(query_cache_writer); - write_into_query_cache = true; + query_cache_usage = QueryCache::Usage::Write; } } @@ -804,273 +1149,42 @@ static std::tuple executeQueryImpl( /// Everything related to query log. { - QueryLogElement elem; - - elem.type = QueryLogElementType::QUERY_START; - - elem.event_time = timeInSeconds(query_start_time); - elem.event_time_microseconds = timeInMicroseconds(query_start_time); - elem.query_start_time = timeInSeconds(query_start_time); - elem.query_start_time_microseconds = timeInMicroseconds(query_start_time); - - elem.current_database = context->getCurrentDatabase(); - elem.query = query_for_logging; - if (settings.log_formatted_queries) - elem.formatted_query = queryToString(ast); - elem.normalized_query_hash = normalizedQueryHash(query_for_logging); - elem.query_kind = ast->getQueryKind(); - - elem.client_info = client_info; - - if (auto txn = context->getCurrentTransaction()) - elem.tid = txn->tid; - - bool log_queries = settings.log_queries && !internal; - - /// Log into system table start of query execution, if need. - if (log_queries) - { - /// This check is not obvious, but without it 01220_scalar_optimization_in_alter fails. 
- if (pipeline.initialized()) - { - const auto & info = context->getQueryAccessInfo(); - elem.query_databases = info.databases; - elem.query_tables = info.tables; - elem.query_columns = info.columns; - elem.query_partitions = info.partitions; - elem.query_projections = info.projections; - elem.query_views = info.views; - } - - if (async_insert) - InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); - else if (interpreter) - interpreter->extendQueryLogElem(elem, ast, context, query_database, query_table); - - if (settings.log_query_settings) - elem.query_settings = std::make_shared(context->getSettingsRef()); - - elem.log_comment = settings.log_comment; - if (elem.log_comment.size() > settings.max_query_size) - elem.log_comment.resize(settings.max_query_size); - - if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - } - - /// Common code for finish and exception callbacks - auto status_info_to_query_log - = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable - { - const auto time_now = std::chrono::system_clock::now(); - UInt64 elapsed_microseconds = info.elapsed_microseconds; - element.event_time = timeInSeconds(time_now); - element.event_time_microseconds = timeInMicroseconds(time_now); - element.query_duration_ms = elapsed_microseconds / 1000; - - ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, elapsed_microseconds); - if (query_ast->as() || query_ast->as()) - { - ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, elapsed_microseconds); - } - else if (query_ast->as()) - { - ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, elapsed_microseconds); - } - else - { - ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, elapsed_microseconds); - } - - element.read_rows = info.read_rows; - element.read_bytes = info.read_bytes; - - element.written_rows = info.written_rows; - element.written_bytes = info.written_bytes; - - element.memory_usage = info.peak_memory_usage > 0 ? 
info.peak_memory_usage : 0; - - element.thread_ids = info.thread_ids; - element.profile_counters = info.profile_counters; - - /// We need to refresh the access info since dependent views might have added extra information, either during - /// creation of the view (PushingToViews chain) or while executing its internal SELECT - const auto & access_info = context_ptr->getQueryAccessInfo(); - element.query_databases.insert(access_info.databases.begin(), access_info.databases.end()); - element.query_tables.insert(access_info.tables.begin(), access_info.tables.end()); - element.query_columns.insert(access_info.columns.begin(), access_info.columns.end()); - element.query_partitions.insert(access_info.partitions.begin(), access_info.partitions.end()); - element.query_projections.insert(access_info.projections.begin(), access_info.projections.end()); - element.query_views.insert(access_info.views.begin(), access_info.views.end()); - - const auto & factories_info = context_ptr->getQueryFactoriesInfo(); - element.used_aggregate_functions = factories_info.aggregate_functions; - element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators; - element.used_database_engines = factories_info.database_engines; - element.used_data_type_families = factories_info.data_type_families; - element.used_dictionaries = factories_info.dictionaries; - element.used_formats = factories_info.formats; - element.used_functions = factories_info.functions; - element.used_storages = factories_info.storages; - element.used_table_functions = factories_info.table_functions; - - element.async_read_counters = context_ptr->getAsyncReadCounters(); - }; - + QueryLogElement elem = logQueryStart( + query_start_time, + context, + query_for_logging, + ast, + pipeline, + interpreter, + internal, + query_database, + query_table, + async_insert); /// Also make possible for caller to log successful query finish and exception during execution. auto finish_callback = [elem, context, ast, - write_into_query_cache, - log_queries, - log_queries_min_type = settings.log_queries_min_type, - log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), - log_processors_profiles = settings.log_processors_profiles, - status_info_to_query_log, + query_cache_usage, + internal, implicit_txn_control, execute_implicit_tcl_query, pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable { - if (write_into_query_cache) + if (query_cache_usage == QueryCache::Usage::Write) /// Trigger the actual write of the buffered query result into the query cache. This is done explicitly to prevent /// partial/garbage results in case of exceptions during query execution. 
query_pipeline.finalizeWriteInQueryCache(); - QueryStatusPtr process_list_elem = context->getProcessListElement(); + logQueryFinish(elem, context, ast, query_pipeline, pulling_pipeline, query_span, query_cache_usage, internal); - if (process_list_elem) - { - /// Update performance counters before logging to query_log - CurrentThread::finalizePerformanceCounters(); - - QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); - elem.type = QueryLogElementType::QUERY_FINISH; - - status_info_to_query_log(elem, info, ast, context); - - if (pulling_pipeline) - { - query_pipeline.tryGetResultRowsAndBytes(elem.result_rows, elem.result_bytes); - } - else /// will be used only for ordinary INSERT queries - { - auto progress_out = process_list_elem->getProgressOut(); - elem.result_rows = progress_out.written_rows; - elem.result_bytes = progress_out.written_bytes; - } - - auto progress_callback = context->getProgressCallback(); - if (progress_callback) - { - Progress p; - p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}}); - progress_callback(p); - } - - if (elem.read_rows != 0) - { - double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; - double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; - LOG_DEBUG( - &Poco::Logger::get("executeQuery"), - "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", - elem.read_rows, - ReadableSize(elem.read_bytes), - elapsed_seconds, - rows_per_second, - ReadableSize(elem.read_bytes / elapsed_seconds)); - } - - if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - if (log_processors_profiles) - { - if (auto processors_profile_log = context->getProcessorsProfileLog()) - { - ProcessorProfileLogElement processor_elem; - processor_elem.event_time = elem.event_time; - processor_elem.event_time_microseconds = elem.event_time_microseconds; - processor_elem.initial_query_id = elem.client_info.initial_query_id; - processor_elem.query_id = elem.client_info.current_query_id; - - auto get_proc_id = [](const IProcessor & proc) -> UInt64 - { - return reinterpret_cast(&proc); - }; - - for (const auto & processor : query_pipeline.getProcessors()) - { - std::vector parents; - for (const auto & port : processor->getOutputs()) - { - if (!port.isConnected()) - continue; - const IProcessor & next = port.getInputPort().getProcessor(); - parents.push_back(get_proc_id(next)); - } - - processor_elem.id = get_proc_id(*processor); - processor_elem.parent_ids = std::move(parents); - - processor_elem.plan_step = reinterpret_cast(processor->getQueryPlanStep()); - processor_elem.plan_group = processor->getQueryPlanStepGroup(); - - processor_elem.processor_name = processor->getName(); - - /// NOTE: convert this to UInt64 - processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); - processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); - processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); - - auto stats = processor->getProcessorDataStats(); - processor_elem.input_rows = stats.input_rows; - processor_elem.input_bytes = stats.input_bytes; - processor_elem.output_rows = stats.output_rows; - processor_elem.output_bytes = stats.output_bytes; - - processors_profile_log->add(processor_elem); - } - } - } - - if (*implicit_txn_control) - 
execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); - } - - if (query_span) - { - query_span->addAttribute("db.statement", elem.query); - query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); - query_span->addAttribute("clickhouse.query_status", "QueryFinish"); - query_span->addAttributeIfNotEmpty("clickhouse.tracestate", OpenTelemetry::CurrentContext().tracestate); - query_span->addAttributeIfNotZero("clickhouse.read_rows", elem.read_rows); - query_span->addAttributeIfNotZero("clickhouse.read_bytes", elem.read_bytes); - query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows); - query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes); - query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage); - query_span->finish(); - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); }; - auto exception_callback = [start_watch, - elem, - context, - ast, - log_queries, - log_queries_min_type = settings.log_queries_min_type, - log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), - my_quota(quota), - status_info_to_query_log, - implicit_txn_control, - execute_implicit_tcl_query, - query_span](bool log_error) mutable + auto exception_callback = + [start_watch, elem, context, ast, internal, my_quota(quota), implicit_txn_control, execute_implicit_tcl_query, query_span]( + bool log_error) mutable { if (*implicit_txn_control) execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK); @@ -1080,60 +1194,7 @@ static std::tuple executeQueryImpl( if (my_quota) my_quota->used(QuotaType::ERRORS, 1, /* check_exceeded = */ false); - elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; - elem.exception_code = getCurrentExceptionCode(); - auto exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); - elem.exception = std::move(exception_message.text); - elem.exception_format_string = exception_message.format_string; - - QueryStatusPtr process_list_elem = context->getProcessListElement(); - const Settings & current_settings = context->getSettingsRef(); - - /// Update performance counters before logging to query_log - CurrentThread::finalizePerformanceCounters(); - const auto time_now = std::chrono::system_clock::now(); - elem.event_time = timeInSeconds(time_now); - elem.event_time_microseconds = timeInMicroseconds(time_now); - - if (process_list_elem) - { - QueryStatusInfo info = process_list_elem->getInfo(true, current_settings.log_profile_events, false); - status_info_to_query_log(elem, info, ast, context); - } - else - { - elem.query_duration_ms = start_watch.elapsedMilliseconds(); - } - - if (current_settings.calculate_text_stack_trace && log_error) - setExceptionStackTrace(elem); - logException(context, elem, log_error); - - /// In case of exception we log internal queries also - if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - - ProfileEvents::increment(ProfileEvents::FailedQuery); - if (ast->as() || ast->as()) - { - ProfileEvents::increment(ProfileEvents::FailedSelectQuery); - } - else if (ast->as()) - { - ProfileEvents::increment(ProfileEvents::FailedInsertQuery); - } - - if (query_span) - { - query_span->addAttribute("db.statement", elem.query); - query_span->addAttribute("clickhouse.query_id", 
elem.client_info.current_query_id); - query_span->addAttribute("clickhouse.exception", elem.exception); - query_span->addAttribute("clickhouse.exception_code", elem.exception_code); - query_span->finish(); - } + logQueryException(elem, context, start_watch, ast, query_span, internal, log_error); }; res.finish_callback = std::move(finish_callback); @@ -1148,7 +1209,7 @@ static std::tuple executeQueryImpl( txn->onException(); if (!internal) - onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); + logExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 93152cc1de6..f2a12bbef18 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -1,15 +1,21 @@ #pragma once #include -#include -#include #include +#include +#include +#include + +#include +#include namespace DB { +class IInterpreter; class ReadBuffer; class WriteBuffer; +struct QueryStatusInfo; struct QueryResultDetails { @@ -66,4 +72,42 @@ BlockIO executeQuery( /// if built pipeline does not require any input and does not produce any output. void executeTrivialBlockIO(BlockIO & streams, ContextPtr context); +/// Prepares a QueryLogElement and, if enabled, logs it to system.query_log +QueryLogElement logQueryStart( + const std::chrono::time_point & query_start_time, + const ContextMutablePtr & context, + const String & query_for_logging, + const ASTPtr & query_ast, + const QueryPipeline & pipeline, + const std::unique_ptr & interpreter, + bool internal, + const String & query_database, + const String & query_table, + bool async_insert); + +void logQueryFinish( + QueryLogElement & elem, + const ContextMutablePtr & context, + const ASTPtr & query_ast, + const QueryPipeline & query_pipeline, + bool pulling_pipeline, + std::shared_ptr query_span, + QueryCache::Usage query_cache_usage, + bool internal); + +void logQueryException( + QueryLogElement & elem, + const ContextMutablePtr & context, + const Stopwatch & start_watch, + const ASTPtr & query_ast, + std::shared_ptr query_span, + bool internal, + bool log_error); + +void logExceptionBeforeStart( + const String & query_for_logging, + ContextPtr context, + ASTPtr ast, + const std::shared_ptr & query_span, + UInt64 elapsed_millliseconds); } diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 5bbd2667f55..4cac2f0e20c 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -306,7 +306,9 @@ void fillMissingColumns( return; size_t level = ISerialization::getArrayLevel(subpath); - assert(level < num_dimensions); + /// It can happen if element of Array is Map. 
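Taken together, the declarations added to executeQuery.h above replace the old inline lambdas with a start/finish/exception trio that shares one QueryLogElement. A deliberately simplified, self-contained sketch of that lifecycle follows; the stand-in types and signatures are illustrative only, not the real ClickHouse ones:

```cpp
#include <chrono>
#include <iostream>
#include <stdexcept>
#include <string>

enum class EventType { QueryStart, QueryFinish, ExceptionWhileProcessing };

/// Minimal stand-in for QueryLogElement: one record that is created at start
/// and later finalized either as a finish event or as an exception event.
struct QueryLogElement
{
    EventType type = EventType::QueryStart;
    std::string query;
    std::string exception;
    std::chrono::steady_clock::time_point start;
    std::chrono::milliseconds duration{0};
};

QueryLogElement logQueryStart(const std::string & query)
{
    QueryLogElement elem;
    elem.query = query;
    elem.start = std::chrono::steady_clock::now();
    std::cout << "QUERY_START: " << query << '\n';
    return elem;
}

void logQueryFinish(QueryLogElement & elem)
{
    elem.type = EventType::QueryFinish;
    elem.duration = std::chrono::duration_cast<std::chrono::milliseconds>(
        std::chrono::steady_clock::now() - elem.start);
    std::cout << "QUERY_FINISH after " << elem.duration.count() << " ms\n";
}

void logQueryException(QueryLogElement & elem, const std::exception & e)
{
    elem.type = EventType::ExceptionWhileProcessing;
    elem.exception = e.what();
    std::cout << "EXCEPTION_WHILE_PROCESSING: " << elem.exception << '\n';
}

int main()
{
    QueryLogElement elem = logQueryStart("SELECT 1");
    try
    {
        /// ... run the query here; on success the finish path fires ...
        logQueryFinish(elem);
    }
    catch (const std::exception & e)
    {
        logQueryException(elem, e);
    }
}
```

In the actual patch, finish_callback and exception_callback capture the element produced by logQueryStart and hand it to logQueryFinish or logQueryException, exactly as the try/catch above does in miniature.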
+ if (level >= num_dimensions) + return; auto stream_name = ISerialization::getFileNameForStream(*requested_column, subpath); auto it = offsets_columns.find(stream_name); diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 83af2684322..aeb45c08bea 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -45,10 +45,10 @@ namespace ErrorCodes namespace ActionLocks { - extern StorageActionBlockType PartsMerge; - extern StorageActionBlockType PartsFetch; - extern StorageActionBlockType PartsSend; - extern StorageActionBlockType DistributedSend; + extern const StorageActionBlockType PartsMerge; + extern const StorageActionBlockType PartsFetch; + extern const StorageActionBlockType PartsSend; + extern const StorageActionBlockType DistributedSend; } static void executeCreateQuery( @@ -250,6 +250,9 @@ static void loadSystemDatabaseImpl(ContextMutablePtr context, const String & dat { String path = context->getPath() + "metadata/" + database_name; String metadata_file = path + ".sql"; + if (fs::exists(metadata_file + ".tmp")) + fs::remove(metadata_file + ".tmp"); + if (fs::exists(fs::path(metadata_file))) { /// 'has_force_restore_data_flag' is true, to not fail on loading query_log table, if it is corrupted. diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index b9d12c8ed42..dab14a66ed7 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -470,6 +470,7 @@ TEST_F(FileCacheTest, get) auto & file_segment2 = get(holder2, 2); ASSERT_TRUE(file_segment2.getOrSetDownloader() != FileSegment::getCallerId()); + ASSERT_EQ(file_segment2.state(), State::DOWNLOADING); { std::lock_guard lock(mutex); @@ -478,8 +479,7 @@ TEST_F(FileCacheTest, get) cv.notify_one(); file_segment2.wait(file_segment2.range().right); - file_segment2.complete(); - ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED); + ASSERT_EQ(file_segment2.getDownloadedSize(false), file_segment2.range().size()); }); { @@ -488,7 +488,7 @@ TEST_F(FileCacheTest, get) } download(file_segment); - ASSERT_TRUE(file_segment.state() == State::DOWNLOADED); + ASSERT_EQ(file_segment.state(), State::DOWNLOADED); other_1.join(); diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 0c3a7bd615d..90b3457b7d8 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -21,6 +21,12 @@ namespace fs = std::filesystem; namespace DB { class SensitiveDataMasker; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + } @@ -34,22 +40,18 @@ static std::string createDirectory(const std::string & file) return path; } -#ifndef WITHOUT_TEXT_LOG -void Loggers::setTextLog(std::shared_ptr log, int max_priority) +static std::string renderFileNameTemplate(time_t now, const std::string & file_path) { - text_log = log; - text_log_max_priority = max_priority; + fs::path path{file_path}; + std::tm buf; + localtime_r(&now, &buf); + std::ostringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + ss << std::put_time(&buf, file_path.c_str()); + return path.replace_filename(ss.str()); } -#endif void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name) { -#ifndef WITHOUT_TEXT_LOG - if (split) - if (auto log = text_log.lock()) - split->addTextLog(log, text_log_max_priority); -#endif - auto current_logger = config.getString("logger", ""); if (config_logger.has_value() && *config_logger == 
current_logger) return; @@ -68,9 +70,12 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log /// The maximum (the most verbose) of those will be used as default for Poco loggers int max_log_level = 0; - const auto log_path = config.getString("logger.log", ""); - if (!log_path.empty()) + time_t now = std::time({}); + + const auto log_path_prop = config.getString("logger.log", ""); + if (!log_path_prop.empty()) { + const auto log_path = renderFileNameTemplate(now, log_path_prop); createDirectory(log_path); std::string ext; @@ -109,9 +114,10 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log split->addChannel(log, "log"); } - const auto errorlog_path = config.getString("logger.errorlog", ""); - if (!errorlog_path.empty()) + const auto errorlog_path_prop = config.getString("logger.errorlog", ""); + if (!errorlog_path_prop.empty()) { + const auto errorlog_path = renderFileNameTemplate(now, errorlog_path_prop); createDirectory(errorlog_path); // NOTE: we don't use notice & critical in the code, so in practice error log collects fatal & error & warning. @@ -262,6 +268,44 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } } +#ifndef WITHOUT_TEXT_LOG + if (config.has("text_log")) + { + String text_log_level_str = config.getString("text_log.level", "trace"); + int text_log_level = Poco::Logger::parseLevel(text_log_level_str); + + DB::SystemLogQueueSettings log_settings; + log_settings.flush_interval_milliseconds = config.getUInt64("text_log.flush_interval_milliseconds", + DB::TextLog::getDefaultFlushIntervalMilliseconds()); + + log_settings.max_size_rows = config.getUInt64("text_log.max_size_rows", + DB::TextLog::getDefaultMaxSize()); + + if (log_settings.max_size_rows< 1) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "text_log.max_size_rows {} should be 1 at least", + log_settings.max_size_rows); + + log_settings.reserved_size_rows = config.getUInt64("text_log.reserved_size_rows", DB::TextLog::getDefaultReservedSize()); + + if (log_settings.max_size_rows < log_settings.reserved_size_rows) + { + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, + "text_log.max_size {0} should be greater or equal to text_log.reserved_size_rows {1}", + log_settings.max_size_rows, + log_settings.reserved_size_rows); + } + + log_settings.buffer_size_rows_flush_threshold = config.getUInt64("text_log.buffer_size_rows_flush_threshold", + log_settings.max_size_rows / 2); + + log_settings.notify_flush_on_crash = config.getBool("text_log.flush_on_crash", + DB::TextLog::shouldNotifyFlushOnCrash()); + + log_settings.turn_off_logger = DB::TextLog::shouldTurnOffLogger(); + + split->addTextLog(DB::TextLog::getLogQueue(log_settings), text_log_level); + } +#endif } void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger) diff --git a/src/Loggers/Loggers.h b/src/Loggers/Loggers.h index ebc10954b94..9eff731a4c5 100644 --- a/src/Loggers/Loggers.h +++ b/src/Loggers/Loggers.h @@ -7,12 +7,6 @@ #include #include "OwnSplitChannel.h" -#ifndef WITHOUT_TEXT_LOG -namespace DB -{ - class TextLog; -} -#endif namespace Poco::Util { @@ -29,9 +23,6 @@ public: /// Close log files. On next log write files will be reopened. void closeLogs(Poco::Logger & logger); -#ifndef WITHOUT_TEXT_LOG - void setTextLog(std::shared_ptr log, int max_priority); -#endif private: Poco::AutoPtr log_file; @@ -41,10 +32,6 @@ private: /// Previous value of logger element in config. 
It is used to reinitialize loggers whenever the value changed. std::optional config_logger; -#ifndef WITHOUT_TEXT_LOG - std::weak_ptr text_log; - int text_log_max_priority = -1; -#endif Poco::AutoPtr split; }; diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index 03db198c305..618d453c238 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -1,7 +1,6 @@ #include "OwnSplitChannel.h" #include "OwnFormattingChannel.h" -#include #include #include #include @@ -135,13 +134,10 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) elem.source_line = msg.getSourceLine(); elem.message_format_string = msg.getFormatString(); - std::shared_ptr text_log_locked{}; - { - std::lock_guard lock(text_log_mutex); - text_log_locked = text_log.lock(); - } + std::shared_ptr> text_log_locked{}; + text_log_locked = text_log.lock(); if (text_log_locked) - text_log_locked->add(elem); + text_log_locked->push(std::move(elem)); } #endif } @@ -153,10 +149,9 @@ void OwnSplitChannel::addChannel(Poco::AutoPtr channel, const std } #ifndef WITHOUT_TEXT_LOG -void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_priority) +void OwnSplitChannel::addTextLog(std::shared_ptr> log_queue, int max_priority) { - std::lock_guard lock(text_log_mutex); - text_log = log; + text_log = log_queue; text_log_max_priority.store(max_priority, std::memory_order_relaxed); } #endif diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index 80305c1ccee..a6ee8af5b14 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -10,7 +10,9 @@ #ifndef WITHOUT_TEXT_LOG namespace DB { - class TextLog; + template class SystemLogQueue; + struct TextLogElement; + using TextLogQueue = SystemLogQueue; } #endif @@ -31,7 +33,7 @@ public: void addChannel(Poco::AutoPtr channel, const std::string & name); #ifndef WITHOUT_TEXT_LOG - void addTextLog(std::shared_ptr log, int max_priority); + void addTextLog(std::shared_ptr log_queue, int max_priority); #endif void setLevel(const std::string & name, int level); @@ -45,10 +47,8 @@ private: using ExtendedChannelPtrPair = std::pair; std::map channels; - std::mutex text_log_mutex; - #ifndef WITHOUT_TEXT_LOG - std::weak_ptr text_log; + std::weak_ptr text_log; std::atomic text_log_max_priority = -1; #endif }; diff --git a/src/Parsers/ASTAlterNamedCollectionQuery.cpp b/src/Parsers/ASTAlterNamedCollectionQuery.cpp index 7e95147ad75..6363a7306bd 100644 --- a/src/Parsers/ASTAlterNamedCollectionQuery.cpp +++ b/src/Parsers/ASTAlterNamedCollectionQuery.cpp @@ -15,6 +15,8 @@ ASTPtr ASTAlterNamedCollectionQuery::clone() const void ASTAlterNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "Alter NAMED COLLECTION "; + if (if_exists) + settings.ostr << "IF EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? 
hilite_none : ""); formatOnCluster(settings); if (!changes.empty()) diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 61e5903fad5..955320c318c 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes String ASTAlterCommand::getID(char delim) const { - return String("AlterCommand") + delim + typeToString(type); + return fmt::format("AlterCommand{}{}", delim, type); } ASTPtr ASTAlterCommand::clone() const @@ -80,53 +80,6 @@ ASTPtr ASTAlterCommand::clone() const return res; } -const char * ASTAlterCommand::typeToString(ASTAlterCommand::Type type) -{ - switch (type) - { - case ADD_COLUMN: return "ADD_COLUMN"; - case DROP_COLUMN: return "DROP_COLUMN"; - case MODIFY_COLUMN: return "MODIFY_COLUMN"; - case COMMENT_COLUMN: return "COMMENT_COLUMN"; - case RENAME_COLUMN: return "RENAME_COLUMN"; - case MATERIALIZE_COLUMN: return "MATERIALIZE_COLUMN"; - case MODIFY_ORDER_BY: return "MODIFY_ORDER_BY"; - case MODIFY_SAMPLE_BY: return "MODIFY_SAMPLE_BY"; - case MODIFY_TTL: return "MODIFY_TTL"; - case MATERIALIZE_TTL: return "MATERIALIZE_TTL"; - case MODIFY_SETTING: return "MODIFY_SETTING"; - case RESET_SETTING: return "RESET_SETTING"; - case MODIFY_QUERY: return "MODIFY_QUERY"; - case REMOVE_TTL: return "REMOVE_TTL"; - case REMOVE_SAMPLE_BY: return "REMOVE_SAMPLE_BY"; - case ADD_INDEX: return "ADD_INDEX"; - case DROP_INDEX: return "DROP_INDEX"; - case MATERIALIZE_INDEX: return "MATERIALIZE_INDEX"; - case ADD_CONSTRAINT: return "ADD_CONSTRAINT"; - case DROP_CONSTRAINT: return "DROP_CONSTRAINT"; - case ADD_PROJECTION: return "ADD_PROJECTION"; - case DROP_PROJECTION: return "DROP_PROJECTION"; - case MATERIALIZE_PROJECTION: return "MATERIALIZE_PROJECTION"; - case DROP_PARTITION: return "DROP_PARTITION"; - case DROP_DETACHED_PARTITION: return "DROP_DETACHED_PARTITION"; - case ATTACH_PARTITION: return "ATTACH_PARTITION"; - case MOVE_PARTITION: return "MOVE_PARTITION"; - case REPLACE_PARTITION: return "REPLACE_PARTITION"; - case FETCH_PARTITION: return "FETCH_PARTITION"; - case FREEZE_PARTITION: return "FREEZE_PARTITION"; - case FREEZE_ALL: return "FREEZE_ALL"; - case UNFREEZE_PARTITION: return "UNFREEZE_PARTITION"; - case UNFREEZE_ALL: return "UNFREEZE_ALL"; - case DELETE: return "DELETE"; - case UPDATE: return "UPDATE"; - case NO_TYPE: return "NO_TYPE"; - case LIVE_VIEW_REFRESH: return "LIVE_VIEW_REFRESH"; - case MODIFY_DATABASE_SETTING: return "MODIFY_DATABASE_SETTING"; - case MODIFY_COMMENT: return "MODIFY_COMMENT"; - } - UNREACHABLE(); -} - void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { if (type == ASTAlterCommand::ADD_COLUMN) diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 1400113fa9c..30cf0cac4ce 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -208,8 +208,6 @@ public: ASTPtr clone() const override; - static const char * typeToString(Type type); - protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/src/Parsers/ASTCreateIndexQuery.cpp b/src/Parsers/ASTCreateIndexQuery.cpp index 50470fbc1e4..17d4b9a9d58 100644 --- a/src/Parsers/ASTCreateIndexQuery.cpp +++ b/src/Parsers/ASTCreateIndexQuery.cpp @@ -38,7 +38,7 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma settings.ostr << (settings.hilite ? 
hilite_keyword : "") << indent_str; - settings.ostr << "CREATE INDEX " << (if_not_exists ? "IF NOT EXISTS " : ""); + settings.ostr << "CREATE " << (unique ? "UNIQUE " : "") << "INDEX " << (if_not_exists ? "IF NOT EXISTS " : ""); index_name->formatImpl(settings, state, frame); settings.ostr << " ON "; @@ -56,8 +56,7 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma formatOnCluster(settings); - if (!cluster.empty()) - settings.ostr << " "; + settings.ostr << " "; index_decl->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTCreateIndexQuery.h b/src/Parsers/ASTCreateIndexQuery.h index 424a0e493d9..b7577f2634e 100644 --- a/src/Parsers/ASTCreateIndexQuery.h +++ b/src/Parsers/ASTCreateIndexQuery.h @@ -20,6 +20,7 @@ public: ASTPtr index_decl; bool if_not_exists{false}; + bool unique{false}; String getID(char delim) const override; diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.cpp b/src/Parsers/ASTCreateNamedCollectionQuery.cpp index 97e83541f05..45ef8565148 100644 --- a/src/Parsers/ASTCreateNamedCollectionQuery.cpp +++ b/src/Parsers/ASTCreateNamedCollectionQuery.cpp @@ -18,6 +18,8 @@ ASTPtr ASTCreateNamedCollectionQuery::clone() const void ASTCreateNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE NAMED COLLECTION "; + if (if_not_exists) + settings.ostr << "IF NOT EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? hilite_none : ""); formatOnCluster(settings); diff --git a/src/Parsers/ASTCreateNamedCollectionQuery.h b/src/Parsers/ASTCreateNamedCollectionQuery.h index 303d1901931..785aeb781b4 100644 --- a/src/Parsers/ASTCreateNamedCollectionQuery.h +++ b/src/Parsers/ASTCreateNamedCollectionQuery.h @@ -13,6 +13,7 @@ class ASTCreateNamedCollectionQuery : public IAST, public ASTQueryWithOnCluster public: std::string collection_name; SettingsChanges changes; + bool if_not_exists = false; String getID(char) const override { return "CreateNamedCollectionQuery"; } diff --git a/src/Parsers/ASTDropNamedCollectionQuery.cpp b/src/Parsers/ASTDropNamedCollectionQuery.cpp index 3b8568cfd70..e317681d33d 100644 --- a/src/Parsers/ASTDropNamedCollectionQuery.cpp +++ b/src/Parsers/ASTDropNamedCollectionQuery.cpp @@ -13,6 +13,8 @@ ASTPtr ASTDropNamedCollectionQuery::clone() const void ASTDropNamedCollectionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP NAMED COLLECTION "; + if (if_exists) + settings.ostr << "IF EXISTS "; settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(collection_name) << (settings.hilite ? 
hilite_none : ""); formatOnCluster(settings); } diff --git a/src/Parsers/ASTIndexDeclaration.cpp b/src/Parsers/ASTIndexDeclaration.cpp index d223661451e..12d59681cc3 100644 --- a/src/Parsers/ASTIndexDeclaration.cpp +++ b/src/Parsers/ASTIndexDeclaration.cpp @@ -13,8 +13,8 @@ ASTPtr ASTIndexDeclaration::clone() const auto res = std::make_shared(); res->name = name; - res->granularity = granularity; - + if (granularity) + res->granularity = granularity; if (expr) res->set(res->expr, expr->clone()); if (type) @@ -25,23 +25,37 @@ ASTPtr ASTIndexDeclaration::clone() const void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { - if (part_of_create_index_query) + if (expr) { - s.ostr << "("; - expr->formatImpl(s, state, frame); - s.ostr << ")"; - } - else - { - s.ostr << backQuoteIfNeed(name); - s.ostr << " "; - expr->formatImpl(s, state, frame); + if (part_of_create_index_query) + { + if (expr->as()) + { + s.ostr << "("; + expr->formatImpl(s, state, frame); + s.ostr << ")"; + } + else + expr->formatImpl(s, state, frame); + } + else + { + s.ostr << backQuoteIfNeed(name); + s.ostr << " "; + expr->formatImpl(s, state, frame); + } } - s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); - type->formatImpl(s, state, frame); - s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : ""); - s.ostr << granularity; + if (type) + { + s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); + type->formatImpl(s, state, frame); + } + if (granularity) + { + s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : ""); + s.ostr << granularity; + } } } diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h index 43780e27114..45fd3d97950 100644 --- a/src/Parsers/ASTInsertQuery.h +++ b/src/Parsers/ASTInsertQuery.h @@ -35,6 +35,8 @@ public: /// Data from buffer to insert after inlined one - may be nullptr. ReadBuffer * tail = nullptr; + bool async_insert_flush = false; + String getDatabase() const; String getTable() const; @@ -66,7 +68,7 @@ public: return res; } - QueryKind getQueryKind() const override { return QueryKind::Insert; } + QueryKind getQueryKind() const override { return async_insert_flush ? QueryKind::AsyncInsertFlush : QueryKind::Insert; } protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTLiteral.cpp b/src/Parsers/ASTLiteral.cpp index 5c76f6f33bf..4a9a3d8df5b 100644 --- a/src/Parsers/ASTLiteral.cpp +++ b/src/Parsers/ASTLiteral.cpp @@ -93,7 +93,7 @@ void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const { - /// 100 - just arbitrary value. + /// 100 - just arbitrary value. constexpr auto min_elements_for_hashing = 100; /// Special case for very large arrays. Instead of listing all elements, will use hash of them. 
@@ -118,9 +118,31 @@ void ASTLiteral::appendColumnNameImplLegacy(WriteBuffer & ostr) const } } +/// Use different rules for escaping backslashes and quotes +class FieldVisitorToStringPostgreSQL : public StaticVisitor +{ +public: + template + String operator() (const T & x) const { return visitor(x); } + +private: + FieldVisitorToString visitor; +}; + +template<> +String FieldVisitorToStringPostgreSQL::operator() (const String & x) const +{ + WriteBufferFromOwnString wb; + writeQuotedStringPostgreSQL(x, wb); + return wb.str(); +} + void ASTLiteral::formatImplWithoutAlias(const FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { - settings.ostr << applyVisitor(FieldVisitorToString(), value); + if (settings.literal_escaping_style == LiteralEscapingStyle::Regular) + settings.ostr << applyVisitor(FieldVisitorToString(), value); + else + settings.ostr << applyVisitor(FieldVisitorToStringPostgreSQL(), value); } } diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index da3d9286f0a..90d9ede7337 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -73,11 +73,11 @@ void ASTProjectionSelectQuery::formatImpl(const FormatSettings & s, FormatState if (orderBy()) { - /// Let's convert the ASTFunction into ASTExpressionList, which generates consistent format + /// Let's convert tuple ASTFunction into ASTExpressionList, which generates consistent format /// between GROUP BY and ORDER BY projection definition. s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY " << (s.hilite ? hilite_none : ""); ASTPtr order_by; - if (auto * func = orderBy()->as()) + if (auto * func = orderBy()->as(); func && func->name == "tuple") order_by = func->arguments; else { @@ -142,6 +142,14 @@ ASTPtr ASTProjectionSelectQuery::cloneToASTSelect() const } if (groupBy()) select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, groupBy()->clone()); + + auto settings_query = std::make_shared(); + SettingsChanges settings_changes; + settings_changes.insertSetting("optimize_aggregators_of_group_by_keys", false); + settings_changes.insertSetting("optimize_group_by_function_keys", false); + settings_query->changes = std::move(settings_changes); + settings_query->is_standalone = false; + select_query->setExpression(ASTSelectQuery::Expression::SETTINGS, std::move(settings_query)); return node; } diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 9550752b1f3..48b4ae3c38d 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -5,7 +5,6 @@ #include #include -#include namespace DB { diff --git a/src/Parsers/ASTSetQuery.cpp b/src/Parsers/ASTSetQuery.cpp index 0b8d76dbb89..1b7b76fe231 100644 --- a/src/Parsers/ASTSetQuery.cpp +++ b/src/Parsers/ASTSetQuery.cpp @@ -64,4 +64,14 @@ void ASTSetQuery::formatImpl(const FormatSettings & format, FormatState &, Forma } } +void ASTSetQuery::appendColumnName(WriteBuffer & ostr) const +{ + Hash hash = getTreeHash(); + + writeCString("__settings_", ostr); + writeText(hash.low64, ostr); + ostr.write('_'); + writeText(hash.high64, ostr); +} + } diff --git a/src/Parsers/ASTSetQuery.h b/src/Parsers/ASTSetQuery.h index 40abe2de31d..beed052c79a 100644 --- a/src/Parsers/ASTSetQuery.h +++ b/src/Parsers/ASTSetQuery.h @@ -37,6 +37,9 @@ public: void updateTreeHashImpl(SipHash & hash_state) const override; QueryKind getQueryKind() const override { return 
QueryKind::Set; } + + void appendColumnName(WriteBuffer & ostr) const override; + void appendColumnNameWithoutAlias(WriteBuffer & ostr) const override { return appendColumnName(ostr); } }; } diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 7b4e052bc86..03ae96fa288 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -10,10 +11,20 @@ ASTPtr ASTShowTablesQuery::clone() const { auto res = std::make_shared(*this); res->children.clear(); + if (from) + res->set(res->from, from->clone()); + cloneOutputOptions(*res); return res; } +String ASTShowTablesQuery::getFrom() const +{ + String name; + tryGetIdentifierNameInto(from, name); + return name; +} + void ASTShowTablesQuery::formatLike(const FormatSettings & settings) const { if (!like.empty()) @@ -72,9 +83,11 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? "TEMPORARY " : "") << (dictionaries ? "DICTIONARIES" : "TABLES") << (settings.hilite ? hilite_none : ""); - if (!from.empty()) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(from); + if (from) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); + from->formatImpl(settings, state, frame); + } formatLike(settings); diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index 2878df54bcc..2fd4c3dac96 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -24,8 +24,9 @@ public: bool caches = false; bool full = false; + IAST * from; + String cluster_str; - String from; String like; bool not_like = false; @@ -38,6 +39,8 @@ public: ASTPtr clone() const override; QueryKind getQueryKind() const override { return QueryKind::Show; } + String getFrom() const; + protected: void formatLike(const FormatSettings & settings) const; void formatLimit(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const; diff --git a/src/Parsers/ASTSubquery.cpp b/src/Parsers/ASTSubquery.cpp index a3408f12330..92adad666ed 100644 --- a/src/Parsers/ASTSubquery.cpp +++ b/src/Parsers/ASTSubquery.cpp @@ -19,11 +19,9 @@ void ASTSubquery::appendColumnNameImpl(WriteBuffer & ostr) const } else { - Hash hash = getTreeHash(); + const auto hash = getTreeHash(); writeCString("__subquery_", ostr); - writeText(hash.first, ostr); - ostr.write('_'); - writeText(hash.second, ostr); + writeString(toString(hash), ostr); } } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index a91449ff035..fb10474a4d4 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -162,7 +162,9 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, || type == Type::STOP_REPLICATION_QUEUES || type == Type::START_REPLICATION_QUEUES || type == Type::STOP_DISTRIBUTED_SENDS - || type == Type::START_DISTRIBUTED_SENDS) + || type == Type::START_DISTRIBUTED_SENDS + || type == Type::STOP_PULLING_REPLICATION_LOG + || type == Type::START_PULLING_REPLICATION_LOG) { if (table) print_database_table(); @@ -210,7 +212,15 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, else if (type == Type::DROP_FILESYSTEM_CACHE) { if (!filesystem_cache_name.empty()) + { settings.ostr << 
(settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; + if (!key_to_drop.empty()) + { + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; + if (offset_to_drop.has_value()) + settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << offset_to_drop.value(); + } + } } else if (type == Type::UNFREEZE) { @@ -220,6 +230,17 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { settings.ostr << (settings.hilite ? hilite_none : ""); } + else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " << ServerType::serverTypeToString(server_type.type) + << (settings.hilite ? hilite_none : ""); + + if (server_type.type == ServerType::CUSTOM) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << " " << backQuoteIfNeed(server_type.custom_name); + } + + } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 52b3b79b16e..9d15e7cacee 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "config.h" @@ -35,8 +36,8 @@ public: #if USE_AWS_S3 DROP_S3_CLIENT_CACHE, #endif - STOP_LISTEN_QUERIES, - START_LISTEN_QUERIES, + STOP_LISTEN, + START_LISTEN, RESTART_REPLICAS, RESTART_REPLICA, RESTORE_REPLICA, @@ -71,6 +72,7 @@ public: START_REPLICATION_QUEUES, FLUSH_LOGS, FLUSH_DISTRIBUTED, + FLUSH_ASYNC_INSERT_QUEUE, STOP_DISTRIBUTED_SENDS, START_DISTRIBUTED_SENDS, START_THREAD_FUZZER, @@ -78,6 +80,8 @@ public: UNFREEZE, ENABLE_FAILPOINT, DISABLE_FAILPOINT, + STOP_PULLING_REPLICATION_LOG, + START_PULLING_REPLICATION_LOG, END }; @@ -106,6 +110,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; + std::string key_to_drop; + std::optional offset_to_drop; String backup_name; @@ -115,6 +121,8 @@ public: SyncReplicaMode sync_replica_mode = SyncReplicaMode::DEFAULT; + ServerType server_type; + String getID(char) const override { return "SYSTEM query"; } ASTPtr clone() const override diff --git a/src/Parsers/ASTUseQuery.h b/src/Parsers/ASTUseQuery.h index f767a6bbdb7..873a316e653 100644 --- a/src/Parsers/ASTUseQuery.h +++ b/src/Parsers/ASTUseQuery.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -14,19 +15,34 @@ namespace DB class ASTUseQuery : public IAST { public: - String database; + IAST * database; + + String getDatabase() const + { + String name; + tryGetIdentifierNameInto(database, name); + return name; + } /** Get the text that identifies this element. */ - String getID(char delim) const override { return "UseQuery" + (delim + database); } + String getID(char delim) const override { return "UseQuery" + (delim + getDatabase()); } - ASTPtr clone() const override { return std::make_shared(*this); } + ASTPtr clone() const override + { + auto res = std::make_shared(*this); + res->children.clear(); + if (database) + res->set(res->database, database->clone()); + return res; + } QueryKind getQueryKind() const override { return QueryKind::Use; } protected: - void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? 
hilite_none : ""); + database->formatImpl(settings, state, frame); } }; diff --git a/src/Parsers/Access/ASTCreateQuotaQuery.cpp b/src/Parsers/Access/ASTCreateQuotaQuery.cpp index 56abedf5235..091b62b0a9f 100644 --- a/src/Parsers/Access/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ASTCreateQuotaQuery.cpp @@ -170,6 +170,12 @@ void ASTCreateQuotaQuery::formatImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.hilite ? hilite_keyword : "") << " OR REPLACE" << (settings.hilite ? hilite_none : ""); formatNames(names, settings); + + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") + << " IN " << (settings.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateQuotaQuery.h b/src/Parsers/Access/ASTCreateQuotaQuery.h index f5eb59800ec..aecbbb00f9a 100644 --- a/src/Parsers/Access/ASTCreateQuotaQuery.h +++ b/src/Parsers/Access/ASTCreateQuotaQuery.h @@ -38,6 +38,7 @@ public: Strings names; String new_name; std::optional key_type; + String storage_name; struct Limits { diff --git a/src/Parsers/Access/ASTCreateRoleQuery.cpp b/src/Parsers/Access/ASTCreateRoleQuery.cpp index d624b9a9157..eeeb34c97e4 100644 --- a/src/Parsers/Access/ASTCreateRoleQuery.cpp +++ b/src/Parsers/Access/ASTCreateRoleQuery.cpp @@ -71,6 +71,12 @@ void ASTCreateRoleQuery::formatImpl(const FormatSettings & format, FormatState & format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? hilite_none : ""); formatNames(names, format); + + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateRoleQuery.h b/src/Parsers/Access/ASTCreateRoleQuery.h index 42d1a4031b6..4e465553164 100644 --- a/src/Parsers/Access/ASTCreateRoleQuery.h +++ b/src/Parsers/Access/ASTCreateRoleQuery.h @@ -28,6 +28,7 @@ public: Strings names; String new_name; + String storage_name; std::shared_ptr settings; diff --git a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp index ca888be2cfe..e95959703ee 100644 --- a/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.cpp @@ -168,6 +168,11 @@ void ASTCreateRowPolicyQuery::formatImpl(const FormatSettings & settings, Format settings.ostr << " "; names->format(settings); + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") + << " IN " << (settings.hilite ? 
IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); assert(names->cluster.empty()); diff --git a/src/Parsers/Access/ASTCreateRowPolicyQuery.h b/src/Parsers/Access/ASTCreateRowPolicyQuery.h index 32d98fab822..3f2418e7287 100644 --- a/src/Parsers/Access/ASTCreateRowPolicyQuery.h +++ b/src/Parsers/Access/ASTCreateRowPolicyQuery.h @@ -35,6 +35,7 @@ public: bool if_exists = false; bool if_not_exists = false; bool or_replace = false; + String storage_name; std::shared_ptr names; String new_short_name; diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp index 56ddef433ef..3379486d789 100644 --- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.cpp @@ -81,6 +81,12 @@ void ASTCreateSettingsProfileQuery::formatImpl(const FormatSettings & format, Fo format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? hilite_none : ""); formatNames(names, format); + + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (!new_name.empty()) diff --git a/src/Parsers/Access/ASTCreateSettingsProfileQuery.h b/src/Parsers/Access/ASTCreateSettingsProfileQuery.h index c1a64998f29..be01aae1e26 100644 --- a/src/Parsers/Access/ASTCreateSettingsProfileQuery.h +++ b/src/Parsers/Access/ASTCreateSettingsProfileQuery.h @@ -28,6 +28,7 @@ public: bool if_exists = false; bool if_not_exists = false; bool or_replace = false; + String storage_name; Strings names; String new_name; diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index 0611545adf0..b7290c16013 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -208,6 +208,11 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & format.ostr << " "; names->format(format); + if (!storage_name.empty()) + format.ostr << (format.hilite ? IAST::hilite_keyword : "") + << " IN " << (format.hilite ? IAST::hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(format); if (new_name) diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 62ddbfd0040..06940e67d91 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -45,6 +45,7 @@ public: std::shared_ptr names; std::optional new_name; + String storage_name; std::shared_ptr auth_data; diff --git a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp index 88f2d7bce63..bcd7105d0e9 100644 --- a/src/Parsers/Access/ASTDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ASTDropAccessEntityQuery.cpp @@ -53,6 +53,11 @@ void ASTDropAccessEntityQuery::formatImpl(const FormatSettings & settings, Forma else formatNames(names, settings); + if (!storage_name.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " FROM " << (settings.hilite ? 
hilite_none : "") + << backQuoteIfNeed(storage_name); + formatOnCluster(settings); } diff --git a/src/Parsers/Access/ASTDropAccessEntityQuery.h b/src/Parsers/Access/ASTDropAccessEntityQuery.h index f3a065c50df..32f4a8f8047 100644 --- a/src/Parsers/Access/ASTDropAccessEntityQuery.h +++ b/src/Parsers/Access/ASTDropAccessEntityQuery.h @@ -21,6 +21,7 @@ public: AccessEntityType type; bool if_exists = false; Strings names; + String storage_name; std::shared_ptr row_policy_names; String getID(char) const override; diff --git a/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp b/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp new file mode 100644 index 00000000000..285f07854c0 --- /dev/null +++ b/src/Parsers/Access/ASTMoveAccessEntityQuery.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + void formatNames(const Strings & names, const IAST::FormatSettings & settings) + { + bool need_comma = false; + for (const auto & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ','; + settings.ostr << ' ' << backQuoteIfNeed(name); + } + } +} + +String ASTMoveAccessEntityQuery::getID(char) const +{ + return String("MOVE ") + toString(type) + " query"; +} + +ASTPtr ASTMoveAccessEntityQuery::clone() const +{ + auto res = std::make_shared(*this); + + if (row_policy_names) + res->row_policy_names = std::static_pointer_cast(row_policy_names->clone()); + + return res; +} + +void ASTMoveAccessEntityQuery::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "MOVE " << AccessEntityTypeInfo::get(type).name + << (settings.hilite ? hilite_none : ""); + + if (type == AccessEntityType::ROW_POLICY) + { + settings.ostr << " "; + row_policy_names->format(settings); + } + else + formatNames(names, settings); + + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " TO " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(storage_name); + + formatOnCluster(settings); +} + +void ASTMoveAccessEntityQuery::replaceEmptyDatabase(const String & current_database) const +{ + if (row_policy_names) + row_policy_names->replaceEmptyDatabase(current_database); +} +} diff --git a/src/Parsers/Access/ASTMoveAccessEntityQuery.h b/src/Parsers/Access/ASTMoveAccessEntityQuery.h new file mode 100644 index 00000000000..aa2b3b0f98c --- /dev/null +++ b/src/Parsers/Access/ASTMoveAccessEntityQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTRowPolicyNames; + +/** MOVE {USER | ROLE | QUOTA | [ROW] POLICY | [SETTINGS] PROFILE} [IF EXISTS] name [,...] 
[ON [database.]table [,...]] TO storage_name + */ +class ASTMoveAccessEntityQuery : public IAST, public ASTQueryWithOnCluster +{ +public: + AccessEntityType type; + Strings names; + std::shared_ptr row_policy_names; + + String storage_name; + + String getID(char) const override; + ASTPtr clone() const override; + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster(clone()); } + + void replaceEmptyDatabase(const String & current_database) const; + + QueryKind getQueryKind() const override { return QueryKind::Move; } +}; +} diff --git a/src/Parsers/Access/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp index 2330ba7771c..bc3512e3fc0 100644 --- a/src/Parsers/Access/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -108,7 +109,8 @@ namespace }); } - template || std::is_same_v>> + template + requires std::same_as || std::same_as T fieldToNumber(const Field & f) { if (f.getType() == Field::Types::String) @@ -288,6 +290,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe std::optional key_type; std::vector all_limits; String cluster; + String storage_name; while (true) { @@ -310,6 +313,9 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -332,6 +338,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->key_type = key_type; query->all_limits = std::move(all_limits); query->roles = std::move(roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateRoleQuery.cpp b/src/Parsers/Access/ParserCreateRoleQuery.cpp index da9749958ee..99a97f6901d 100644 --- a/src/Parsers/Access/ParserCreateRoleQuery.cpp +++ b/src/Parsers/Access/ParserCreateRoleQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -91,6 +92,7 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec String new_name; std::shared_ptr settings; String cluster; + String storage_name; while (true) { @@ -110,6 +112,9 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -125,6 +130,7 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->names = std::move(names); query->new_name = std::move(new_name); query->settings = std::move(settings); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp index 2c25fc14e7d..f9e9466e35d 100644 --- a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -245,6 +246,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & String new_short_name; std::optional 
is_restrictive; std::vector> filters; + String storage_name; while (true) { @@ -271,6 +273,9 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -294,6 +299,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->is_restrictive = is_restrictive; query->filters = std::move(filters); query->roles = std::move(roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp index c58a3035dc6..fcb6ebd8806 100644 --- a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -111,6 +112,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec String new_name; std::shared_ptr settings; String cluster; + String storage_name; while (true) { @@ -130,6 +132,9 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -152,6 +157,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec query->new_name = std::move(new_name); query->settings = std::move(settings); query->to_roles = std::move(to_roles); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 0344fb99c04..577b6772ab2 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -414,6 +415,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::shared_ptr grantees; std::shared_ptr default_database; String cluster; + String storage_name; while (true) { @@ -480,6 +482,9 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } } + if (storage_name.empty() && ParserKeyword{"IN"}.ignore(pos, expected) && parseAccessStorageName(pos, expected, storage_name)) + continue; + break; } @@ -514,6 +519,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->settings = std::move(settings); query->grantees = std::move(grantees); query->default_database = std::move(default_database); + query->storage_name = std::move(storage_name); if (query->auth_data) query->children.push_back(query->auth_data); diff --git a/src/Parsers/Access/ParserDropAccessEntityQuery.cpp b/src/Parsers/Access/ParserDropAccessEntityQuery.cpp index d21164bc1a2..7fd34a6f8e4 100644 --- a/src/Parsers/Access/ParserDropAccessEntityQuery.cpp +++ b/src/Parsers/Access/ParserDropAccessEntityQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -53,6 +54,7 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & Strings names; std::shared_ptr row_policy_names; + String storage_name; String cluster; if ((type == AccessEntityType::USER) || (type == AccessEntityType::ROLE)) @@ -76,6 +78,9 @@ bool 
ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } + if (ParserKeyword{"FROM"}.ignore(pos, expected)) + parseAccessStorageName(pos, expected, storage_name); + if (cluster.empty()) parseOnCluster(pos, expected, cluster); @@ -87,6 +92,7 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->cluster = std::move(cluster); query->names = std::move(names); query->row_policy_names = std::move(row_policy_names); + query->storage_name = std::move(storage_name); return true; } diff --git a/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp b/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp new file mode 100644 index 00000000000..798e200bfb3 --- /dev/null +++ b/src/Parsers/Access/ParserMoveAccessEntityQuery.cpp @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + bool parseEntityType(IParserBase::Pos & pos, Expected & expected, AccessEntityType & type) + { + for (auto i : collections::range(AccessEntityType::MAX)) + { + const auto & type_info = AccessEntityTypeInfo::get(i); + if (ParserKeyword{type_info.name}.ignore(pos, expected) + || (!type_info.alias.empty() && ParserKeyword{type_info.alias}.ignore(pos, expected))) + { + type = i; + return true; + } + } + return false; + } + + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } +} + + +bool ParserMoveAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (!ParserKeyword{"MOVE"}.ignore(pos, expected)) + return false; + + AccessEntityType type; + if (!parseEntityType(pos, expected, type)) + return false; + + Strings names; + std::shared_ptr row_policy_names; + String storage_name; + String cluster; + + if ((type == AccessEntityType::USER) || (type == AccessEntityType::ROLE)) + { + if (!parseUserNames(pos, expected, names)) + return false; + } + else if (type == AccessEntityType::ROW_POLICY) + { + ParserRowPolicyNames parser; + ASTPtr ast; + parser.allowOnCluster(); + if (!parser.parse(pos, ast, expected)) + return false; + row_policy_names = typeid_cast>(ast); + cluster = std::exchange(row_policy_names->cluster, ""); + } + else + { + if (!parseIdentifiersOrStringLiterals(pos, expected, names)) + return false; + } + + if (!ParserKeyword{"TO"}.ignore(pos, expected) || !parseAccessStorageName(pos, expected, storage_name)) + return false; + + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); + + auto query = std::make_shared(); + node = query; + + query->type = type; + query->cluster = std::move(cluster); + query->names = std::move(names); + query->row_policy_names = std::move(row_policy_names); + query->storage_name = std::move(storage_name); + + return true; +} +} diff --git a/src/Parsers/Access/ParserMoveAccessEntityQuery.h b/src/Parsers/Access/ParserMoveAccessEntityQuery.h new file mode 100644 index 00000000000..3accec883dd --- /dev/null +++ b/src/Parsers/Access/ParserMoveAccessEntityQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses queries like + * MOVE {USER | ROLE | QUOTA | [ROW] POLICY | [SETTINGS] PROFILE} [IF EXISTS] name [,...] 
[ON [database.]table [,...]] TO storage_name + */ +class ParserMoveAccessEntityQuery : public IParserBase +{ +protected: + const char * getName() const override { return "MOVE access entity query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/Access/parseUserName.h b/src/Parsers/Access/parseUserName.h index fbad78e7bce..10c548d8826 100644 --- a/src/Parsers/Access/parseUserName.h +++ b/src/Parsers/Access/parseUserName.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index d5cf2bd4784..d74137f8a91 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -4,8 +4,12 @@ add_headers_and_sources(clickhouse_parsers .) add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) add_headers_and_sources(clickhouse_parsers ./Kusto) +add_headers_and_sources(clickhouse_parsers ./PRQL) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) +if (TARGET ch_rust::prql) + target_link_libraries(clickhouse_parsers PRIVATE ch_rust::prql) +endif () if (USE_DEBUG_HELPERS) # CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc. diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 3a7e8790bb4..587908eb49c 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -243,6 +243,38 @@ bool ParserIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } +bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::StringLiteral) + return false; + + ReadBufferFromMemory in(pos->begin, pos->size()); + String s; + + if (!tryReadQuotedStringInto(s, in)) + { + expected.add(pos, "string literal"); + return false; + } + + if (in.count() != pos->size()) + { + expected.add(pos, "string literal"); + return false; + } + + if (s.empty()) + { + expected.add(pos, "non-empty string literal"); + return false; + } + + node = std::make_shared(s); + ++pos; + return true; +} + + bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; @@ -1900,6 +1932,39 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } +bool ParserMySQLComment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::QuotedIdentifier && pos->type != TokenType::StringLiteral) + return false; + String s; + ReadBufferFromMemory in(pos->begin, pos->size()); + try + { + if (pos->type == TokenType::StringLiteral) + readQuotedStringWithSQLStyle(s, in); + else + readDoubleQuotedStringWithSQLStyle(s, in); + } + catch (const Exception &) + { + expected.add(pos, "string literal or double quoted string"); + return false; + } + + if (in.count() != pos->size()) + { + expected.add(pos, "string literal or double quoted string"); + return false; + } + + auto literal = std::make_shared(s); + literal->begin = pos; + literal->end = ++pos; + node = literal; + return true; +} + + bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type != TokenType::DoubleAt) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h 
index cc88faf2653..2d06b26c6dc 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -34,6 +34,19 @@ protected: }; +/** An identifier for tables written as string literal, for example, 'mytable.avro' + */ +class ParserTableAsStringLiteralIdentifier : public IParserBase +{ +public: + explicit ParserTableAsStringLiteralIdentifier() {} + +protected: + const char * getName() const override { return "string literal table identifier"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** An identifier, possibly containing a dot, for example, x_yz123 or `something special` or Hits.EventTime, * possibly with UUID clause like `db name`.`table name` UUID 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' */ @@ -367,6 +380,21 @@ protected: }; +/** MySQL comment: + * CREATE TABLE t ( + * i INT PRIMARY KEY, + * first_name VARCHAR(255) COMMENT 'FIRST_NAME', + * last_name VARCHAR(255) COMMENT "LAST_NAME" + * ) + */ +class ParserMySQLComment : public IParserBase +{ +protected: + const char * getName() const override { return "MySQL comment parser"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** MySQL-style global variable: @@var */ class ParserMySQLGlobalVariable : public IParserBase diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 0138372ce89..a494a528cd2 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -118,9 +118,7 @@ IAST::Hash IAST::getTreeHash() const { SipHash hash_state; updateTreeHash(hash_state); - IAST::Hash res; - hash_state.get128(res); - return res; + return getSipHash128AsPair(hash_state); } @@ -170,7 +168,9 @@ size_t IAST::checkDepthImpl(size_t max_depth) const String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const { WriteBufferFromOwnString buf; - format({buf, one_line, show_secrets}); + FormatSettings settings(buf, one_line); + settings.show_secrets = show_secrets; + format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index aa5302a15b9..812fd082476 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ public: /** Get hash code, identifying this element and its subtree. */ - using Hash = std::pair; + using Hash = CityHash_v1_0_2::uint128; Hash getTreeHash() const; void updateTreeHash(SipHash & hash_state) const; virtual void updateTreeHashImpl(SipHash & hash_state) const; @@ -191,27 +192,43 @@ public: struct FormatSettings { WriteBuffer & ostr; - bool hilite = false; bool one_line; - bool always_quote_identifiers = false; - IdentifierQuotingStyle identifier_quoting_style = IdentifierQuotingStyle::Backticks; - bool show_secrets = true; /// Show secret parts of the AST (e.g. passwords, encryption keys). + bool hilite; + bool always_quote_identifiers; + IdentifierQuotingStyle identifier_quoting_style; + bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). + char nl_or_ws; /// Newline or whitespace. + LiteralEscapingStyle literal_escaping_style; - // Newline or whitespace. 
- char nl_or_ws; - - FormatSettings(WriteBuffer & ostr_, bool one_line_, bool show_secrets_ = true) - : ostr(ostr_), one_line(one_line_), show_secrets(show_secrets_) + explicit FormatSettings( + WriteBuffer & ostr_, + bool one_line_, + bool hilite_ = false, + bool always_quote_identifiers_ = false, + IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, + bool show_secrets_ = true, + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) + : ostr(ostr_) + , one_line(one_line_) + , hilite(hilite_) + , always_quote_identifiers(always_quote_identifiers_) + , identifier_quoting_style(identifier_quoting_style_) + , show_secrets(show_secrets_) + , nl_or_ws(one_line ? ' ' : '\n') + , literal_escaping_style(literal_escaping_style_) { - nl_or_ws = one_line ? ' ' : '\n'; } FormatSettings(WriteBuffer & ostr_, const FormatSettings & other) - : ostr(ostr_), hilite(other.hilite), one_line(other.one_line), - always_quote_identifiers(other.always_quote_identifiers), identifier_quoting_style(other.identifier_quoting_style), - show_secrets(other.show_secrets) + : ostr(ostr_) + , one_line(other.one_line) + , hilite(other.hilite) + , always_quote_identifiers(other.always_quote_identifiers) + , identifier_quoting_style(other.identifier_quoting_style) + , show_secrets(other.show_secrets) + , nl_or_ws(other.nl_or_ws) + , literal_escaping_style(other.literal_escaping_style) { - nl_or_ws = one_line ? ' ' : '\n'; } void writeIdentifier(const String & name) const; @@ -290,6 +307,7 @@ public: Alter, Grant, Revoke, + Move, System, Set, Use, @@ -305,6 +323,7 @@ public: Commit, Rollback, SetTransactionSnapshot, + AsyncInsertFlush }; /// Return QueryKind of this AST query. virtual QueryKind getQueryKind() const { return QueryKind::None; } diff --git a/src/Parsers/Kusto/Formatters.cpp b/src/Parsers/Kusto/Formatters.cpp new file mode 100644 index 00000000000..f12af479445 --- /dev/null +++ b/src/Parsers/Kusto/Formatters.cpp @@ -0,0 +1,27 @@ +#include "Formatters.h" + +#include + +namespace DB +{ +std::string formatKQLTimespan(const Int64 ticks) +{ + static constexpr Int64 TICKS_PER_SECOND = 10000000; + static constexpr auto TICKS_PER_MINUTE = TICKS_PER_SECOND * 60; + static constexpr auto TICKS_PER_HOUR = TICKS_PER_MINUTE * 60; + static constexpr auto TICKS_PER_DAY = TICKS_PER_HOUR * 24; + + const auto abs_ticks = std::abs(ticks); + std::string result = ticks < 0 ? 
"-" : ""; + if (abs_ticks >= TICKS_PER_DAY) + result.append(std::format("{}.", abs_ticks / TICKS_PER_DAY)); + + result.append(std::format( + "{:02}:{:02}:{:02}", (abs_ticks / TICKS_PER_HOUR) % 24, (abs_ticks / TICKS_PER_MINUTE) % 60, (abs_ticks / TICKS_PER_SECOND) % 60)); + + if (const auto fractional_second = abs_ticks % TICKS_PER_SECOND) + result.append(std::format(".{:07}", fractional_second)); + + return result; +} +} diff --git a/src/Parsers/Kusto/Formatters.h b/src/Parsers/Kusto/Formatters.h new file mode 100644 index 00000000000..16f52baf941 --- /dev/null +++ b/src/Parsers/Kusto/Formatters.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +#include + +namespace DB +{ +std::string formatKQLTimespan(Int64 ticks); +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9796ae10c07..72e25cc3cf9 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -31,10 +31,10 @@ protected: not_endswith, endswith_cs, not_endswith_cs, - equal, //=~ - not_equal,//!~ - equal_cs, //= - not_equal_cs,//!= + equal, /// =~ + not_equal, /// !~ + equal_cs, /// = + not_equal_cs, /// != has, not_has, has_all, @@ -49,10 +49,10 @@ protected: not_hassuffix, hassuffix_cs, not_hassuffix_cs, - in_cs, //in - not_in_cs, //!in - in, //in~ - not_in ,//!in~ + in_cs, /// in + not_in_cs, /// !in + in, /// in~ + not_in, /// !in~ matches_regex, startswith, not_startswith, diff --git a/src/Parsers/LiteralEscapingStyle.h b/src/Parsers/LiteralEscapingStyle.h new file mode 100644 index 00000000000..10d4d84a85d --- /dev/null +++ b/src/Parsers/LiteralEscapingStyle.h @@ -0,0 +1,14 @@ +#pragma once + + +namespace DB +{ + +/// Method to escape single quotes. +enum class LiteralEscapingStyle +{ + Regular, /// Escape backslashes with backslash (\\) and quotes with backslash (\') + PostgreSQL, /// Do not escape backslashes (\), escape quotes with quote ('') +}; + +} diff --git a/src/Parsers/MySQL/ASTDeclareColumn.cpp b/src/Parsers/MySQL/ASTDeclareColumn.cpp index e585dcb670c..e5f2b7870e2 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.cpp +++ b/src/Parsers/MySQL/ASTDeclareColumn.cpp @@ -50,7 +50,7 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node, OptionDescribe("PRIMARY KEY", "primary_key", std::make_unique()), OptionDescribe("UNIQUE", "unique_key", std::make_unique()), OptionDescribe("KEY", "primary_key", std::make_unique()), - OptionDescribe("COMMENT", "comment", std::make_unique()), + OptionDescribe("COMMENT", "comment", std::make_unique()), OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), OptionDescribe("CHARSET", "charset", std::make_unique()), OptionDescribe("COLLATE", "collate", std::make_unique()), diff --git a/src/Parsers/PRQL/ParserPRQLQuery.cpp b/src/Parsers/PRQL/ParserPRQLQuery.cpp new file mode 100644 index 00000000000..b3733b727dc --- /dev/null +++ b/src/Parsers/PRQL/ParserPRQLQuery.cpp @@ -0,0 +1,86 @@ +#include +#include + +#include "Parsers/Lexer.h" +#include "config.h" + +#if USE_PRQL +# include +#endif + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; + extern const int SUPPORT_IS_DISABLED; +} + +bool ParserPRQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserSetQuery set_p; + + if (set_p.parse(pos, node, expected)) + return true; + +#if !USE_PRQL + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, "PRQL is not available. Rust code or PRQL itself may be disabled. 
Use another dialect!"); +#else + const auto * begin = pos->begin; + + // The same parsers are used in the client and the server, so the parser have to detect the end of a single query in case of multiquery queries + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + ++pos; + + const auto * end = pos->begin; + + uint8_t * sql_query_ptr{nullptr}; + uint64_t sql_query_size{0}; + + const auto res + = prql_to_sql(reinterpret_cast(begin), static_cast(end - begin), &sql_query_ptr, &sql_query_size); + + SCOPE_EXIT({ prql_free_pointer(sql_query_ptr); }); + + const auto * sql_query_char_ptr = reinterpret_cast(sql_query_ptr); + const auto * const original_sql_query_ptr = sql_query_char_ptr; + + if (res != 0) + { + throw Exception(ErrorCodes::SYNTAX_ERROR, "PRQL syntax error: '{}'", sql_query_char_ptr); + } + chassert(sql_query_size > 0); + + ParserQuery query_p(end, false); + String error_message; + node = tryParseQuery( + query_p, + sql_query_char_ptr, + sql_query_char_ptr + sql_query_size - 1, + error_message, + false, + "", + false, + max_query_size, + max_parser_depth); + + if (!node) + throw Exception( + ErrorCodes::SYNTAX_ERROR, + "Error while parsing the SQL query generated from PRQL query :'{}'.\nPRQL Query:'{}'\nSQL query: '{}'", + error_message, + std::string_view{begin, end}, + std::string_view(original_sql_query_ptr, original_sql_query_ptr + sql_query_size)); + + + return true; +#endif +} +} diff --git a/src/Parsers/PRQL/ParserPRQLQuery.h b/src/Parsers/PRQL/ParserPRQLQuery.h new file mode 100644 index 00000000000..4fc450df6b6 --- /dev/null +++ b/src/Parsers/PRQL/ParserPRQLQuery.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ +// Even when PRQL is disabled, it is not possible to exclude this parser because changing the dialect via `SET dialect = '...'` queries should succeed. +// Another solution would be disabling setting the dialect to PRQL, but it requires a lot of finicky conditional compiling around the Dialect setting enum. +// Therefore the decision, for now, is to use this parser even when PRQL is disabled to enable users to switch to another dialect. +class ParserPRQLQuery final : public IParserBase +{ +private: + // These fields are not used when PRQL is disabled at build time. 
+ [[maybe_unused]] size_t max_query_size; + [[maybe_unused]] size_t max_parser_depth; + +public: + ParserPRQLQuery(size_t max_query_size_, size_t max_parser_depth_) : max_query_size{max_query_size_}, max_parser_depth{max_parser_depth_} + { + } + + const char * getName() const override { return "PRQL Statement"; } + +protected: + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; +} diff --git a/src/Parsers/ParserAlterNamedCollectionQuery.cpp b/src/Parsers/ParserAlterNamedCollectionQuery.cpp index 9108747ad82..8fb84f86246 100644 --- a/src/Parsers/ParserAlterNamedCollectionQuery.cpp +++ b/src/Parsers/ParserAlterNamedCollectionQuery.cpp @@ -13,8 +13,9 @@ bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & nod { ParserKeyword s_alter("ALTER"); ParserKeyword s_collection("NAMED COLLECTION"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_on("ON"); ParserKeyword s_delete("DELETE"); - ParserIdentifier name_p; ParserSetQuery set_p; ParserToken s_comma(TokenType::Comma); @@ -32,10 +33,13 @@ bool ParserAlterNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & nod if (!s_collection.ignore(pos, expected)) return false; + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index f231573b920..67051d84999 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -17,24 +17,36 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected { ParserKeyword s_type("TYPE"); ParserKeyword s_granularity("GRANULARITY"); - + ParserToken open(TokenType::OpeningRoundBracket); + ParserToken close(TokenType::ClosingRoundBracket); + ParserOrderByExpressionList order_list; ParserDataType data_type_p; ParserExpression expression_p; ParserUnsignedInteger granularity_p; ASTPtr expr; + ASTPtr order; ASTPtr type; ASTPtr granularity; /// Skip name parser for SQL-standard CREATE INDEX - if (!expression_p.parse(pos, expr, expected)) - return false; + if (expression_p.parse(pos, expr, expected)) + { + } + else if (open.ignore(pos, expected)) + { + if (!order_list.parse(pos, order, expected)) + return false; - if (!s_type.ignore(pos, expected)) - return false; + if (!close.ignore(pos, expected)) + return false; + } - if (!data_type_p.parse(pos, type, expected)) - return false; + if (s_type.ignore(pos, expected)) + { + if (!data_type_p.parse(pos, type, expected)) + return false; + } if (s_granularity.ignore(pos, expected)) { @@ -45,13 +57,14 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected auto index = std::make_shared(); index->part_of_create_index_query = true; index->set(index->expr, expr); - index->set(index->type, type); + if (type) + index->set(index->type, type); if (granularity) index->granularity = granularity->as().value.safeGet(); else { - if (index->type->name == "annoy") + if (index->type && index->type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; @@ -67,6 +80,7 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect node = query; ParserKeyword s_create("CREATE"); + ParserKeyword 
s_unique("UNIQUE"); ParserKeyword s_index("INDEX"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserKeyword s_on("ON"); @@ -78,10 +92,14 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect String cluster_str; bool if_not_exists = false; + bool unique = false; if (!s_create.ignore(pos, expected)) return false; + if (s_unique.ignore(pos, expected)) + unique = true; + if (!s_index.ignore(pos, expected)) return false; @@ -118,6 +136,7 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect query->children.push_back(index_decl); query->if_not_exists = if_not_exists; + query->unique = unique; query->cluster = cluster_str; if (query->database) diff --git a/src/Parsers/ParserCreateIndexQuery.h b/src/Parsers/ParserCreateIndexQuery.h index 3cb91cd03c6..701586d6e11 100644 --- a/src/Parsers/ParserCreateIndexQuery.h +++ b/src/Parsers/ParserCreateIndexQuery.h @@ -6,7 +6,7 @@ namespace DB { /** Query like this: - * CREATE INDEX [IF NOT EXISTS] name ON [db].name (expression) TYPE type GRANULARITY value + * CREATE [UNIQUE] INDEX [IF NOT EXISTS] name ON [db].name (expression) TYPE type GRANULARITY value */ class ParserCreateIndexQuery : public IParserBase diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 415d3321eb5..fb6dae248c0 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -502,7 +502,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_table("TABLE"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_from("FROM"); ParserKeyword s_on("ON"); ParserToken s_dot(TokenType::Dot); @@ -740,7 +740,7 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ParserKeyword s_create("CREATE"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_live("LIVE"); @@ -878,7 +878,7 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_window("WINDOW"); @@ -1015,12 +1015,17 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->if_not_exists = if_not_exists; query->is_window_view = true; - StorageID table_id = table->as()->getTableId(); - query->setDatabase(table_id.database_name); - query->setTable(table_id.table_name); - query->uuid = table_id.uuid; + auto * table_id = table->as(); + query->database = table_id->getDatabase(); + query->table = table_id->getTable(); + query->uuid = table_id->uuid; query->cluster = cluster_str; + if (query->database) + query->children.push_back(query->database); + if (query->table) + query->children.push_back(query->table); + if (to_table) query->to_table_id = 
to_table->as()->getTableId(); @@ -1265,7 +1270,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_create("CREATE"); ParserKeyword s_attach("ATTACH"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); - ParserCompoundIdentifier table_name_p(true, true); + ParserCompoundIdentifier table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); + ParserCompoundIdentifier to_table_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ false); ParserKeyword s_as("AS"); ParserKeyword s_view("VIEW"); ParserKeyword s_materialized("MATERIALIZED"); @@ -1421,15 +1427,17 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_create("CREATE"); - ParserKeyword s_attach("ATTACH"); ParserKeyword s_named_collection("NAMED COLLECTION"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); + ParserKeyword s_on("ON"); ParserKeyword s_as("AS"); - - ParserToken s_comma(TokenType::Comma); ParserIdentifier name_p; + ParserToken s_comma(TokenType::Comma); + + String cluster_str; + bool if_not_exists = false; ASTPtr collection_name; - String cluster_str; if (!s_create.ignore(pos, expected)) return false; @@ -1437,10 +1445,13 @@ bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (!s_named_collection.ignore(pos, expected)) return false; + if (s_if_not_exists.ignore(pos, expected)) + if_not_exists = true; + if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; @@ -1465,7 +1476,9 @@ bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expec auto query = std::make_shared(); tryGetIdentifierNameInto(collection_name, query->collection_name); + query->if_not_exists = if_not_exists; query->changes = changes; + query->cluster = std::move(cluster_str); node = query; return true; @@ -1480,7 +1493,7 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E ParserKeyword s_dictionary("DICTIONARY"); ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserKeyword s_on("ON"); - ParserCompoundIdentifier dict_name_p(true, true); + ParserCompoundIdentifier dict_name_p(/*table_name_with_optional_uuid*/ true, /*allow_query_parameter*/ true); ParserToken s_left_paren(TokenType::OpeningRoundBracket); ParserToken s_right_paren(TokenType::ClosingRoundBracket); ParserToken s_dot(TokenType::Dot); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 09935e2b608..0a98923436c 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -548,6 +548,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; +/// CREATE NAMED COLLECTION name [ON CLUSTER cluster] class ParserCreateNamedCollectionQuery : public IParserBase { protected: diff --git a/src/Parsers/ParserDescribeTableQuery.cpp b/src/Parsers/ParserDescribeTableQuery.cpp index ad6d2c5bcc6..fcfc4799dbe 100644 --- a/src/Parsers/ParserDescribeTableQuery.cpp +++ b/src/Parsers/ParserDescribeTableQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -16,8 +17,10 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex ParserKeyword s_describe("DESCRIBE"); ParserKeyword s_desc("DESC"); 
ParserKeyword s_table("TABLE"); + ParserKeyword s_settings("SETTINGS"); ParserToken s_dot(TokenType::Dot); ParserIdentifier name_p; + ParserSetQuery parser_settings(true); ASTPtr database; ASTPtr table; @@ -29,12 +32,21 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex s_table.ignore(pos, expected); - ASTPtr table_expression; - if (!ParserTableExpression().parse(pos, table_expression, expected)) + if (!ParserTableExpression().parse(pos, query->table_expression, expected)) return false; - query->children.push_back(std::move(table_expression)); - query->table_expression = query->children.back(); + /// For compatibility with SELECTs, where SETTINGS can be in front of FORMAT + ASTPtr settings; + if (s_settings.ignore(pos, expected)) + { + if (!parser_settings.parse(pos, query->settings_ast, expected)) + return false; + } + + query->children.push_back(query->table_expression); + + if (query->settings_ast) + query->children.push_back(query->settings_ast); node = query; diff --git a/src/Parsers/ParserDropNamedCollectionQuery.cpp b/src/Parsers/ParserDropNamedCollectionQuery.cpp index 1ea8aa6d75d..b0b010b5ef6 100644 --- a/src/Parsers/ParserDropNamedCollectionQuery.cpp +++ b/src/Parsers/ParserDropNamedCollectionQuery.cpp @@ -12,6 +12,7 @@ bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node ParserKeyword s_drop("DROP"); ParserKeyword s_collection("NAMED COLLECTION"); ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_on("ON"); ParserIdentifier name_p; String cluster_str; @@ -31,7 +32,7 @@ bool ParserDropNamedCollectionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node if (!name_p.parse(pos, collection_name, expected)) return false; - if (ParserKeyword{"ON"}.ignore(pos, expected)) + if (s_on.ignore(pos, expected)) { if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; diff --git a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index 77e7b58e6b1..7ed69940bed 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -54,6 +55,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserCreateIndexQuery create_index_p; ParserDropIndexQuery drop_index_p; ParserDropAccessEntityQuery drop_access_entity_p; + ParserMoveAccessEntityQuery move_access_entity_p; ParserGrantQuery grant_p; ParserSetRoleQuery set_role_p; ParserExternalDDLQuery external_ddl_p; @@ -80,6 +82,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || create_index_p.parse(pos, node, expected) || drop_index_p.parse(pos, node, expected) || drop_access_entity_p.parse(pos, node, expected) + || move_access_entity_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected) || external_ddl_p.parse(pos, node, expected) || transaction_control_p.parse(pos, node, expected) diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 2bfe7353be4..a2391495071 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -156,7 +156,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec // SETTINGS key1 = value1, key2 = value2, ... 
ParserKeyword s_settings("SETTINGS"); - if (s_settings.ignore(pos, expected)) + if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) { ParserSetQuery parser_settings(true); if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 1c48f773823..341c1ef60b4 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -292,6 +292,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// This is needed for TOP expression, because it can also use WITH TIES. bool limit_with_ties_occured = false; + bool has_offset_clause = false; + bool offset_clause_has_sql_standard_row_or_rows = false; /// OFFSET offset_row_count {ROW | ROWS} + /// LIMIT length | LIMIT offset, length | LIMIT count BY expr-list | LIMIT offset, length BY expr-list if (s_limit.ignore(pos, expected)) { @@ -316,6 +319,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (!exp_elem.parse(pos, limit_offset, expected)) return false; + + has_offset_clause = true; } else if (s_with_ties.ignore(pos, expected)) { @@ -351,60 +356,65 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (s_offset.ignore(pos, expected)) { - /// OFFSET offset_row_count {ROW | ROWS} FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES} - bool offset_with_fetch_maybe = false; + /// OFFSET without LIMIT + + has_offset_clause = true; if (!exp_elem.parse(pos, limit_offset, expected)) return false; + /// SQL standard OFFSET N ROW[S] ... + + if (s_row.ignore(pos, expected)) + offset_clause_has_sql_standard_row_or_rows = true; + + if (s_rows.ignore(pos, expected)) + { + if (offset_clause_has_sql_standard_row_or_rows) + throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); + + offset_clause_has_sql_standard_row_or_rows = true; + } + } + + /// SQL standard FETCH (either following SQL standard OFFSET or following ORDER BY) + if ((!has_offset_clause || offset_clause_has_sql_standard_row_or_rows) + && s_fetch.ignore(pos, expected)) + { + /// FETCH clause must exist with "ORDER BY" + if (!order_expression_list) + throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); + + if (s_first.ignore(pos, expected)) + { + if (s_next.ignore(pos, expected)) + throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together"); + } + else if (!s_next.ignore(pos, expected)) + return false; + + if (!exp_elem.parse(pos, limit_length, expected)) + return false; + if (s_row.ignore(pos, expected)) { if (s_rows.ignore(pos, expected)) throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); - offset_with_fetch_maybe = true; } - else if (s_rows.ignore(pos, expected)) + else if (!s_rows.ignore(pos, expected)) + return false; + + if (s_with_ties.ignore(pos, expected)) { - offset_with_fetch_maybe = true; + select_query->limit_with_ties = true; } - - if (offset_with_fetch_maybe && s_fetch.ignore(pos, expected)) + else if (s_only.ignore(pos, expected)) { - /// OFFSET FETCH clause must exists with "ORDER BY" - if (!order_expression_list) - throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); - - if (s_first.ignore(pos, expected)) - { - if (s_next.ignore(pos, expected)) - throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, 
"Can not use FIRST and NEXT together"); - } - else if (!s_next.ignore(pos, expected)) - return false; - - if (!exp_elem.parse(pos, limit_length, expected)) - return false; - - if (s_row.ignore(pos, expected)) - { - if (s_rows.ignore(pos, expected)) - throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); - } - else if (!s_rows.ignore(pos, expected)) - return false; - - if (s_with_ties.ignore(pos, expected)) - { - select_query->limit_with_ties = true; - } - else if (s_only.ignore(pos, expected)) - { - select_query->limit_with_ties = false; - } - else - { - return false; - } + select_query->limit_with_ties = false; + } + else + { + return false; } } diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 727d037112f..4df74c2dd82 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -215,7 +215,7 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p else if (ParserKeyword("FALSE").ignore(pos, expected)) value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -280,7 +280,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( node = std::make_shared(Field(static_cast(1))); else if (ParserKeyword("FALSE").ignore(pos, expected)) node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name.starts_with("disk")) + else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { change.name = name; change.value = createFieldFromAST(function_ast); diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index 3540a6d3fc8..e3728eb2cd6 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -36,7 +36,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_where("WHERE"); ParserKeyword s_limit("LIMIT"); ParserStringLiteral like_p; - ParserIdentifier name_p; + ParserIdentifier name_p(true); ParserExpressionWithOptionalAlias exp_elem(false); ASTPtr like; @@ -174,7 +174,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } - tryGetIdentifierNameInto(database, query->from); + query->set(query->from, database); if (like) query->like = like->as().value.safeGet(); diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 48dbe60e241..40fc1acae69 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -379,6 +379,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: + case Type::STOP_PULLING_REPLICATION_LOG: + case Type::START_PULLING_REPLICATION_LOG: if (!parseQueryWithOnCluster(res, pos, expected)) return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); @@ -405,7 +407,15 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ParserLiteral path_parser; ASTPtr ast; if (path_parser.parse(pos, ast, expected)) + { 
res->filesystem_cache_name = ast->as()->value.safeGet(); + if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) + { + res->key_to_drop = ast->as()->name(); + if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) + res->offset_to_drop = ast->as()->value.safeGet(); + } + } if (!parseQueryWithOnCluster(res, pos, expected)) return false; break; @@ -442,6 +452,42 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; } + case Type::START_LISTEN: + case Type::STOP_LISTEN: + { + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + + ServerType::Type current_type = ServerType::Type::END; + std::string current_custom_name; + + for (const auto & type : magic_enum::enum_values()) + { + if (ParserKeyword{ServerType::serverTypeToString(type)}.ignore(pos, expected)) + { + current_type = type; + break; + } + } + + if (current_type == ServerType::Type::END) + return false; + + if (current_type == ServerType::CUSTOM) + { + ASTPtr ast; + + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + + current_custom_name = ast->as().value.get(); + } + + res->server_type = ServerType(current_type, current_custom_name); + + break; + } + default: { if (!parseQueryWithOnCluster(res, pos, expected)) diff --git a/src/Parsers/ParserTablePropertiesQuery.cpp b/src/Parsers/ParserTablePropertiesQuery.cpp index b73ce8de359..94f264fcc89 100644 --- a/src/Parsers/ParserTablePropertiesQuery.cpp +++ b/src/Parsers/ParserTablePropertiesQuery.cpp @@ -14,8 +14,6 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & { ParserKeyword s_exists("EXISTS"); ParserKeyword s_temporary("TEMPORARY"); - ParserKeyword s_describe("DESCRIBE"); - ParserKeyword s_desc("DESC"); ParserKeyword s_show("SHOW"); ParserKeyword s_create("CREATE"); ParserKeyword s_database("DATABASE"); diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 08b6f77fafa..b3ae6ca0bb9 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -24,6 +24,8 @@ bool ParserTableExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!ParserWithOptionalAlias(std::make_unique(), allow_alias_without_as_keyword).parse(pos, res->subquery, expected) && !ParserWithOptionalAlias(std::make_unique(false, true), allow_alias_without_as_keyword).parse(pos, res->table_function, expected) && !ParserWithOptionalAlias(std::make_unique(true, true), allow_alias_without_as_keyword) + .parse(pos, res->database_and_table_name, expected) + && !ParserWithOptionalAlias(std::make_unique(), allow_alias_without_as_keyword) .parse(pos, res->database_and_table_name, expected)) return false; diff --git a/src/Parsers/ParserUseQuery.cpp b/src/Parsers/ParserUseQuery.cpp index 40511d8c142..350e573281c 100644 --- a/src/Parsers/ParserUseQuery.cpp +++ b/src/Parsers/ParserUseQuery.cpp @@ -11,7 +11,7 @@ namespace DB bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_use("USE"); - ParserIdentifier name_p; + ParserIdentifier name_p{/*allow_query_parameter*/ true}; if (!s_use.ignore(pos, expected)) return false; @@ -21,7 +21,7 @@ bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; auto query = std::make_shared(); - tryGetIdentifierNameInto(database, query->database); + query->set(query->database, database); node = query; return true; diff --git 
a/src/Parsers/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp index 1282c12cce6..b8679cc3b96 100644 --- a/src/Parsers/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -31,7 +31,7 @@ private: void visitQueryParameter(const ASTQueryParameter & query_parameter) { - query_parameters[query_parameter.name]= query_parameter.type; + query_parameters[query_parameter.name] = query_parameter.type; } }; diff --git a/src/Parsers/examples/CMakeLists.txt b/src/Parsers/examples/CMakeLists.txt index 82ca7bc0688..261f234081c 100644 --- a/src/Parsers/examples/CMakeLists.txt +++ b/src/Parsers/examples/CMakeLists.txt @@ -3,8 +3,8 @@ set(SRCS) clickhouse_add_executable(lexer lexer.cpp ${SRCS}) target_link_libraries(lexer PRIVATE clickhouse_parsers) -clickhouse_add_executable(select_parser select_parser.cpp ${SRCS}) -target_link_libraries(select_parser PRIVATE clickhouse_parsers) +clickhouse_add_executable(select_parser select_parser.cpp ${SRCS} "../../Server/ServerType.cpp") +target_link_libraries(select_parser PRIVATE dbms) -clickhouse_add_executable(create_parser create_parser.cpp ${SRCS}) -target_link_libraries(create_parser PRIVATE clickhouse_parsers) +clickhouse_add_executable(create_parser create_parser.cpp ${SRCS} "../../Server/ServerType.cpp") +target_link_libraries(create_parser PRIVATE dbms) diff --git a/src/Parsers/formatAST.cpp b/src/Parsers/formatAST.cpp index fca8ea0aa35..9315279eae6 100644 --- a/src/Parsers/formatAST.cpp +++ b/src/Parsers/formatAST.cpp @@ -4,18 +4,17 @@ namespace DB { -void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line) +void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite, bool one_line, bool show_secrets) { - IAST::FormatSettings settings(buf, one_line); - settings.hilite = hilite; - + IAST::FormatSettings settings(buf, one_line, hilite); + settings.show_secrets = show_secrets; ast.format(settings); } -String serializeAST(const IAST & ast, bool one_line) +String serializeAST(const IAST & ast) { WriteBufferFromOwnString buf; - formatAST(ast, buf, false, one_line); + formatAST(ast, buf, false, true); return buf.str(); } diff --git a/src/Parsers/formatAST.h b/src/Parsers/formatAST.h index 28af2400a4c..dd72a59b4a2 100644 --- a/src/Parsers/formatAST.h +++ b/src/Parsers/formatAST.h @@ -8,12 +8,13 @@ namespace DB class WriteBuffer; -/** Takes a syntax tree and turns it back into text. - * In case of INSERT query, the data will be missing. - */ -void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false); +/// Takes a syntax tree and turns it into text. +/// Intended for pretty-printing (multi-line + hiliting). +/// In case of INSERT query, the data will be missing. +void formatAST(const IAST & ast, WriteBuffer & buf, bool hilite = true, bool one_line = false, bool show_secrets = true); -String serializeAST(const IAST & ast, bool one_line = true); +/// Like formatAST() but intended for serialization w/o pretty-printing (single-line, no hiliting). 
+String serializeAST(const IAST & ast); inline WriteBuffer & operator<<(WriteBuffer & buf, const IAST & ast) { diff --git a/src/Parsers/getInsertQuery.cpp b/src/Parsers/getInsertQuery.cpp index 6f52056dfe2..9d111b147bd 100644 --- a/src/Parsers/getInsertQuery.cpp +++ b/src/Parsers/getInsertQuery.cpp @@ -19,9 +19,7 @@ std::string getInsertQuery(const std::string & db_name, const std::string & tabl query.columns->children.emplace_back(std::make_shared(column.name)); WriteBufferFromOwnString buf; - IAST::FormatSettings settings(buf, true); - settings.always_quote_identifiers = true; - settings.identifier_quoting_style = quoting; + IAST::FormatSettings settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true, /*identifier_quoting_style*/ quoting); query.IAST::format(settings); return buf.str(); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 2795de64b1d..18e91c533e0 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -64,7 +65,10 @@ TEST_P(ParserTest, parseQuery) if (std::string("CREATE USER or ALTER USER query") != parser->getName() && std::string("ATTACH access entity query") != parser->getName()) { - EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false)); + WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + String formatted_ast = buf.str(); + EXPECT_EQ(expected_ast, formatted_ast); } else { @@ -75,7 +79,10 @@ TEST_P(ParserTest, parseQuery) } else { - EXPECT_TRUE(std::regex_match(serializeAST(*ast->clone(), false), std::regex(expected_ast))); + WriteBufferFromOwnString buf; + formatAST(*ast->clone(), buf, false, false); + String formatted_ast = buf.str(); + EXPECT_TRUE(std::regex_match(formatted_ast, std::regex(expected_ast))); } } } @@ -352,11 +359,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nORDER BY LastName DESC" }, { - "Customers | order by Age desc , FirstName asc ", + "Customers | order by Age desc, FirstName asc ", "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" }, { - "Customers | order by Age asc , FirstName desc", + "Customers | order by Age asc, FirstName desc", "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" }, { @@ -476,3 +483,22 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" } }))); + +static constexpr size_t kDummyMaxQuerySize = 256 * 1024; +static constexpr size_t kDummyMaxParserDepth = 256; + +INSTANTIATE_TEST_SUITE_P( + ParserPRQL, + ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared(kDummyMaxQuerySize, kDummyMaxParserDepth)), + ::testing::ValuesIn(std::initializer_list{ + { + "from albums\ngroup [author_id] (\n aggregate [first_pushlied = min published]\n)\njoin a=author side:left [==author_id]\njoin p=purchases side:right [==author_id]\ngroup [a.id, p.purchase_id] (\n aggregate [avg_sell = min first_pushlied]\n)", + "WITH table_1 AS\n (\n SELECT\n MIN(published) AS _expr_0,\n author_id\n FROM albums\n GROUP BY author_id\n )\nSELECT\n a.id,\n p.purchase_id,\n MIN(table_0._expr_0) AS avg_sell\nFROM table_1 AS table_0\nLEFT JOIN author AS a ON table_0.author_id = a.author_id\nRIGHT JOIN purchases AS p ON table_0.author_id = p.author_id\nGROUP BY\n a.id,\n p.purchase_id", + }, + { + "from matches\nfilter start_date > @2023-05-30 # Some comment here\nderive [\n some_derived_value_1 = a + 
(b ?? 0), # And there\n some_derived_value_2 = c + some_derived_value\n]\nfilter some_derived_value_2 > 0\ngroup [country, city] (\n aggregate [\n average some_derived_value_2,\n aggr = max some_derived_value_2,\n ]\n)\nderive place = f\"{city} in {country}\"\nderive country_code = s\"LEFT(country, 2)\"\nsort [aggr, -country]\ntake 1..20", + "WITH\n table_3 AS\n (\n SELECT\n country,\n city,\n c + some_derived_value AS _expr_1\n FROM matches\n WHERE start_date > toDate('2023-05-30')\n ),\n table_1 AS\n (\n SELECT\n country,\n city,\n AVG(_expr_1) AS _expr_0,\n MAX(_expr_1) AS aggr\n FROM table_3 AS table_2\n WHERE _expr_1 > 0\n GROUP BY\n country,\n city\n )\nSELECT\n country,\n city,\n _expr_0,\n aggr,\n CONCAT(city, ' in ', country) AS place,\n LEFT(country, 2) AS country_code\nFROM table_1 AS table_0\nORDER BY\n aggr ASC,\n country DESC\nLIMIT 20", + }, + }))); diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index 22484727ea2..c0a975f7a38 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -155,7 +155,7 @@ TEST(ParserDictionaryDDL, AttributesWithMultipleProperties) EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); - EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression, true), "(rand() % 100) * 77"); + EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression), "(rand() % 100) * 77"); EXPECT_EQ(attributes_children[0]->as()->hierarchical, false); EXPECT_EQ(attributes_children[1]->as()->hierarchical, true); @@ -201,7 +201,7 @@ TEST(ParserDictionaryDDL, CustomAttributePropertiesOrder) EXPECT_EQ(attributes_children[0]->as()->expression, nullptr); EXPECT_EQ(attributes_children[1]->as()->expression, nullptr); - EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression, true), "(rand() % 100) * 77"); + EXPECT_EQ(serializeAST(*attributes_children[2]->as()->expression), "(rand() % 100) * 77"); EXPECT_EQ(attributes_children[0]->as()->hierarchical, false); EXPECT_EQ(attributes_children[1]->as()->hierarchical, true); @@ -288,7 +288,7 @@ TEST(ParserDictionaryDDL, Formatting) ParserCreateDictionaryQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); ASTCreateQuery * create = ast->as(); - auto str = serializeAST(*create, true); + auto str = serializeAST(*create); EXPECT_EQ(str, "CREATE DICTIONARY test.dict5 (`key_column1` UInt64 DEFAULT 1 HIERARCHICAL INJECTIVE, `key_column2` String DEFAULT '', `second_column` UInt8 EXPRESSION intDiv(50, rand() % 1000), `third_column` UInt8) PRIMARY KEY key_column1, key_column2 SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA (HOST '127.0.0.1' PRIORITY 1) PASSWORD '')) LIFETIME(MIN 1 MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 50)) RANGE(MIN second_column MAX third_column)"); } @@ -303,7 +303,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) EXPECT_TRUE(drop1->is_dictionary); EXPECT_EQ(drop1->getDatabase(), "test"); EXPECT_EQ(drop1->getTable(), "dict1"); - auto str1 = serializeAST(*drop1, true); + auto str1 = serializeAST(*drop1); EXPECT_EQ(input1, str1); String input2 = "DROP DICTIONARY IF EXISTS dict2"; @@ -314,7 +314,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) EXPECT_TRUE(drop2->is_dictionary); EXPECT_EQ(drop2->getDatabase(), ""); EXPECT_EQ(drop2->getTable(), "dict2"); - auto str2 = serializeAST(*drop2, true); + auto str2 = serializeAST(*drop2); EXPECT_EQ(input2, str2); } diff --git 
a/src/Parsers/tests/gtest_format_hiliting.cpp b/src/Parsers/tests/gtest_format_hiliting.cpp index d0ce8f2c897..a4c3ed86182 100644 --- a/src/Parsers/tests/gtest_format_hiliting.cpp +++ b/src/Parsers/tests/gtest_format_hiliting.cpp @@ -51,8 +51,7 @@ void compare(const String & expected, const String & query) ASTPtr ast = parseQuery(parser, query, 0, 0); WriteBufferFromOwnString write_buffer; - IAST::FormatSettings settings(write_buffer, true); - settings.hilite = true; + IAST::FormatSettings settings(write_buffer, true, true); ast->format(settings); ASSERT_PRED2(HiliteComparator::are_equal_with_hilites_removed, expected, write_buffer.str()); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 9f6c22f90f3..7cce495dfb8 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1047,7 +1047,7 @@ PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_) + SelectQueryOptions & select_query_options_) : query_tree(query_tree_) , select_query_options(select_query_options_) , planner_context(buildPlannerContext(query_tree, select_query_options, std::make_shared())) @@ -1055,7 +1055,7 @@ Planner::Planner(const QueryTreeNodePtr & query_tree_, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, GlobalPlannerContextPtr global_planner_context_) : query_tree(query_tree_) , select_query_options(select_query_options_) @@ -1064,7 +1064,7 @@ Planner::Planner(const QueryTreeNodePtr & query_tree_, } Planner::Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, PlannerContextPtr planner_context_) : query_tree(query_tree_) , select_query_options(select_query_options_) diff --git a/src/Planner/Planner.h b/src/Planner/Planner.h index 783a07f6e99..f8d151365cf 100644 --- a/src/Planner/Planner.h +++ b/src/Planner/Planner.h @@ -22,16 +22,16 @@ class Planner public: /// Initialize planner with query tree after analysis phase Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_); + SelectQueryOptions & select_query_options_); /// Initialize planner with query tree after query analysis phase and global planner context Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, GlobalPlannerContextPtr global_planner_context_); /// Initialize planner with query tree after query analysis phase and planner context Planner(const QueryTreeNodePtr & query_tree_, - const SelectQueryOptions & select_query_options_, + SelectQueryOptions & select_query_options_, PlannerContextPtr planner_context_); const QueryPlan & getQueryPlan() const @@ -66,7 +66,7 @@ private: void buildPlanForQueryNode(); QueryTreeNodePtr query_tree; - SelectQueryOptions select_query_options; + SelectQueryOptions & select_query_options; PlannerContextPtr planner_context; QueryPlan query_plan; StorageLimitsList storage_limits; diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 7575828e64d..9b6034cd0de 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -100,8 +100,9 @@ public: if (isNameOfInFunction(function_node.getFunctionName())) { + const auto & in_first_argument_node = 
function_node.getArguments().getNodes().at(0); const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1); - in_function_second_argument_node_name = planner_context.createSetKey(in_second_argument_node); + in_function_second_argument_node_name = planner_context.createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); } WriteBufferFromOwnString buffer; @@ -165,7 +166,7 @@ public: case QueryTreeNodeType::LAMBDA: { auto lambda_hash = node->getTreeHash(); - result = "__lambda_" + toString(lambda_hash.first) + '_' + toString(lambda_hash.second); + result = "__lambda_" + toString(lambda_hash); break; } default: @@ -628,8 +629,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma auto in_first_argument = function_node.getArguments().getNodes().at(0); auto in_second_argument = function_node.getArguments().getNodes().at(1); - //auto set_key = planner_context->createSetKey(in_second_argument); - DataTypes set_element_types; auto in_second_argument_node_type = in_second_argument->getNodeType(); @@ -665,7 +664,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma PreparedSets::toString(set_key, set_element_types)); ColumnWithTypeAndName column; - column.name = planner_context->createSetKey(in_second_argument); + column.name = planner_context->createSetKey(in_first_argument->getResultType(), in_second_argument); column.type = std::make_shared(); bool set_is_created = set->get() != nullptr; diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 3c75d4fbea8..0fde034b87a 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -112,10 +113,24 @@ const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const Que return table_expression_data->getColumnIdentifierOrNull(column_name); } -PlannerContext::SetKey PlannerContext::createSetKey(const QueryTreeNodePtr & set_source_node) +PlannerContext::SetKey PlannerContext::createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node) { - auto set_source_hash = set_source_node->getTreeHash(); - return "__set_" + toString(set_source_hash.first) + '_' + toString(set_source_hash.second); + const auto set_source_hash = set_source_node->getTreeHash(); + if (set_source_node->as()) + { + /* We need to hash the type of the left operand because we can build different sets for different types. + * (It's done for performance reasons. It's cheaper to convert a small set of values from literal to the type of the left operand.) + * + * For example in expression `(a :: Decimal(9, 1) IN (1.0, 2.5)) AND (b :: Decimal(9, 0) IN (1, 2.5))` + * we need to build two different sets: + * - `{1, 2.5} :: Set(Decimal(9, 1))` for a + * - `{1} :: Set(Decimal(9, 0))` for b (2.5 omitted because it's not representable as Decimal(9, 0)). + */ + return "__set_" + left_operand_type->getName() + '_' + toString(set_source_hash); + } + + /// For other cases we will cast left operand to the type of the set source, so no difference in types.
+ return "__set_" + toString(set_source_hash); } } diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index aceb313d4b5..44b24d09c5d 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -132,7 +132,7 @@ public: using SetKey = std::string; /// Create set key for set source node - static SetKey createSetKey(const QueryTreeNodePtr & set_source_node); + static SetKey createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node); PreparedSets & getPreparedSets() { return prepared_sets; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 5d8f8ca8741..f6ce029a295 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -101,7 +101,7 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, } throw Exception(ErrorCodes::ACCESS_DENIED, - "{}: Not enough privileges. To execute this query it's necessary to have grant SELECT for at least one column on {}", + "{}: Not enough privileges. To execute this query, it's necessary to have the grant SELECT for at least one column on {}", query_context->getUserName(), storage_id.getFullTableName()); } @@ -113,6 +113,20 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, query_context->checkAccess(AccessType::SELECT, storage_id, column_names); } +bool shouldIgnoreQuotaAndLimits(const TableNode & table_node) +{ + const auto & storage_id = table_node.getStorageID(); + if (!storage_id.hasDatabase()) + return false; + if (storage_id.database_name == DatabaseCatalog::SYSTEM_DATABASE) + { + static const boost::container::flat_set tables_ignoring_quota{"quotas", "quota_limits", "quota_usage", "quotas_usage", "one"}; + if (tables_ignoring_quota.count(storage_id.table_name)) + return true; + } + return false; +} + NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot) { /** We need to read at least one column to find the number of rows. 
@@ -182,6 +196,9 @@ bool applyTrivialCountIfPossible( return false; const auto & storage = table_node.getStorage(); + if (!storage->supportsTrivialCountOptimization()) + return false; + auto storage_id = storage->getStorageID(); auto row_policy_filter = query_context->getRowPolicyFilter(storage_id.getDatabaseName(), storage_id.getTableName(), @@ -825,8 +842,9 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres } else { + SelectQueryOptions analyze_query_options = SelectQueryOptions(from_stage).analyze(); Planner planner(select_query_info.query_tree, - SelectQueryOptions(from_stage).analyze(), + analyze_query_options, select_query_info.planner_context); planner.buildQueryPlanIfNeeded(); @@ -1372,7 +1390,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, const SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, + SelectQueryOptions & select_query_options, const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { @@ -1383,6 +1401,16 @@ JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, std::vector table_expressions_outer_scope_columns(table_expressions_stack_size); ColumnIdentifierSet current_outer_scope_columns = outer_scope_columns; + if (is_single_table_expression) + { + auto * table_node = table_expressions_stack[0]->as(); + if (table_node && shouldIgnoreQuotaAndLimits(*table_node)) + { + select_query_options.ignore_quota = true; + select_query_options.ignore_limits = true; + } + } + /// For each table, table function, query, union table expressions prepare before query plan build for (size_t i = 0; i < table_expressions_stack_size; ++i) { diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index acbc96ddae0..9d3b98175d0 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -20,7 +20,7 @@ struct JoinTreeQueryPlan /// Build JOIN TREE query plan for query node JoinTreeQueryPlan buildJoinTreeQueryPlan(const QueryTreeNodePtr & query_node, const SelectQueryInfo & select_query_info, - const SelectQueryOptions & select_query_options, + SelectQueryOptions & select_query_options, const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 7da10a8523b..e87930a4d6b 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -542,7 +542,8 @@ void trySetStorageInTableJoin(const QueryTreeNodePtr & table_expression, std::sh if (!table_join->isEnabledAlgorithm(JoinAlgorithm::DIRECT)) return; - if (auto storage_dictionary = std::dynamic_pointer_cast(storage); storage_dictionary) + if (auto storage_dictionary = std::dynamic_pointer_cast(storage); + storage_dictionary && storage_dictionary->getDictionary()->getSpecialKeyType() != DictionarySpecialKeyType::Range) table_join->setStorageJoin(std::dynamic_pointer_cast(storage_dictionary->getDictionary())); else if (auto storage_key_value = std::dynamic_pointer_cast(storage); storage_key_value) table_join->setStorageJoin(storage_key_value); @@ -634,6 +635,7 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo /// JOIN with JOIN engine. 
if (auto storage = table_join->getStorageJoin()) { + Names required_column_names; for (const auto & result_column : right_table_expression_header) { const auto * source_column_name = right_table_expression_data.getColumnNameOrNull(result_column.name); @@ -643,8 +645,9 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo fmt::join(storage->getKeyNames(), ", "), result_column.name); table_join->setRename(*source_column_name, result_column.name); + required_column_names.push_back(*source_column_name); } - return storage->getJoinLocked(table_join, planner_context->getQueryContext()); + return storage->getJoinLocked(table_join, planner_context->getQueryContext(), required_column_names); } /** JOIN with constant. diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 413872d512d..f50e45db644 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -114,16 +114,20 @@ private: using Chunks = std::vector; -/// ChunkOffsets marks offsets of different sub-chunks, which will be used by async inserts. -class ChunkOffsets : public ChunkInfo +/// AsyncInsert needs two kinds of information: +/// - offsets of different sub-chunks +/// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. +class AsyncInsertInfo : public ChunkInfo { public: - ChunkOffsets() = default; - explicit ChunkOffsets(const std::vector & offsets_) : offsets(offsets_) {} + AsyncInsertInfo() = default; + explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} + std::vector offsets; + std::vector tokens; }; -using ChunkOffsetsPtr = std::shared_ptr; +using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. class ChunkMissingValues : public ChunkInfo diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index 2964d9b6aa2..b0f842dec1b 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -115,7 +115,7 @@ CompletedPipelineExecutor::~CompletedPipelineExecutor() } catch (...) 
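A minimal sketch of how the renamed AsyncInsertInfo is meant to be attached to a chunk assembled from several buffered inserts; the element types (size_t offsets, String tokens), the cumulative meaning of the offsets and the setChunkInfo() attachment point are assumptions rather than something shown in this hunk:

    // Hypothetical: a chunk built from two async-insert payloads of 3 and 5 rows.
    auto info = std::make_shared<AsyncInsertInfo>(
        std::vector<size_t>{3, 8},                    // assumed cumulative end offsets of the sub-chunks
        std::vector<String>{"token_a", "token_b"});   // per-payload insert_deduplication_token values
    chunk.setChunkInfo(std::move(info));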
{ - tryLogCurrentException("PullingAsyncPipelineExecutor"); + tryLogCurrentException("CompletedPipelineExecutor"); } } diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp index 794f478b272..0fa7e0b552f 100644 --- a/src/Processors/Executors/ExecutionThreadContext.cpp +++ b/src/Processors/Executors/ExecutionThreadContext.cpp @@ -56,6 +56,9 @@ static void executeJob(ExecutingGraph::Node * node, ReadProgressCallback * read_ if (read_progress->counters.total_rows_approx) read_progress_callback->addTotalRowsApprox(read_progress->counters.total_rows_approx); + if (read_progress->counters.total_bytes) + read_progress_callback->addTotalBytes(read_progress->counters.total_bytes); + if (!read_progress_callback->onProgress(read_progress->counters.read_rows, read_progress->counters.read_bytes, read_progress->limits)) node->processor->cancel(); } diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index f523e7b7cf9..1508d834592 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -272,7 +272,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie /// Prepare processor after execution. if (!graph->updateNode(context.getProcessorID(), queue, async_queue)) - finish(); + cancel(); /// Push other tasks to global queue. tasks.pushTasks(queue, async_queue, context); diff --git a/src/Processors/Executors/StreamingFormatExecutor.cpp b/src/Processors/Executors/StreamingFormatExecutor.cpp index 2223721439e..46818989032 100644 --- a/src/Processors/Executors/StreamingFormatExecutor.cpp +++ b/src/Processors/Executors/StreamingFormatExecutor.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB { diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 0728aecf61f..7395d9b916a 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -86,7 +86,21 @@ void IRowInputFormat::logError() Chunk IRowInputFormat::generate() { if (total_rows == 0) - readPrefix(); + { + try + { + readPrefix(); + } + catch (Exception & e) + { + auto file_name = getFileNameFromReadBuffer(getReadBuffer()); + if (!file_name.empty()) + e.addMessage(fmt::format("(in file/uri {})", file_name)); + + e.addMessage("(while reading header)"); + throw; + } + } const Block & header = getPort().getHeader(); @@ -97,7 +111,6 @@ Chunk IRowInputFormat::generate() size_t num_rows = 0; size_t chunk_start_offset = getDataOffsetMaybeCompressed(getReadBuffer()); - try { RowReadExtension info; diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h index b7b1b0b29a6..00888cfa5e9 100644 --- a/src/Processors/Formats/IRowInputFormat.h +++ b/src/Processors/Formats/IRowInputFormat.h @@ -85,7 +85,7 @@ private: size_t num_errors = 0; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 2fadc09e80f..4293407379e 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -143,7 +143,6 @@ void ArrowBlockInputFormat::prepareReader() arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "Arrow", - format_settings.arrow.import_nested, 
format_settings.arrow.allow_missing_columns, format_settings.null_as_default, format_settings.arrow.case_insensitive_column_matching); @@ -190,7 +189,6 @@ void registerInputFormatArrow(FormatFactory & factory) { return std::make_shared(buf, sample, false, format_settings); }); - factory.markFormatSupportsSubcolumns("Arrow"); factory.markFormatSupportsSubsetOfColumns("Arrow"); factory.registerInputFormat( "ArrowStream", diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index df77994c3d5..2db8bd6c59c 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -50,7 +50,7 @@ private: int record_batch_current = 0; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; const FormatSettings format_settings; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 5a7306111a5..74d4553a58b 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1032,13 +1032,11 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( ArrowColumnToCHColumn::ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, - bool import_nested_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_) : header(header_) , format_name(format_name_) - , import_nested(import_nested_) , allow_missing_columns(allow_missing_columns_) , null_as_default(null_as_default_) , case_insensitive_matching(case_insensitive_matching_) @@ -1080,42 +1078,40 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & if (!name_to_column_ptr.contains(search_column_name)) { bool read_from_nested = false; - /// Check if it's a column from nested table. - if (import_nested) + /// Check if it's a subcolumn from some struct. 
+ String nested_table_name = Nested::extractTableName(header_column.name); + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + boost::to_lower(search_nested_table_name); + if (name_to_column_ptr.contains(search_nested_table_name)) { - String nested_table_name = Nested::extractTableName(header_column.name); - String search_nested_table_name = nested_table_name; - if (case_insensitive_matching) - boost::to_lower(search_nested_table_name); - if (name_to_column_ptr.contains(search_nested_table_name)) + if (!nested_tables.contains(search_nested_table_name)) { - if (!nested_tables.contains(search_nested_table_name)) + NamesAndTypesList nested_columns; + for (const auto & name_and_type : header.getNamesAndTypesList()) { - NamesAndTypesList nested_columns; - for (const auto & name_and_type : header.getNamesAndTypesList()) - { - if (name_and_type.name.starts_with(nested_table_name + ".")) - nested_columns.push_back(name_and_type); - } - auto nested_table_type = Nested::collect(nested_columns).front().type; + if (name_and_type.name.starts_with(nested_table_name + ".")) + nested_columns.push_back(name_and_type); + } + auto nested_table_type = Nested::collect(nested_columns).front().type; - std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( - arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; - BlockPtr block_ptr = std::make_shared(cols); - auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); - nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; - } - auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); - if (nested_column) - { - column = *nested_column; - if (case_insensitive_matching) - column.name = header_column.name; - read_from_nested = true; - } + std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; + ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( + arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, skipped, nested_table_type)}; + BlockPtr block_ptr = std::make_shared(cols); + auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); + nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; + } + auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); + if (nested_column) + { + column = *nested_column; + if (case_insensitive_matching) + column.name = header_column.name; + read_from_nested = true; } } + if (!read_from_nested) { if (!allow_missing_columns) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 64ff99c70ac..57f33069e0e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -24,7 +24,6 @@ public: ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, - bool import_nested_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_ = false); @@ -53,7 +52,6 @@ public: private: const Block & header; const std::string format_name; - bool import_nested; /// If false, throw exception if some columns in header not exists in arrow table. 
bool allow_missing_columns; bool null_as_default; diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h index 4beffbcf869..676ce50d04f 100644 --- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h +++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h @@ -35,7 +35,7 @@ public: /// - key: field name with full path. eg. a struct field's name is like a.x.i /// - value: a pair, first value refers to this field's start index, second value refers to how many /// indices this field take. eg. - /// For a parquet schema {x: int , y: {i: int, j: int}}, the return will be + /// For a parquet schema {x: int, y: {i: int, j: int}}, the return will be /// - x: (0, 1) /// - y: (1, 2) /// - y.i: (1, 1) @@ -75,7 +75,7 @@ public: { if (!allow_missing_columns) throw Exception( - ErrorCodes::THERE_IS_NO_COLUMN, "Not found field({}) in arrow schema:{}.", named_col.name, schema.ToString()); + ErrorCodes::THERE_IS_NO_COLUMN, "Not found field ({}) in the following Arrow schema:\n{}\n", named_col.name, schema.ToString()); else continue; } @@ -168,4 +168,3 @@ private: }; } #endif - diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 4cd73cb23b5..e7ea64e487c 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -52,6 +52,8 @@ #include #include #include +#include +#include #include #include #include @@ -367,14 +369,25 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro break; case avro::AVRO_UNION: { - if (root_node->leaves() == 2 + if (root_node->leaves() == 1) + { + auto nested_deserialize = createDeserializeFn(root_node->leafAt(0), target_type); + return [nested_deserialize](IColumn & column, avro::Decoder & decoder) + { + decoder.decodeUnionIndex(); + nested_deserialize(column, decoder); + return true; + }; + } + /// FIXME Support UNION has more than two datatypes. + else if ( + root_node->leaves() == 2 && (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL)) { int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 
1 : 0; if (target.isNullable()) { - auto nested_deserialize = this->createDeserializeFn( - root_node->leafAt(non_null_union_index), removeNullable(target_type)); + auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type)); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { ColumnNullable & col = assert_cast(column); @@ -393,7 +406,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(const avro } else if (null_as_default) { - auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); + auto nested_deserialize = createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { int union_index = static_cast(decoder.decodeUnionIndex()); @@ -934,6 +947,27 @@ private: Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, url.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(url.getHost()); + if (!url.getUserInfo().empty()) + { + Poco::Net::HTTPCredentials http_credentials; + Poco::Net::HTTPBasicCredentials http_basic_credentials; + + http_credentials.fromUserInfo(url.getUserInfo()); + + std::string decoded_username; + Poco::URI::decode(http_credentials.getUsername(), decoded_username); + http_basic_credentials.setUsername(decoded_username); + + if (!http_credentials.getPassword().empty()) + { + std::string decoded_password; + Poco::URI::decode(http_credentials.getPassword(), decoded_password); + http_basic_credentials.setPassword(decoded_password); + } + + http_basic_credentials.authenticate(request); + } + auto session = makePooledHTTPSession(url, timeouts, 1); session->sendRequest(request); @@ -977,7 +1011,7 @@ private: using ConfluentSchemaRegistry = AvroConfluentRowInputFormat::SchemaRegistry; #define SCHEMA_REGISTRY_CACHE_MAX_SIZE 1000 /// Cache of Schema Registry URL -> SchemaRegistry -static CacheBase schema_registry_cache(SCHEMA_REGISTRY_CACHE_MAX_SIZE); +static CacheBase schema_registry_cache(SCHEMA_REGISTRY_CACHE_MAX_SIZE); static std::shared_ptr getConfluentSchemaRegistry(const FormatSettings & format_settings) { @@ -1169,12 +1203,19 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) case avro::Type::AVRO_NULL: return std::make_shared(); case avro::Type::AVRO_UNION: - if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL)) + if (node->leaves() == 1) + { + return avroNodeToDataType(node->leafAt(0)); + } + else if ( + node->leaves() == 2 + && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL)) { int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0; auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index)); return nested_type->canBeInsideNullable() ? makeNullable(nested_type) : nested_type; } + /// FIXME Support UNION has more than two datatypes. 
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Avro type UNION is not supported for inserting."); case avro::Type::AVRO_SYMBOLIC: return avroNodeToDataType(avro::resolveSymbol(node)); @@ -1217,6 +1258,8 @@ void registerInputFormatAvro(FormatFactory & factory) { return std::make_shared(sample, buf, params, settings); }); + + factory.markFormatSupportsSubsetOfColumns("AvroConfluent"); } void registerAvroSchemaReader(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index a4f779076eb..ac5da172210 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -13,7 +13,8 @@ namespace ErrorCodes extern const int CANNOT_SKIP_UNKNOWN_FIELD; } -BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) +template +BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : RowInputFormatWithNamesAndTypes( header, in_, @@ -22,16 +23,17 @@ BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & heade with_names_, with_types_, format_settings_, - std::make_unique(in_, format_settings_)) + std::make_unique>(in_, format_settings_)) { } - -BinaryFormatReader::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_) +template +BinaryFormatReader::BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_) { } -std::vector BinaryFormatReader::readHeaderRow() +template +std::vector BinaryFormatReader::readHeaderRow() { std::vector fields; String field; @@ -43,13 +45,15 @@ std::vector BinaryFormatReader::readHeaderRow() return fields; } -std::vector BinaryFormatReader::readNames() +template +std::vector BinaryFormatReader::readNames() { readVarUInt(read_columns, *in); return readHeaderRow(); } -std::vector BinaryFormatReader::readTypes() +template +std::vector BinaryFormatReader::readTypes() { auto types = readHeaderRow(); for (const auto & type_name : types) @@ -57,26 +61,40 @@ std::vector BinaryFormatReader::readTypes() return types; } -bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/) +template +bool BinaryFormatReader::readField(IColumn & column, const DataTypePtr & /*type*/, const SerializationPtr & serialization, bool /*is_last_file_column*/, const String & /*column_name*/) { + if constexpr (with_defaults) + { + UInt8 is_default; + readBinary(is_default, *in); + if (is_default) + { + column.insertDefault(); + return false; + } + } serialization->deserializeBinary(column, *in, format_settings); return true; } -void BinaryFormatReader::skipHeaderRow() +template +void BinaryFormatReader::skipHeaderRow() { String tmp; for (size_t i = 0; i < read_columns; ++i) readStringBinary(tmp, *in); } -void BinaryFormatReader::skipNames() +template +void BinaryFormatReader::skipNames() { readVarUInt(read_columns, *in); skipHeaderRow(); } -void BinaryFormatReader::skipTypes() +template +void BinaryFormatReader::skipTypes() { if (read_columns == 0) { @@ -87,7 +105,8 @@ void BinaryFormatReader::skipTypes() skipHeaderRow(); } -void 
BinaryFormatReader::skipField(size_t file_column) +template +void BinaryFormatReader::skipField(size_t file_column) { if (file_column >= read_data_types.size()) throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, @@ -111,12 +130,21 @@ void registerInputFormatRowBinary(FormatFactory & factory) const IRowInputFormat::Params & params, const FormatSettings & settings) { - return std::make_shared(buf, sample, params, with_names, with_types, settings); + return std::make_shared>(buf, sample, params, with_names, with_types, settings); }); }; registerWithNamesAndTypes("RowBinary", register_func); factory.registerFileExtension("bin", "RowBinary"); + + factory.registerInputFormat("RowBinaryWithDefaults", []( + ReadBuffer & buf, + const Block & sample, + const IRowInputFormat::Params & params, + const FormatSettings & settings) + { + return std::make_shared>(buf, sample, params, false, false, settings); + }); } void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory) @@ -125,6 +153,8 @@ void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory) { return std::make_shared(buf, settings); }); + + } diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index 3d3d80f1043..6f2042d1315 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -12,6 +12,7 @@ class ReadBuffer; /** A stream for inputting data in a binary line-by-line format. */ +template class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: @@ -25,6 +26,7 @@ public: std::string getDiagnosticInfo() override { return {}; } }; +template class BinaryFormatReader final : public FormatWithNamesAndTypesReader { public: @@ -54,7 +56,7 @@ public: BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); private: - BinaryFormatReader reader; + BinaryFormatReader reader; }; } diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index c3685e813d3..e2383d1bfab 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -208,7 +208,7 @@ namespace DB const String & column_name, ColumnPtr & column, const DataTypePtr & column_type, - const PaddedPODArray * null_bytemap, + const PaddedPODArray *, arrow::ArrayBuilder * array_builder, String format_name, size_t start, @@ -231,7 +231,11 @@ namespace DB /// Start new array. components_status = builder.Append(); checkStatus(components_status, nested_column->getName(), format_name); - fillArrowArray(column_name, nested_column, nested_type, null_bytemap, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], output_string_as_string, output_fixed_string_as_fixed_byte_array, dictionary_values); + + /// Pass null null_map, because fillArrowArray will decide whether nested_type is nullable, if nullable, it will create a new null_map from nested_column + /// Note that it is only needed by gluten(https://github.com/oap-project/gluten), because array type in gluten is by default nullable. + /// And it does not influence the original ClickHouse logic, because null_map passed to fillArrowArrayWithArrayColumnData is always nullptr for ClickHouse doesn't allow nullable complex types including array type. 
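Returning to the RowBinaryWithDefaults reader registered above: each field is prefixed with a single byte; a non-zero byte means "insert the column default, no value follows", a zero byte means an ordinary RowBinary value follows. A hedged illustration of the implied wire layout (bytes invented, not taken from a real dump) for one row of a table (x UInt32, y String) where x falls back to its default and y = 'hi':

    0x01                     // x: is_default = 1, no value bytes follow
    0x00 0x02 0x68 0x69      // y: is_default = 0, then RowBinary String (varint length 2, 'h', 'i')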
+ fillArrowArray(column_name, nested_column, nested_type, nullptr, value_builder, format_name, offsets[array_idx - 1], offsets[array_idx], output_string_as_string, output_fixed_string_as_fixed_byte_array, dictionary_values); } } @@ -680,9 +684,6 @@ namespace DB bool output_fixed_string_as_fixed_byte_array, std::unordered_map & dictionary_values) { - const String column_type_name = column_type->getFamilyName(); - WhichDataType which(column_type); - switch (column_type->getTypeId()) { case TypeIndex::Nullable: @@ -792,7 +793,7 @@ namespace DB FOR_INTERNAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type_name, column_name, format_name); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getFamilyName(), column_name, format_name); } } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index c17828c6c38..244b906549e 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -283,6 +284,11 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return true; } +bool CSVFormatReader::allowVariableNumberOfColumns() +{ + return format_settings.csv.allow_variable_number_of_columns; +} + bool CSVFormatReader::readField( IColumn & column, const DataTypePtr & type, @@ -310,17 +316,54 @@ bool CSVFormatReader::readField( return false; } + if (format_settings.csv.use_default_on_bad_values) + return readFieldOrDefault(column, type, serialization); + return readFieldImpl(*buf, column, type, serialization); +} + +bool CSVFormatReader::readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) +{ if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); + return SerializationNullable::deserializeTextCSVImpl(column, istr, format_settings, serialization); } /// Read the column normally. - serialization->deserializeTextCSV(column, *buf, format_settings); + serialization->deserializeTextCSV(column, istr, format_settings); return true; } +bool CSVFormatReader::readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization) +{ + String field; + readCSVField(field, *buf, format_settings.csv); + ReadBufferFromString tmp_buf(field); + bool is_bad_value = false; + bool res = false; + + size_t col_size = column.size(); + try + { + res = readFieldImpl(tmp_buf, column, type, serialization); + /// Check if we parsed the whole field successfully. 
+ if (!field.empty() && !tmp_buf.eof()) + is_bad_value = true; + } + catch (const Exception &) + { + is_bad_value = true; + } + + if (!is_bad_value) + return res; + + if (column.size() == col_size + 1) + column.popBack(1); + column.insertDefault(); + return false; +} + void CSVFormatReader::skipPrefixBeforeHeader() { for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i) @@ -347,6 +390,12 @@ bool CSVFormatReader::checkForSuffix() return false; } +bool CSVFormatReader::checkForEndOfRow() +{ + skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); + return buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'; +} + CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( buf, diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 0c8099a216c..7b1a1fc433d 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -69,6 +69,9 @@ public: void skipRowEndDelimiter() override; void skipPrefixBeforeHeader() override; + bool checkForEndOfRow() override; + bool allowVariableNumberOfColumns() override; + std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } std::vector readHeaderRow() { return readRowImpl(); } @@ -86,6 +89,8 @@ public: void setReadBuffer(ReadBuffer & in_) override; FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; } + bool readFieldImpl(ReadBuffer & istr, DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); + bool readFieldOrDefault(DB::IColumn & column, const DB::DataTypePtr & type, const DB::SerializationPtr & serialization); protected: PeekableReadBuffer * buf; diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index c056ee2b4a4..d43367a5b44 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -17,12 +17,12 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } -CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings) +CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const CapnProtoSchemaInfo & info, const FormatSettings & format_settings) : IRowInputFormat(std::move(header_), in_, std::move(params_)) , parser(std::make_shared()) { // Parse the schema and fetch the root object - schema = parser->getMessageSchema(info); + schema = parser->getMessageSchema(info.getSchemaInfo()); const auto & header = getPort().getHeader(); serializer = std::make_unique(header.getDataTypes(), header.getNames(), schema, format_settings.capn_proto); } @@ -106,8 +106,12 @@ void registerInputFormatCapnProto(FormatFactory & factory) "CapnProto", [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { - return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "CapnProto", true), settings); + return std::make_shared( + buf, + sample, + std::move(params), + CapnProtoSchemaInfo(settings, "CapnProto", sample, settings.capn_proto.use_autogenerated_schema), + 
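To restate the readFieldOrDefault() path above in behavioural terms: the raw CSV field is copied into a string and parsed from a temporary buffer; if parsing throws, or leaves part of the field unconsumed, any partially inserted value is popped and the column default is inserted instead. For example (illustrative, derived only from the code shown), with format_settings.csv.use_default_on_bad_values enabled, a field 3a destined for a UInt64 column parses 3, leaves a unconsumed, and therefore lands as the default 0 instead of raising a parse error.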
settings); }); factory.markFormatSupportsSubsetOfColumns("CapnProto"); factory.registerFileExtension("capnp", "CapnProto"); diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index 06e94da123f..480be8b760f 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -24,7 +24,7 @@ class ReadBuffer; class CapnProtoRowInputFormat final : public IRowInputFormat { public: - CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); + CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const CapnProtoSchemaInfo & info, const FormatSettings & format_settings); String getName() const override { return "CapnProtoRowInputFormat"; } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 66a7160dd89..8ab19331b11 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -23,14 +23,14 @@ void CapnProtoOutputStream::write(const void * buffer, size_t size) CapnProtoRowOutputFormat::CapnProtoRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & info, + const CapnProtoSchemaInfo & info, const FormatSettings & format_settings) : IRowOutputFormat(header_, out_) , column_names(header_.getNames()) , column_types(header_.getDataTypes()) , output_stream(std::make_unique(out_)) { - schema = schema_parser.getMessageSchema(info); + schema = schema_parser.getMessageSchema(info.getSchemaInfo()); const auto & header = getPort(PortKind::Main).getHeader(); serializer = std::make_unique(header.getDataTypes(), header.getNames(), schema, format_settings.capn_proto); capnp::MallocMessageBuilder message; @@ -52,7 +52,11 @@ void registerOutputFormatCapnProto(FormatFactory & factory) const Block & sample, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings); + return std::make_shared( + buf, + sample, + CapnProtoSchemaInfo(format_settings, "CapnProto", sample, format_settings.capn_proto.use_autogenerated_schema), + format_settings); }); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h index dd9dcc6b340..c00dceb498e 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -31,8 +31,8 @@ public: CapnProtoRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & info, - const FormatSettings & format_settings_); + const CapnProtoSchemaInfo & info, + const FormatSettings & format_settings); String getName() const override { return "CapnProtoRowOutputFormat"; } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 5d438d47de6..fe6fb42d0a0 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -177,6 +177,14 @@ private: if (function.name == "lambda") return; + /// Parsing of INTERVALs is quite hacky. 
Expressions are rewritten during parsing like this: + /// "now() + interval 1 day" -> "now() + toIntervalDay(1)" + /// "select now() + INTERVAL '1 day 1 hour 1 minute'" -> "now() + (toIntervalDay(1), toIntervalHour(1), toIntervalMinute(1))" + /// so the AST is completely different from the original expression . + /// Avoid extracting these literals and simply compare tokens. It makes the template less flexible but much simpler. + if (function.name.starts_with("toInterval")) + return; + FunctionOverloadResolverPtr builder = FunctionFactory::instance().get(function.name, context); /// Do not replace literals which must be constant ColumnNumbers dont_visit_children = builder->getArgumentsThatAreAlwaysConstant(); @@ -350,6 +358,31 @@ ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & } +String ConstantExpressionTemplate::TemplateStructure::dumpTemplate() const +{ + WriteBufferFromOwnString res; + + size_t cur_column = 0; + size_t cur_token = 0; + size_t num_columns = literals.columns(); + while (cur_column < num_columns) + { + size_t skip_tokens_until = token_after_literal_idx[cur_column]; + while (cur_token < skip_tokens_until) + res << quote << tokens[cur_token++] << ", "; + + const DataTypePtr & type = literals.getByPosition(cur_column).type; + res << type->getName() << ", "; + ++cur_column; + } + + while (cur_token < tokens.size()) + res << quote << tokens[cur_token++] << ", "; + + res << "eof"; + return res.str(); +} + size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, const DataTypePtr & result_column_type, @@ -369,11 +402,10 @@ size_t ConstantExpressionTemplate::TemplateStructure::getTemplateHash(const ASTP /// Allows distinguish expression in the last column in Values format hash_state.update(salt); - IAST::Hash res128; - hash_state.get128(res128); + const auto res128 = getSipHash128AsPair(hash_state); size_t res = 0; - boost::hash_combine(res, res128.first); - boost::hash_combine(res, res128.second); + boost::hash_combine(res, res128.low64); + boost::hash_combine(res, res128.high64); return res; } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h index fbb3cbcd22a..71d0d0f7134 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h @@ -31,6 +31,8 @@ class ConstantExpressionTemplate : boost::noncopyable static size_t getTemplateHash(const ASTPtr & expression, const LiteralsInfo & replaced_literals, const DataTypePtr & result_column_type, bool null_as_default, const String & salt); + String dumpTemplate() const; + String result_column_name; std::vector tokens; diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index 5ab20c796ea..bb52e2aa516 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -67,7 +67,7 @@ protected: Serializations serializations; std::unique_ptr reader; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index e5f52936021..b1b08cdf256 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ 
b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -236,10 +236,10 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi bool JSONEachRowRowInputFormat::checkEndOfData(bool is_first_row) { - /// We consume , or \n before scanning a new row, instead scanning to next row at the end. + /// We consume ',' or '\n' before scanning a new row, instead scanning to next row at the end. /// The reason is that if we want an exact number of rows read with LIMIT x /// from a streaming table engine with text data format, like File or Kafka - /// then seeking to next ;, or \n would trigger reading of an extra row at the end. + /// then seeking to next ';,' or '\n' would trigger reading of an extra row at the end. /// Semicolon is added for convenience as it could be used at end of INSERT query. if (!in->eof()) diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index f8c9a39eedf..65ea87479a3 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -66,7 +66,7 @@ private: std::unique_ptr reader; Block header; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; class NativeOutputFormat final : public IOutputFormat diff --git a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp index 43294355f2f..c8c00086e8c 100644 --- a/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ODBCDriver2BlockOutputFormat.cpp @@ -3,9 +3,6 @@ #include #include #include - - -#include #include diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 016f07731d5..ab4e07376f3 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -125,16 +125,12 @@ void ORCBlockInputFormat::prepareReader() arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "ORC", - format_settings.orc.import_nested, format_settings.orc.allow_missing_columns, format_settings.null_as_default, format_settings.orc.case_insensitive_column_matching); const bool ignore_case = format_settings.orc.case_insensitive_column_matching; - std::unordered_set nested_table_names; - if (format_settings.orc.import_nested) - nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); - + std::unordered_set nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); for (int i = 0; i < schema->num_fields(); ++i) { const auto & name = schema->field(i)->name(); @@ -171,7 +167,6 @@ void registerInputFormatORC(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatSupportsSubcolumns("ORC"); factory.markFormatSupportsSubsetOfColumns("ORC"); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 98561e72e61..7097ea3ac08 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -52,7 +52,7 @@ private: std::vector include_indices; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; const FormatSettings format_settings; const std::unordered_set & skip_stripes; diff --git a/src/Processors/Formats/Impl/OneFormat.cpp 
b/src/Processors/Formats/Impl/OneFormat.cpp new file mode 100644 index 00000000000..4a9c8caebf3 --- /dev/null +++ b/src/Processors/Formats/Impl/OneFormat.cpp @@ -0,0 +1,57 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +OneInputFormat::OneInputFormat(const Block & header, ReadBuffer & in_) : IInputFormat(header, &in_) +{ + if (header.columns() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "One input format is only suitable for tables with a single column of type UInt8 but the number of columns is {}", + header.columns()); + + if (!WhichDataType(header.getByPosition(0).type).isUInt8()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "One input format is only suitable for tables with a single column of type String but the column type is {}", + header.getByPosition(0).type->getName()); +} + +Chunk OneInputFormat::generate() +{ + if (done) + return {}; + + done = true; + auto column = ColumnUInt8::create(); + column->insertDefault(); + return Chunk(Columns{std::move(column)}, 1); +} + +void registerInputFormatOne(FormatFactory & factory) +{ + factory.registerInputFormat("One", []( + ReadBuffer & buf, + const Block & sample, + const RowInputFormatParams &, + const FormatSettings &) + { + return std::make_shared(sample, buf); + }); +} + +void registerOneSchemaReader(FormatFactory & factory) +{ + factory.registerExternalSchemaReader("One", [](const FormatSettings &) + { + return std::make_shared(); + }); +} + +} diff --git a/src/Processors/Formats/Impl/OneFormat.h b/src/Processors/Formats/Impl/OneFormat.h new file mode 100644 index 00000000000..f73b2dab66a --- /dev/null +++ b/src/Processors/Formats/Impl/OneFormat.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class OneInputFormat final : public IInputFormat +{ +public: + OneInputFormat(const Block & header, ReadBuffer & in_); + + String getName() const override { return "One"; } + +protected: + Chunk generate() override; + +private: + bool done = false; +}; + +class OneSchemaReader: public IExternalSchemaReader +{ +public: + NamesAndTypesList readSchema() override + { + return {{"dummy", std::make_shared()}}; + } +}; + +} diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index 4495680f5b2..f61dc3fbc78 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -202,7 +202,7 @@ private: const size_t max_block_size; BlockMissingValues last_block_missing_values; - size_t last_approx_bytes_read_for_chunk; + size_t last_approx_bytes_read_for_chunk = 0; /// Non-atomic because it is used in one thread. std::optional next_block_in_current_unit; diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp new file mode 100644 index 00000000000..bc4c9ca3b72 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -0,0 +1,628 @@ +#include "Processors/Formats/Impl/Parquet/Write.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/// This file deals with schema conversion and with repetition and definition levels. + +/// Schema conversion is pretty straightforward. + +/// "Repetition and definition levels" are a somewhat tricky way of encoding information about +/// optional fields and lists. 
+/// +/// If you don't want to learn how these work, feel free to skip the updateRepDefLevels* functions. +/// All you need to know is: +/// * values for nulls are not encoded, so we have to filter nullable columns, +/// * information about all array lengths and nulls is encoded in the arrays `def` and `rep`, +/// which need to be encoded next to the data, +/// * `def` and `rep` arrays can be longer than `primitive_column`, because they include nulls and +/// empty arrays; the values in primitive_column correspond to positions where def[i] == max_def. +/// +/// If you do want to learn it, dremel paper: https://research.google/pubs/pub36632/ +/// Instead of reading the whole paper, try staring at figures 2-3 for a while - it might be enough. +/// (Why does Parquet do all this instead of just storing array lengths and null masks? I'm not +/// really sure.) +/// +/// We calculate the levels recursively, from inner to outer columns. +/// This means scanning the whole array for each Array/Nullable nesting level, which is probably not +/// the most efficient way to do it. But there's usually at most one nesting level, so it's fine. +/// +/// Most of this is moot because ClickHouse doesn't support nullable arrays or tuples right now, so +/// almost none of the tricky cases can happen. We implement it in full generality anyway (mostly +/// because I only learned the previous sentence after writing most of the code). + + +namespace DB::ErrorCodes +{ + extern const int UNKNOWN_TYPE; + extern const int TOO_DEEP_RECURSION; // I'm 14 and this is deep + extern const int UNKNOWN_COMPRESSION_METHOD; + extern const int LOGICAL_ERROR; +} + +namespace DB::Parquet +{ + +/// Thrift structs that Parquet uses for various metadata inside the parquet file. +namespace parq = parquet::format; + +namespace +{ + +void assertNoDefOverflow(ColumnChunkWriteState & s) +{ + if (s.max_def == UINT8_MAX) + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, + "Column has more than 255 levels of nested Array/Nullable. Impressive! Unfortunately, " + "this is not supported by this Parquet encoder (but is supported by Parquet, if you " + "really need this for some reason)."); +} + +void updateRepDefLevelsAndFilterColumnForNullable(ColumnChunkWriteState & s, const NullMap & null_map) +{ + /// Increment definition levels for non-nulls. + /// Filter the column to contain only non-null values. + + assertNoDefOverflow(s); + ++s.max_def; + + /// Normal case: no arrays or nullables inside this nullable. + if (s.max_def == 1) + { + chassert(s.def.empty()); + s.def.resize(null_map.size()); + for (size_t i = 0; i < s.def.size(); ++i) + s.def[i] = !null_map[i]; + + /// We could be more efficient with this: + /// * Instead of doing the filter() here, we could defer it to writeColumnChunkBody(), at + /// least in the simple case of Nullable(Primitive). Then it'll parallelize if the table + /// consists of one big tuple. + /// * Instead of filtering explicitly, we could build filtering into the data encoder. + /// * Instead of filling out the `def` values above, we could point to null_map and build + /// the '!' into the encoder. + /// None of these seem worth the complexity right now. + s.primitive_column = s.primitive_column->filter(s.def, /*result_size_hint*/ -1); + + return; + } + + /// Weird general case: Nullable(Array), Nullable(Nullable), or any arbitrary nesting like that. + /// This is currently not allowed in ClickHouse, but let's support it anyway just in case. 
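    /// Hand-worked illustration of the encoding described above (the Array part is
    /// added later by updateRepDefLevelsForArray, not by this function): for a column
    /// of type Array(Nullable(Int32)) with rows [[1, NULL], [], [2]] the final state is
    ///     max_rep = 1, max_def = 2
    ///     rep = [0, 1, 0, 0]   (0 starts a new row, 1 continues the current array)
    ///     def = [2, 1, 0, 2]   (2 = non-null value, 1 = null slot, 0 = empty array)
    ///     primitive_column = [1, 2]   (nulls filtered out, empty arrays contribute no value)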
+ + IColumn::Filter filter; + size_t row_idx = static_cast(-1); + for (size_t i = 0; i < s.def.size(); ++i) + { + row_idx += s.max_rep == 0 || s.rep[i] == 0; + if (s.def[i] == s.max_def - 1) + filter.push_back(!null_map[row_idx]); + s.def[i] += !null_map[row_idx]; + } + s.primitive_column = s.primitive_column->filter(filter, /*result_size_hint*/ -1); +} + +void updateRepDefLevelsForArray(ColumnChunkWriteState & s, const IColumn::Offsets & offsets) +{ + /// Increment all definition levels. + /// For non-first elements of arrays, increment repetition levels. + /// For empty arrays, insert a zero into repetition and definition levels arrays. + + assertNoDefOverflow(s); + ++s.max_def; + ++s.max_rep; + + /// Common case: no arrays or nullables inside this array. + if (s.max_rep == 1 && s.max_def == 1) + { + s.def.resize_fill(s.primitive_column->size(), 1); + s.rep.resize_fill(s.primitive_column->size(), 1); + size_t i = 0; + for (ssize_t row = 0; row < static_cast(offsets.size()); ++row) + { + size_t n = offsets[row] - offsets[row - 1]; + if (n) + { + s.rep[i] = 0; + i += n; + } + else + { + s.def.push_back(1); + s.rep.push_back(1); + s.def[i] = 0; + s.rep[i] = 0; + i += 1; + } + } + return; + } + + /// General case: Array(Array), Array(Nullable), or any arbitrary nesting like that. + + for (auto & x : s.def) + ++x; + + if (s.max_rep == 1) + s.rep.resize_fill(s.def.size(), 1); + else + for (auto & x : s.rep) + ++x; + + PaddedPODArray mask(s.def.size(), 1); // for inserting zeroes to rep and def + size_t i = 0; // in the input (s.def/s.rep) + size_t empty_arrays = 0; + for (ssize_t row = 0; row < static_cast(offsets.size()); ++row) + { + size_t n = offsets[row] - offsets[row - 1]; + if (n) + { + /// Un-increment the first rep of the array. + /// Skip n "items" in the nested column; first element of each item has rep = 1 + /// (we incremented it above). + chassert(s.rep[i] == 1); + --s.rep[i]; + do + { + ++i; + if (i == s.rep.size()) + { + --n; + chassert(n == 0); + break; + } + n -= s.rep[i] == 1; + } while (n); + } + else + { + mask.push_back(1); + mask[i + empty_arrays] = 0; + ++empty_arrays; + } + } + + if (empty_arrays != 0) + { + expandDataByMask(s.def, mask, false); + expandDataByMask(s.rep, mask, false); + } +} + +parq::CompressionCodec::type compressionMethodToParquet(CompressionMethod c) +{ + switch (c) + { + case CompressionMethod::None: return parq::CompressionCodec::UNCOMPRESSED; + case CompressionMethod::Snappy: return parq::CompressionCodec::SNAPPY; + case CompressionMethod::Gzip: return parq::CompressionCodec::GZIP; + case CompressionMethod::Brotli: return parq::CompressionCodec::BROTLI; + case CompressionMethod::Lz4: return parq::CompressionCodec::LZ4_RAW; + case CompressionMethod::Zstd: return parq::CompressionCodec::ZSTD; + + default: + throw Exception(ErrorCodes::UNKNOWN_COMPRESSION_METHOD, "Compression method {} is not supported by Parquet", toContentEncodingName(c)); + } +} + +/// Depth-first traversal of the schema tree for this column. +void prepareColumnRecursive( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas); + +void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::string & name, + const WriteOptions & options, ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + /// Add physical column info. 
+ auto & state = states.emplace_back(); + state.primitive_column = column; + state.compression = options.compression; + + state.column_chunk.__isset.meta_data = true; + state.column_chunk.meta_data.__set_path_in_schema({name}); + state.column_chunk.meta_data.__set_codec(compressionMethodToParquet(state.compression)); + + /// Add logical schema leaf. + auto & schema = schemas.emplace_back(); + schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + schema.__set_name(name); + + /// Convert the type enums. + + using T = parq::Type; + using C = parq::ConvertedType; + + auto types = [&](T::type type_, std::optional converted = std::nullopt, std::optional logical = std::nullopt) + { + state.column_chunk.meta_data.__set_type(type_); + schema.__set_type(type_); + if (converted) + schema.__set_converted_type(*converted); + if (logical) + schema.__set_logicalType(*logical); + }; + + auto int_type = [](Int8 bits, bool signed_) + { + parq::LogicalType t; + t.__isset.INTEGER = true; + t.INTEGER.__set_bitWidth(bits); + t.INTEGER.__set_isSigned(signed_); + return t; + }; + + auto fixed_string = [&](size_t size, std::optional converted = std::nullopt, std::optional logical = std::nullopt) + { + state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type_length(static_cast(size)); + if (converted) + schema.__set_converted_type(*converted); + if (logical) + schema.__set_logicalType(*logical); + }; + + auto decimal = [&](Int32 bytes, UInt32 precision, UInt32 scale) + { + state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type_length(bytes); + schema.__set_scale(static_cast(scale)); + schema.__set_precision(static_cast(precision)); + schema.__set_converted_type(parq::ConvertedType::DECIMAL); + parq::DecimalType d; + d.__set_scale(static_cast(scale)); + d.__set_precision(static_cast(precision)); + parq::LogicalType t; + t.__set_DECIMAL(d); + schema.__set_logicalType(t); + }; + + switch (type->getTypeId()) + { + case TypeIndex::UInt8: + if (isBool(type)) + { + types(T::BOOLEAN); + state.is_bool = true; + } + else + { + types(T::INT32, C::UINT_8, int_type(8, false)); + } + break; + case TypeIndex::UInt16: types(T::INT32, C::UINT_16, int_type(16, false)); break; + case TypeIndex::UInt32: types(T::INT32, C::UINT_32, int_type(32, false)); break; + case TypeIndex::UInt64: types(T::INT64, C::UINT_64, int_type(64, false)); break; + case TypeIndex::Int8: types(T::INT32, C::INT_8, int_type(8, true)); break; + case TypeIndex::Int16: types(T::INT32, C::INT_16, int_type(16, true)); break; + case TypeIndex::Int32: types(T::INT32); break; + case TypeIndex::Int64: types(T::INT64); break; + case TypeIndex::Float32: types(T::FLOAT); break; + case TypeIndex::Float64: types(T::DOUBLE); break; + + /// These don't have suitable parquet logical types, so we write them as plain numbers. + /// (Parquet has "enums" but they're just strings, with nowhere to declare all possible enum + /// values in advance as part of the data type.) 
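    /// For example, a ClickHouse Enum8('a' = 1, 'b' = 2) column is written exactly like
    /// Int8: physical INT32 with an INTEGER(8, signed) annotation. Readers get back the
    /// numbers 1 and 2; the names 'a' and 'b' are not stored anywhere in the file.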
+ case TypeIndex::Enum8: types(T::INT32, C::INT_8, int_type(8, true)); break; // Int8 + case TypeIndex::Enum16: types(T::INT32, C::INT_16, int_type(16, true)); break; // Int16 + case TypeIndex::IPv4: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 + case TypeIndex::Date: types(T::INT32, C::UINT_16, int_type(16, false)); break; // UInt16 + case TypeIndex::DateTime: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 + + case TypeIndex::Date32: + { + parq::LogicalType t; + t.__set_DATE({}); + types(T::INT32, C::DATE, t); + break; + } + + case TypeIndex::DateTime64: + { + std::optional converted; + std::optional unit; + switch (assert_cast(*type).getScale()) + { + case 3: + converted = parq::ConvertedType::TIMESTAMP_MILLIS; + unit.emplace().__set_MILLIS({}); + break; + case 6: + converted = parq::ConvertedType::TIMESTAMP_MICROS; + unit.emplace().__set_MICROS({}); + break; + case 9: + unit.emplace().__set_NANOS({}); + break; + } + + std::optional t; + if (unit) + { + parq::TimestampType tt; + tt.__set_isAdjustedToUTC(true); + tt.__set_unit(*unit); + t.emplace().__set_TIMESTAMP(tt); + } + types(T::INT64, converted, t); + break; + } + + case TypeIndex::String: + case TypeIndex::FixedString: + { + if (options.output_fixed_string_as_fixed_byte_array && + type->getTypeId() == TypeIndex::FixedString) + { + fixed_string(assert_cast(*type).getN()); + } + else if (options.output_string_as_string) + { + parq::LogicalType t; + t.__set_STRING({}); + types(T::BYTE_ARRAY, C::UTF8, t); + } + else + { + types(T::BYTE_ARRAY); + } + break; + } + + /// Parquet doesn't have logical types for these. + case TypeIndex::UInt128: fixed_string(16); break; + case TypeIndex::UInt256: fixed_string(32); break; + case TypeIndex::Int128: fixed_string(16); break; + case TypeIndex::Int256: fixed_string(32); break; + case TypeIndex::IPv6: fixed_string(16); break; + + case TypeIndex::Decimal32: decimal(4, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal64: decimal(8, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal128: decimal(16, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal256: decimal(32, getDecimalPrecision(*type), getDecimalScale(*type)); break; + + default: + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of column '{}' is not supported for conversion into Parquet data format.", type->getFamilyName(), name); + } +} + +void prepareColumnNullable( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const ColumnNullable * column_nullable = assert_cast(column.get()); + ColumnPtr nested_column = column_nullable->getNestedColumnPtr(); + DataTypePtr nested_type = assert_cast(type.get())->getNestedType(); + const NullMap & null_map = column_nullable->getNullMapData(); + + size_t child_states_begin = states.size(); + size_t child_schema_idx = schemas.size(); + + prepareColumnRecursive(nested_column, nested_type, name, options, states, schemas); + + if (schemas[child_schema_idx].repetition_type == parq::FieldRepetitionType::REQUIRED) + { + /// Normal case: we just slap a FieldRepetitionType::OPTIONAL onto the nested column. + schemas[child_schema_idx].repetition_type = parq::FieldRepetitionType::OPTIONAL; + } + else + { + /// Weird case: Nullable(Nullable(...)). Or Nullable(Tuple(Nullable(...))), etc. 
+ /// This is probably not allowed in ClickHouse, but let's support it just in case. + auto & schema = *schemas.insert(schemas.begin() + child_schema_idx, {}); + schema.__set_repetition_type(parq::FieldRepetitionType::OPTIONAL); + schema.__set_name("nullable"); + schema.__set_num_children(1); + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), schema.name + "."); + } + } + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + auto & s = states[i]; + updateRepDefLevelsAndFilterColumnForNullable(s, null_map); + } +} + +void prepareColumnTuple( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_tuple = assert_cast(column.get()); + const auto * type_tuple = assert_cast(type.get()); + + auto & tuple_schema = schemas.emplace_back(); + tuple_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + tuple_schema.__set_name(name); + tuple_schema.__set_num_children(static_cast(type_tuple->getElements().size())); + + size_t child_states_begin = states.size(); + + for (size_t i = 0; i < type_tuple->getElements().size(); ++i) + prepareColumnRecursive(column_tuple->getColumnPtr(i), type_tuple->getElement(i), type_tuple->getNameByPosition(i + 1), options, states, schemas); + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + /// O(nesting_depth^2), but who cares. + path.insert(path.begin(), name); + } +} + +void prepareColumnArray( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_array = assert_cast(column.get()); + ColumnPtr nested_column = column_array->getDataPtr(); + DataTypePtr nested_type = assert_cast(type.get())->getNestedType(); + const auto & offsets = column_array->getOffsets(); + + /// Schema for lists https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists + /// + /// required group `name` (List): + /// repeated group "list": + /// "element" + + /// Add the groups schema. + + schemas.emplace_back(); + schemas.emplace_back(); + auto & list_schema = schemas[schemas.size() - 2]; + auto & item_schema = schemas[schemas.size() - 1]; + + list_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + list_schema.__set_name(name); + list_schema.__set_num_children(1); + list_schema.__set_converted_type(parq::ConvertedType::LIST); + list_schema.__isset.logicalType = true; + list_schema.logicalType.__set_LIST({}); + + item_schema.__set_repetition_type(parq::FieldRepetitionType::REPEATED); + item_schema.__set_name("list"); + item_schema.__set_num_children(1); + + std::array path_prefix = {list_schema.name, item_schema.name}; + size_t child_states_begin = states.size(); + + /// Recurse. + prepareColumnRecursive(nested_column, nested_type, "element", options, states, schemas); + + /// Update repetition+definition levels and fully-qualified column names (x -> myarray.list.x). 
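    /// For example, a column `arr Array(Int32)` flattens into three SchemaElements:
    ///     arr       REQUIRED group, LIST converted/logical type
    ///     list      REPEATED group
    ///     element   REQUIRED INT32 leaf
    /// and the leaf's path_in_schema becomes ["arr", "list", "element"], i.e. the
    /// physical column is addressed as "arr.list.element" in the file metadata.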
+ for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), path_prefix.begin(), path_prefix.end()); + + updateRepDefLevelsForArray(states[i], offsets); + } +} + +void prepareColumnMap( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const auto * column_map = assert_cast(column.get()); + const auto * column_array = &column_map->getNestedColumn(); + const auto & offsets = column_array->getOffsets(); + ColumnPtr column_tuple = column_array->getDataPtr(); + + const auto * map_type = assert_cast(type.get()); + DataTypePtr tuple_type = std::make_shared(map_type->getKeyValueTypes(), Strings{"key", "value"}); + + /// Map is an array of tuples + /// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps + /// + /// required group `name` (Map): + /// repeated group "key_value": + /// reqiured <...> "key" + /// <...> "value" + + auto & map_schema = schemas.emplace_back(); + map_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + map_schema.__set_name(name); + map_schema.__set_num_children(1); + map_schema.__set_converted_type(parq::ConvertedType::MAP); + map_schema.__set_logicalType({}); + map_schema.logicalType.__set_MAP({}); + + size_t tuple_schema_idx = schemas.size(); + size_t child_states_begin = states.size(); + + prepareColumnTuple(column_tuple, tuple_type, "key_value", options, states, schemas); + + schemas[tuple_schema_idx].__set_repetition_type(parq::FieldRepetitionType::REPEATED); + schemas[tuple_schema_idx].__set_converted_type(parq::ConvertedType::MAP_KEY_VALUE); + + for (size_t i = child_states_begin; i < states.size(); ++i) + { + Strings & path = states[i].column_chunk.meta_data.path_in_schema; + path.insert(path.begin(), name); + + updateRepDefLevelsForArray(states[i], offsets); + } +} + +void prepareColumnRecursive( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + switch (type->getTypeId()) + { + case TypeIndex::Nullable: prepareColumnNullable(column, type, name, options, states, schemas); break; + case TypeIndex::Array: prepareColumnArray(column, type, name, options, states, schemas); break; + case TypeIndex::Tuple: prepareColumnTuple(column, type, name, options, states, schemas); break; + case TypeIndex::Map: prepareColumnMap(column, type, name, options, states, schemas); break; + case TypeIndex::LowCardinality: + { + auto nested_type = assert_cast(*type).getDictionaryType(); + if (nested_type->isNullable()) + prepareColumnNullable( + column->convertToFullColumnIfLowCardinality(), nested_type, name, options, states, schemas); + else + /// Use nested data type, but keep ColumnLowCardinality. The encoder can deal with it. 
+ preparePrimitiveColumn(column, nested_type, name, options, states, schemas); + break; + } + default: + preparePrimitiveColumn(column, type, name, options, states, schemas); + break; + } +} + +} + +SchemaElements convertSchema(const Block & sample, const WriteOptions & options) +{ + SchemaElements schema; + auto & root = schema.emplace_back(); + root.__set_name("schema"); + root.__set_num_children(static_cast(sample.columns())); + + for (const auto & c : sample) + prepareColumnForWrite(c.column, c.type, c.name, options, nullptr, &schema); + + return schema; +} + +void prepareColumnForWrite( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema) +{ + if (column->empty() && out_columns_to_write != nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column passed to Parquet encoder"); + + ColumnChunkWriteStates states; + SchemaElements schemas; + prepareColumnRecursive(column, type, name, options, states, schemas); + + if (out_columns_to_write) + for (auto & s : states) + out_columns_to_write->push_back(std::move(s)); + if (out_schema) + out_schema->insert(out_schema->end(), schemas.begin(), schemas.end()); + + if (column->empty()) + states.clear(); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp new file mode 100644 index 00000000000..2a99b028ae0 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp @@ -0,0 +1,35 @@ +#include +#include + +namespace DB::Parquet +{ + +class WriteBufferTransport : public apache::thrift::transport::TTransport +{ +public: + WriteBuffer & out; + size_t bytes = 0; + + explicit WriteBufferTransport(WriteBuffer & out_) : out(out_) {} + + void write(const uint8_t* buf, uint32_t len) + { + out.write(reinterpret_cast(buf), len); + bytes += len; + } +}; + +template +size_t serializeThriftStruct(const T & obj, WriteBuffer & out) +{ + auto trans = std::make_shared(out); + auto proto = apache::thrift::protocol::TCompactProtocolFactoryT().getProtocol(trans); + obj.write(proto.get()); + return trans->bytes; +} + +template size_t serializeThriftStruct(const parquet::format::PageHeader &, WriteBuffer & out); +template size_t serializeThriftStruct(const parquet::format::ColumnChunk &, WriteBuffer & out); +template size_t serializeThriftStruct(const parquet::format::FileMetaData &, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.h b/src/Processors/Formats/Impl/Parquet/ThriftUtil.h new file mode 100644 index 00000000000..1efbe0002d4 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.h @@ -0,0 +1,17 @@ +#pragma once + +#include // in contrib/arrow/cpp/src/ , generated from parquet.thrift +#include + +namespace DB::Parquet +{ + +/// Returns number of bytes written. 
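/// Typical use (as in Write.cpp): serialize a Thrift struct straight into the output
/// stream and keep the returned size for page/footer bookkeeping, e.g.
///     parquet::format::PageHeader header;
///     ...
///     size_t header_size = serializeThriftStruct(header, out);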
+template +size_t serializeThriftStruct(const T & obj, WriteBuffer & out); + +extern template size_t serializeThriftStruct(const parquet::format::PageHeader &, WriteBuffer & out); +extern template size_t serializeThriftStruct(const parquet::format::ColumnChunk &, WriteBuffer & out); +extern template size_t serializeThriftStruct(const parquet::format::FileMetaData &, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp new file mode 100644 index 00000000000..22d256b89ed --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -0,0 +1,913 @@ +#include "Processors/Formats/Impl/Parquet/Write.h" +#include "Processors/Formats/Impl/Parquet/ThriftUtil.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "config_version.h" + +#if USE_SNAPPY +#include +#endif + +namespace DB::ErrorCodes +{ + extern const int CANNOT_COMPRESS; + extern const int LIMIT_EXCEEDED; + extern const int LOGICAL_ERROR; +} + +namespace DB::Parquet +{ + +namespace parq = parquet::format; + +namespace +{ + +template +struct StatisticsNumeric +{ + T min = std::numeric_limits::has_infinity + ? std::numeric_limits::infinity() : std::numeric_limits::max(); + T max = std::numeric_limits::has_infinity + ? -std::numeric_limits::infinity() : std::numeric_limits::lowest(); + + void add(SourceType x) + { + min = std::min(min, static_cast(x)); + max = std::max(max, static_cast(x)); + } + + void merge(const StatisticsNumeric & s) + { + min = std::min(min, s.min); + max = std::max(max, s.max); + } + + void clear() { *this = {}; } + + parq::Statistics get(const WriteOptions &) + { + parq::Statistics s; + s.__isset.min_value = s.__isset.max_value = true; + s.min_value.resize(sizeof(T)); + s.max_value.resize(sizeof(T)); + memcpy(s.min_value.data(), &min, sizeof(T)); + memcpy(s.max_value.data(), &max, sizeof(T)); + + if constexpr (std::is_signed::value) + { + s.__set_min(s.min_value); + s.__set_max(s.max_value); + } + return s; + } +}; + +struct StatisticsFixedStringRef +{ + size_t fixed_string_size = UINT64_MAX; + const uint8_t * min = nullptr; + const uint8_t * max = nullptr; + + void add(parquet::FixedLenByteArray a) + { + chassert(fixed_string_size != UINT64_MAX); + addMin(a.ptr); + addMax(a.ptr); + } + + void merge(const StatisticsFixedStringRef & s) + { + chassert(fixed_string_size == UINT64_MAX || fixed_string_size == s.fixed_string_size); + fixed_string_size = s.fixed_string_size; + if (s.min == nullptr) + return; + addMin(s.min); + addMax(s.max); + } + + void clear() { min = max = nullptr; } + + parq::Statistics get(const WriteOptions & options) const + { + parq::Statistics s; + if (min == nullptr || fixed_string_size > options.max_statistics_size) + return s; + s.__set_min_value(std::string(reinterpret_cast(min), fixed_string_size)); + s.__set_max_value(std::string(reinterpret_cast(max), fixed_string_size)); + return s; + } + + void addMin(const uint8_t * p) + { + if (min == nullptr || memcmp(p, min, fixed_string_size) < 0) + min = p; + } + void addMax(const uint8_t * p) + { + if (max == nullptr || memcmp(p, max, fixed_string_size) > 0) + max = p; + } +}; + +template +struct StatisticsFixedStringCopy +{ + bool empty = true; + std::array min {}; + std::array max {}; + + void add(parquet::FixedLenByteArray a) + { + addMin(a.ptr); + addMax(a.ptr); + empty = false; + } + + void merge(const StatisticsFixedStringCopy & s) + { + if (s.empty) + return; + 
addMin(&s.min[0]); + addMax(&s.max[0]); + empty = false; + } + + void clear() { empty = true; } + + parq::Statistics get(const WriteOptions &) const + { + parq::Statistics s; + if (empty) + return s; + s.__set_min_value(std::string(reinterpret_cast(min.data()), S)); + s.__set_max_value(std::string(reinterpret_cast(max.data()), S)); + return s; + } + + void addMin(const uint8_t * p) + { + if (empty || memcmp(p, min.data(), S) < 0) + memcpy(min.data(), p, S); + } + void addMax(const uint8_t * p) + { + if (empty || memcmp(p, max.data(), S) > 0) + memcpy(max.data(), p, S); + } +}; + +struct StatisticsStringRef +{ + parquet::ByteArray min; + parquet::ByteArray max; + + void add(parquet::ByteArray x) + { + addMin(x); + addMax(x); + } + + void merge(const StatisticsStringRef & s) + { + if (s.min.ptr == nullptr) + return; + addMin(s.min); + addMax(s.max); + } + + void clear() { *this = {}; } + + parq::Statistics get(const WriteOptions & options) const + { + parq::Statistics s; + if (min.ptr == nullptr) + return s; + if (static_cast(min.len) <= options.max_statistics_size) + s.__set_min_value(std::string(reinterpret_cast(min.ptr), static_cast(min.len))); + if (static_cast(max.len) <= options.max_statistics_size) + s.__set_max_value(std::string(reinterpret_cast(max.ptr), static_cast(max.len))); + return s; + } + + void addMin(parquet::ByteArray x) + { + if (min.ptr == nullptr || compare(x, min) < 0) + min = x; + } + + void addMax(parquet::ByteArray x) + { + if (max.ptr == nullptr || compare(x, max) > 0) + max = x; + } + + static int compare(parquet::ByteArray a, parquet::ByteArray b) + { + int t = memcmp(a.ptr, b.ptr, std::min(a.len, b.len)); + if (t != 0) + return t; + return a.len - b.len; + } +}; + +/// The column usually needs to be converted to one of Parquet physical types, e.g. UInt16 -> Int32 +/// or [element of ColumnString] -> std::string_view. +/// We do this conversion in small batches rather than all at once, just before encoding the batch, +/// in hopes of getting better performance through cache locality. +/// The Coverter* structs below are responsible for that. +/// When conversion is not needed, getBatch() will just return pointer into original data. 
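To make the batching idea concrete, here is a minimal stand-alone sketch of the same pattern with plain standard-library types (the names are hypothetical, not the converters defined below): source values are widened to the int32 that Parquet INT32 pages expect, one batch at a time, through a scratch buffer reused across calls. When the source layout already matches the target, the real converters skip the copy and return a pointer straight into the column, as the comment above notes.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct WideningConverter
    {
        const std::vector<uint16_t> & source;
        std::vector<int32_t> buf;   /// scratch buffer, reused and regrown across batches

        const int32_t * getBatch(size_t offset, size_t count)
        {
            buf.resize(count);
            for (size_t i = 0; i < count; ++i)
                buf[i] = static_cast<int32_t>(source[offset + i]);
            return buf.data();
        }
    };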
+ +template ::value, + To, + typename std::make_unsigned::type>::type> +struct ConverterNumeric +{ + using Statistics = StatisticsNumeric; + + const Col & column; + PODArray buf; + + explicit ConverterNumeric(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const To * getBatch(size_t offset, size_t count) + { + if constexpr (sizeof(*column.getData().data()) == sizeof(To)) + return reinterpret_cast(column.getData().data() + offset); + else + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i] = static_cast(column.getData()[offset + i]); // NOLINT + return buf.data(); + } + } +}; + +struct ConverterString +{ + using Statistics = StatisticsStringRef; + + const ColumnString & column; + PODArray buf; + + explicit ConverterString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::ByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + { + StringRef s = column.getDataAt(offset + i); + buf[i] = parquet::ByteArray(static_cast(s.size), reinterpret_cast(s.data)); + } + return buf.data(); + } +}; + +struct ConverterFixedString +{ + using Statistics = StatisticsFixedStringRef; + + const ColumnFixedString & column; + PODArray buf; + + explicit ConverterFixedString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i].ptr = reinterpret_cast(column.getChars().data() + (offset + i) * column.getN()); + return buf.data(); + } + + size_t fixedStringSize() { return column.getN(); } +}; + +struct ConverterFixedStringAsString +{ + using Statistics = StatisticsStringRef; + + const ColumnFixedString & column; + PODArray buf; + + explicit ConverterFixedStringAsString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::ByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i] = parquet::ByteArray(static_cast(column.getN()), reinterpret_cast(column.getChars().data() + (offset + i) * column.getN())); + return buf.data(); + } +}; + +template +struct ConverterNumberAsFixedString +{ + /// Calculate min/max statistics for little-endian fixed strings, not numbers, because parquet + /// doesn't know it's numbers. + using Statistics = StatisticsFixedStringCopy; + + const ColumnVector & column; + PODArray buf; + + explicit ConverterNumberAsFixedString(const ColumnPtr & c) : column(assert_cast &>(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i].ptr = reinterpret_cast(column.getData().data() + offset + i); + return buf.data(); + } + + size_t fixedStringSize() { return sizeof(T); } +}; + +/// Like ConverterNumberAsFixedString, but converts to big-endian. Because that's the byte order +/// Parquet uses for decimal types and literally nothing else, for some reason. 
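As a concrete byte-level aside (an illustration, not the converter that follows): a Decimal32 with raw value 12345, i.e. 123.45 at scale 2, sits in memory little-endian as 39 30 00 00 and must land in the FIXED_LEN_BYTE_ARRAY as 00 00 30 39. A minimal sketch of that reversal, assuming a little-endian host:

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <cstring>

    std::array<uint8_t, 4> decimal32ToParquetBytes(int32_t raw_value)
    {
        std::array<uint8_t, 4> bytes;
        std::memcpy(bytes.data(), &raw_value, sizeof(raw_value));  /// little-endian layout in memory
        std::reverse(bytes.begin(), bytes.end());                  /// Parquet stores decimals big-endian
        return bytes;
    }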
+template +struct ConverterDecimal +{ + using Statistics = StatisticsFixedStringCopy; + + const ColumnDecimal & column; + PODArray data_buf; + PODArray ptr_buf; + + explicit ConverterDecimal(const ColumnPtr & c) : column(assert_cast &>(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + data_buf.resize(count * sizeof(T)); + ptr_buf.resize(count); + memcpy(data_buf.data(), reinterpret_cast(column.getData().data() + offset), count * sizeof(T)); + for (size_t i = 0; i < count; ++i) + { + std::reverse(data_buf.data() + i * sizeof(T), data_buf.data() + (i + 1) * sizeof(T)); + ptr_buf[i].ptr = data_buf.data() + i * sizeof(T); + } + return ptr_buf.data(); + } + + size_t fixedStringSize() { return sizeof(T); } +}; + +/// Returns either `source` or `scratch`. +PODArray & compress(PODArray & source, PODArray & scratch, CompressionMethod method) +{ + /// We could use wrapWriteBufferWithCompressionMethod() for everything, but I worry about the + /// overhead of creating a bunch of WriteBuffers on each page (thousands of values). + switch (method) + { + case CompressionMethod::None: + return source; + + case CompressionMethod::Lz4: + { + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wold-style-cast" + + size_t max_dest_size = LZ4_COMPRESSBOUND(source.size()); + + #pragma clang diagnostic pop + + if (max_dest_size > std::numeric_limits::max()) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + + scratch.resize(max_dest_size); + + int compressed_size = LZ4_compress_default( + source.data(), + scratch.data(), + static_cast(source.size()), + static_cast(max_dest_size)); + + scratch.resize(static_cast(compressed_size)); + return scratch; + } + +#if USE_SNAPPY + case CompressionMethod::Snappy: + { + size_t max_dest_size = snappy::MaxCompressedLength(source.size()); + + if (max_dest_size > std::numeric_limits::max()) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + + scratch.resize(max_dest_size); + + size_t compressed_size; + snappy::RawCompress(source.data(), source.size(), scratch.data(), &compressed_size); + + scratch.resize(static_cast(compressed_size)); + return scratch; + } +#endif + + default: + { + auto dest_buf = std::make_unique>>(scratch); + auto compressed_buf = wrapWriteBufferWithCompressionMethod( + std::move(dest_buf), + method, + /*level*/ 3, + source.size(), + /*existing_memory*/ source.data()); + chassert(compressed_buf->position() == source.data()); + chassert(compressed_buf->available() == source.size()); + compressed_buf->position() += source.size(); + compressed_buf->finalize(); + return scratch; + } + } +} + +void encodeRepDefLevelsRLE(const UInt8 * data, size_t size, UInt8 max_level, PODArray & out) +{ + using arrow::util::RleEncoder; + + chassert(max_level > 0); + size_t offset = out.size(); + size_t prefix_size = sizeof(Int32); + + int bit_width = bitScanReverse(max_level) + 1; + int max_rle_size = RleEncoder::MaxBufferSize(bit_width, static_cast(size)) + + RleEncoder::MinBufferSize(bit_width); + + out.resize(offset + prefix_size + max_rle_size); + + RleEncoder encoder(reinterpret_cast(out.data() + offset + prefix_size), max_rle_size, bit_width); + for (size_t i = 0; i < size; ++i) + encoder.Put(data[i]); + encoder.Flush(); + Int32 len = encoder.len(); + + memcpy(out.data() + offset, &len, prefix_size); + out.resize(offset + prefix_size + len); +} + 
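For reference, the byte layout produced above follows the Parquet data page v1 convention for levels: a 4-byte little-endian length, then the RLE/bit-packed payload, with each level stored in just enough bits for the column's maximum level. A small sketch of that bit-width rule, equivalent to the bitScanReverse expression used above:

    #include <cstdint>

    /// e.g. max_def = 1 needs 1 bit, max_def = 3 needs 2 bits, max_def = 4 needs 3 bits.
    int levelBitWidth(uint8_t max_level)
    {
        int bits = 0;
        while (max_level)
        {
            ++bits;
            max_level >>= 1;
        }
        return bits;
    }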
+void addToEncodingsUsed(ColumnChunkWriteState & s, parq::Encoding::type e) +{ + if (!std::count(s.column_chunk.meta_data.encodings.begin(), s.column_chunk.meta_data.encodings.end(), e)) + s.column_chunk.meta_data.encodings.push_back(e); +} + +void writePage(const parq::PageHeader & header, const PODArray & compressed, ColumnChunkWriteState & s, WriteBuffer & out) +{ + size_t header_size = serializeThriftStruct(header, out); + out.write(compressed.data(), compressed.size()); + + /// Remember first data page and first dictionary page. + if (header.__isset.data_page_header && s.column_chunk.meta_data.data_page_offset == -1) + s.column_chunk.meta_data.__set_data_page_offset(s.column_chunk.meta_data.total_compressed_size); + if (header.__isset.dictionary_page_header && !s.column_chunk.meta_data.__isset.dictionary_page_offset) + s.column_chunk.meta_data.__set_dictionary_page_offset(s.column_chunk.meta_data.total_compressed_size); + + s.column_chunk.meta_data.total_uncompressed_size += header.uncompressed_page_size + header_size; + s.column_chunk.meta_data.total_compressed_size += header.compressed_page_size + header_size; +} + +template +void writeColumnImpl( + ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out, Converter && converter) +{ + size_t num_values = s.max_def > 0 ? s.def.size() : s.primitive_column->size(); + auto encoding = options.encoding; + + typename Converter::Statistics page_statistics; + typename Converter::Statistics total_statistics; + + bool use_dictionary = options.use_dictionary_encoding && !s.is_bool; + + std::optional fixed_string_descr; + if constexpr (std::is_same::value) + { + /// This just communicates one number to MakeTypedEncoder(): the fixed string length. + fixed_string_descr.emplace(parquet::schema::PrimitiveNode::Make( + "", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY, + parquet::ConvertedType::NONE, static_cast(converter.fixedStringSize())), 0, 0); + + if constexpr (std::is_same::value) + page_statistics.fixed_string_size = converter.fixedStringSize(); + } + + /// Could use an arena here (by passing a custom MemoryPool), to reuse memory across pages. + /// Alternatively, we could avoid using arrow's dictionary encoding code and leverage + /// ColumnLowCardinality instead. It would work basically the same way as what this function + /// currently does: add values to the ColumnRowCardinality (instead of `encoder`) in batches, + /// checking dictionary size after each batch. That might be faster. + auto encoder = parquet::MakeTypedEncoder( + // ignored if using dictionary + static_cast(encoding), + use_dictionary, fixed_string_descr ? &*fixed_string_descr : nullptr); + + struct PageData + { + parq::PageHeader header; + PODArray data; + }; + std::vector dict_encoded_pages; // can't write them out until we have full dictionary + + /// Reused across pages to reduce number of allocations and improve locality. + PODArray encoded; + PODArray compressed_maybe; + + /// Start of current page. + size_t def_offset = 0; // index in def and rep + size_t data_offset = 0; // index in primitive_column + + auto flush_page = [&](size_t def_count, size_t data_count) + { + encoded.clear(); + + /// Concatenate encoded rep, def, and data. 
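        /// The uncompressed page body assembled here is laid out as:
        ///     [rep levels: int32 length + RLE]   -- only present when max_rep > 0
        ///     [def levels: int32 length + RLE]   -- only present when max_def > 0
        ///     [encoded values flushed from the encoder]
        /// The whole body is then compressed as one block and preceded by a PageHeader.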
+ + if (s.max_rep > 0) + encodeRepDefLevelsRLE(s.rep.data() + def_offset, def_count, s.max_rep, encoded); + if (s.max_def > 0) + encodeRepDefLevelsRLE(s.def.data() + def_offset, def_count, s.max_def, encoded); + + std::shared_ptr values = encoder->FlushValues(); // resets it for next page + + encoded.resize(encoded.size() + values->size()); + memcpy(encoded.data() + encoded.size() - values->size(), values->data(), values->size()); + values.reset(); + + if (encoded.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Uncompressed page is too big: {}", encoded.size()); + + size_t uncompressed_size = encoded.size(); + auto & compressed = compress(encoded, compressed_maybe, s.compression); + + if (compressed.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Compressed page is too big: {}", compressed.size()); + + parq::PageHeader header; + header.__set_type(parq::PageType::DATA_PAGE); + header.__set_uncompressed_page_size(static_cast(uncompressed_size)); + header.__set_compressed_page_size(static_cast(compressed.size())); + header.__isset.data_page_header = true; + auto & d = header.data_page_header; + d.__set_num_values(static_cast(def_count)); + d.__set_encoding(use_dictionary ? parq::Encoding::RLE_DICTIONARY : encoding); + d.__set_definition_level_encoding(parq::Encoding::RLE); + d.__set_repetition_level_encoding(parq::Encoding::RLE); + /// We could also put checksum in `header.crc`, but apparently no one uses it: + /// https://issues.apache.org/jira/browse/PARQUET-594 + + if (options.write_page_statistics) + { + d.__set_statistics(page_statistics.get(options)); + + if (s.max_def == 1 && s.max_rep == 0) + d.statistics.__set_null_count(static_cast(def_count - data_count)); + } + + total_statistics.merge(page_statistics); + page_statistics.clear(); + + if (use_dictionary) + { + dict_encoded_pages.push_back({.header = std::move(header)}); + std::swap(dict_encoded_pages.back().data, compressed); + } + else + { + writePage(header, compressed, s, out); + } + + def_offset += def_count; + data_offset += data_count; + }; + + auto flush_dict = [&] -> bool + { + auto * dict_encoder = dynamic_cast *>(encoder.get()); + int dict_size = dict_encoder->dict_encoded_size(); + + encoded.resize(static_cast(dict_size)); + dict_encoder->WriteDict(reinterpret_cast(encoded.data())); + + auto & compressed = compress(encoded, compressed_maybe, s.compression); + + if (compressed.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Compressed dictionary page is too big: {}", compressed.size()); + + parq::PageHeader header; + header.__set_type(parq::PageType::DICTIONARY_PAGE); + header.__set_uncompressed_page_size(dict_size); + header.__set_compressed_page_size(static_cast(compressed.size())); + header.__isset.dictionary_page_header = true; + header.dictionary_page_header.__set_num_values(dict_encoder->num_entries()); + header.dictionary_page_header.__set_encoding(parq::Encoding::PLAIN); + + writePage(header, compressed, s, out); + + for (auto & p : dict_encoded_pages) + writePage(p.header, p.data, s, out); + + dict_encoded_pages.clear(); + encoder.reset(); + + return true; + }; + + auto is_dict_too_big = [&] { + auto * dict_encoder = dynamic_cast *>(encoder.get()); + int dict_size = dict_encoder->dict_encoded_size(); + return static_cast(dict_size) >= options.dictionary_size_limit; + }; + + while (def_offset < num_values) + { + /// Pick enough data for a page. 
+ size_t next_def_offset = def_offset; + size_t next_data_offset = data_offset; + while (true) + { + /// Bite off a batch of defs and corresponding data values. + size_t def_count = std::min(options.write_batch_size, num_values - next_def_offset); + size_t data_count = 0; + if (s.max_def == 0) + data_count = def_count; + else + for (size_t i = 0; i < def_count; ++i) + data_count += s.def[next_def_offset + i] == s.max_def; + + /// Encode the data (but not the levels yet), so that we can estimate its encoded size. + const typename ParquetDType::c_type * converted = converter.getBatch(next_data_offset, data_count); + + if (options.write_page_statistics || options.write_column_chunk_statistics) +/// Workaround for clang bug: https://github.com/llvm/llvm-project/issues/63630 +#ifdef MEMORY_SANITIZER +#pragma clang loop vectorize(disable) +#endif + for (size_t i = 0; i < data_count; ++i) + page_statistics.add(converted[i]); + + encoder->Put(converted, static_cast(data_count)); + + next_def_offset += def_count; + next_data_offset += data_count; + + if (use_dictionary && is_dict_too_big()) + { + /// Fallback to non-dictionary encoding. + /// + /// Discard encoded data and start over. + /// This is different from what arrow does: arrow writes out the dictionary-encoded + /// data, then uses non-dictionary encoding for later pages. + /// Starting over seems better: it produces slightly smaller files (I saw 1-4%) in + /// exchange for slight decrease in speed (I saw < 5%). This seems like a good + /// trade because encoding speed is much less important than decoding (as evidenced + /// by arrow not supporting parallel encoding, even though it's easy to support). + + def_offset = 0; + data_offset = 0; + dict_encoded_pages.clear(); + use_dictionary = false; + +#ifndef NDEBUG + /// Arrow's DictEncoderImpl destructor asserts that FlushValues() was called, so we + /// call it even though we don't need its output. + encoder->FlushValues(); +#endif + + encoder = parquet::MakeTypedEncoder( + static_cast(encoding), /* use_dictionary */ false, + fixed_string_descr ? &*fixed_string_descr : nullptr); + break; + } + + if (next_def_offset == num_values || + static_cast(encoder->EstimatedDataEncodedSize()) >= options.data_page_size) + { + flush_page(next_def_offset - def_offset, next_data_offset - data_offset); + break; + } + } + } + + if (use_dictionary) + flush_dict(); + + chassert(data_offset == s.primitive_column->size()); + + if (options.write_column_chunk_statistics) + { + s.column_chunk.meta_data.__set_statistics(total_statistics.get(options)); + + if (s.max_def == 1 && s.max_rep == 0) + s.column_chunk.meta_data.statistics.__set_null_count(static_cast(def_offset - data_offset)); + } + + /// Report which encodings we've used. + if (s.max_rep > 0 || s.max_def > 0) + addToEncodingsUsed(s, parq::Encoding::RLE); // levels + if (use_dictionary) + { + addToEncodingsUsed(s, parq::Encoding::PLAIN); // dictionary itself + addToEncodingsUsed(s, parq::Encoding::RLE_DICTIONARY); // ids + } + else + { + addToEncodingsUsed(s, encoding); + } +} + +} + +void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out) +{ + s.column_chunk.meta_data.__set_num_values(s.max_def > 0 ? s.def.size() : s.primitive_column->size()); + + /// We'll be updating these as we go. 
+ s.column_chunk.meta_data.__set_encodings({}); + s.column_chunk.meta_data.__set_total_compressed_size(0); + s.column_chunk.meta_data.__set_total_uncompressed_size(0); + s.column_chunk.meta_data.__set_data_page_offset(-1); + + s.primitive_column = s.primitive_column->convertToFullColumnIfLowCardinality(); + + switch (s.primitive_column->getDataType()) + { + /// Numeric conversion to Int32 or Int64. + #define N(source_type, parquet_dtype) \ + writeColumnImpl(s, options, out, \ + ConverterNumeric, parquet::parquet_dtype::c_type>( \ + s.primitive_column)) + + case TypeIndex::UInt8: + if (s.is_bool) + writeColumnImpl(s, options, out, + ConverterNumeric, bool, bool>(s.primitive_column)); + else + N(UInt8, Int32Type); + break; + case TypeIndex::UInt16 : N(UInt16, Int32Type); break; + case TypeIndex::UInt32 : N(UInt32, Int32Type); break; + case TypeIndex::UInt64 : N(UInt64, Int64Type); break; + case TypeIndex::Int8 : N(Int8, Int32Type); break; + case TypeIndex::Int16 : N(Int16, Int32Type); break; + case TypeIndex::Int32 : N(Int32, Int32Type); break; + case TypeIndex::Int64 : N(Int64, Int64Type); break; + + case TypeIndex::Enum8: N(Int8, Int32Type); break; + case TypeIndex::Enum16: N(Int16, Int32Type); break; + case TypeIndex::Date: N(UInt16, Int32Type); break; + case TypeIndex::Date32: N(Int32, Int32Type); break; + case TypeIndex::DateTime: N(UInt32, Int32Type); break; + + #undef N + + case TypeIndex::Float32: + writeColumnImpl( + s, options, out, ConverterNumeric, Float32, Float32>( + s.primitive_column)); + break; + + case TypeIndex::Float64: + writeColumnImpl( + s, options, out, ConverterNumeric, Float64, Float64>( + s.primitive_column)); + break; + + case TypeIndex::DateTime64: + writeColumnImpl( + s, options, out, ConverterNumeric, Int64, Int64>( + s.primitive_column)); + break; + + case TypeIndex::IPv4: + writeColumnImpl( + s, options, out, ConverterNumeric, Int32, UInt32>( + s.primitive_column)); + break; + + case TypeIndex::String: + writeColumnImpl( + s, options, out, ConverterString(s.primitive_column)); + break; + + case TypeIndex::FixedString: + if (options.output_fixed_string_as_fixed_byte_array) + writeColumnImpl( + s, options, out, ConverterFixedString(s.primitive_column)); + else + writeColumnImpl( + s, options, out, ConverterFixedStringAsString(s.primitive_column)); + break; + + #define F(source_type) \ + writeColumnImpl( \ + s, options, out, ConverterNumberAsFixedString(s.primitive_column)) + case TypeIndex::UInt128: F(UInt128); break; + case TypeIndex::UInt256: F(UInt256); break; + case TypeIndex::Int128: F(Int128); break; + case TypeIndex::Int256: F(Int256); break; + case TypeIndex::IPv6: F(IPv6); break; + #undef F + + #define D(source_type) \ + writeColumnImpl( \ + s, options, out, ConverterDecimal(s.primitive_column)) + case TypeIndex::Decimal32: D(Decimal32); break; + case TypeIndex::Decimal64: D(Decimal64); break; + case TypeIndex::Decimal128: D(Decimal128); break; + case TypeIndex::Decimal256: D(Decimal256); break; + #undef D + + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column type: {}", s.primitive_column->getFamilyName()); + } + + /// Free some memory. + s.primitive_column = {}; + s.def = {}; + s.rep = {}; +} + +void writeFileHeader(WriteBuffer & out) +{ + /// Write the magic bytes. We're a wizard now. 
+ out.write("PAR1", 4); +} + +parq::ColumnChunk finalizeColumnChunkAndWriteFooter( + size_t offset_in_file, ColumnChunkWriteState s, const WriteOptions &, WriteBuffer & out) +{ + if (s.column_chunk.meta_data.data_page_offset != -1) + s.column_chunk.meta_data.data_page_offset += offset_in_file; + if (s.column_chunk.meta_data.__isset.dictionary_page_offset) + s.column_chunk.meta_data.dictionary_page_offset += offset_in_file; + s.column_chunk.file_offset = offset_in_file + s.column_chunk.meta_data.total_compressed_size; + + serializeThriftStruct(s.column_chunk, out); + + return s.column_chunk; +} + +parq::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows) +{ + parq::RowGroup r; + r.__set_num_rows(num_rows); + r.__set_columns(column_chunks); + r.__set_total_compressed_size(0); + for (auto & c : r.columns) + { + r.total_byte_size += c.meta_data.total_uncompressed_size; + r.total_compressed_size += c.meta_data.total_compressed_size; + } + if (!r.columns.empty()) + { + auto & m = r.columns[0].meta_data; + r.__set_file_offset(m.__isset.dictionary_page_offset ? m.dictionary_page_offset : m.data_page_offset); + } + return r; +} + +void writeFileFooter(std::vector row_groups, SchemaElements schema, const WriteOptions & options, WriteBuffer & out) +{ + parq::FileMetaData meta; + meta.version = 2; + meta.schema = std::move(schema); + meta.row_groups = std::move(row_groups); + for (auto & r : meta.row_groups) + meta.num_rows += r.num_rows; + meta.__set_created_by(VERSION_NAME " " VERSION_DESCRIBE); + + if (options.write_page_statistics || options.write_column_chunk_statistics) + { + meta.__set_column_orders({}); + for (auto & s : meta.schema) + if (!s.__isset.num_children) + meta.column_orders.emplace_back(); + for (auto & c : meta.column_orders) + c.__set_TYPE_ORDER({}); + } + + size_t footer_size = serializeThriftStruct(meta, out); + + if (footer_size > INT32_MAX) + throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Parquet file metadata too big: {}", footer_size); + + writeIntBinary(static_cast(footer_size), out); + out.write("PAR1", 4); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/Write.h b/src/Processors/Formats/Impl/Parquet/Write.h new file mode 100644 index 00000000000..9197eae5384 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/Write.h @@ -0,0 +1,136 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB::Parquet +{ + +/// A good resource for learning how Parquet format works is +/// contrib/arrow/cpp/src/parquet/parquet.thrift + +struct WriteOptions +{ + bool output_string_as_string = false; + bool output_fixed_string_as_fixed_byte_array = true; + + CompressionMethod compression = CompressionMethod::Lz4; + + size_t data_page_size = 1024 * 1024; + size_t write_batch_size = 1024; + + bool use_dictionary_encoding = true; + size_t dictionary_size_limit = 1024 * 1024; + /// If using dictionary, this encoding is used as a fallback when dictionary gets too big. + /// Otherwise, this is used for everything. + parquet::format::Encoding::type encoding = parquet::format::Encoding::PLAIN; + + bool write_page_statistics = true; + bool write_column_chunk_statistics = true; + size_t max_statistics_size = 4096; +}; + +/// Information about a primitive column (leaf of the schema tree) to write to Parquet file. +struct ColumnChunkWriteState +{ + /// After writeColumnChunkBody(), offsets in this struct are relative to the start of column chunk. + /// Then finalizeColumnChunkAndWriteFooter() fixes them up before writing to file. 
+ parquet::format::ColumnChunk column_chunk; + + ColumnPtr primitive_column; + CompressionMethod compression; // must match what's inside column_chunk + bool is_bool = false; + + /// Repetition and definition levels. Produced by prepareColumnForWrite(). + /// def is empty iff max_def == 0, which means no arrays or nullables. + /// rep is empty iff max_rep == 0, which means no arrays. + PaddedPODArray def; // definition levels + PaddedPODArray rep; // repetition levels + /// Max possible levels, according to schema. Actual max in def/rep may be smaller. + UInt8 max_def = 0; + UInt8 max_rep = 0; + + ColumnChunkWriteState() = default; + /// Prevent accidental copying. + ColumnChunkWriteState(ColumnChunkWriteState &&) = default; + ColumnChunkWriteState & operator=(ColumnChunkWriteState &&) = default; + + /// Estimated memory usage. + size_t allocatedBytes() const + { + size_t r = def.allocated_bytes() + rep.allocated_bytes(); + if (primitive_column) + r += primitive_column->allocatedBytes(); + return r; + } +}; + +using SchemaElements = std::vector; +using ColumnChunkWriteStates = std::vector; + +/// Parquet file consists of row groups, which consist of column chunks. +/// +/// Column chunks can be encoded mostly independently of each other, in parallel. +/// But there are two small complications: +/// 1. One ClickHouse column can translate to multiple leaf columns in parquet. +/// E.g. tuples and maps. +/// If all primitive columns are in one big tuple, we'd like to encode them in parallel too, +/// even though they're one top-level ClickHouse column. +/// 2. At the end of each encoded column chunk there's a footer (struct ColumnMetaData) that +/// contains some absolute offsets in the file. We can't encode it until we know the exact +/// position in the file where the column chunk will go. So these footers have to be serialized +/// sequentially, after we know sizes of all previous column chunks. +/// +/// With that in mind, here's how to write a parquet file: +/// +/// (1) writeFileHeader() +/// (2) For each row group: +/// | (3) For each ClickHouse column: +/// | (4) Call prepareColumnForWrite(). +/// | It'll produce one or more ColumnChunkWriteStates, corresponding to primitive columns that +/// | we need to write. +/// | It'll also produce SchemaElements as a byproduct, describing the logical types and +/// | groupings of the physical columns (e.g. tuples, arrays, maps). +/// | (5) For each ColumnChunkWriteState: +/// | (6) Call writeColumnChunkBody() to write the actual data to the given WriteBuffer. +/// | (7) Call finalizeColumnChunkAndWriteFooter() to write the footer of the column chunk. +/// | (8) Call makeRowGroup() using the ColumnChunk metadata structs from previous step. +/// (9) Call writeFileFooter() using the row groups from previous step and SchemaElements from +/// convertSchema(). +/// +/// Steps (4) and (6) can be parallelized, both within and across row groups. + +/// Parquet schema is a tree of SchemaElements, flattened into a list in depth-first order. +/// Leaf nodes correspond to physical columns of primitive types. Inner nodes describe logical +/// groupings of those columns, e.g. tuples or structs. 
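A condensed, hypothetical driver for steps (1)-(9), writing one row group from a single Chunk on a single thread using the declarations below (error handling, size-based row-group splitting and parallel encoding are omitted; out.count() is assumed to report the number of bytes written to the file so far):

    void writeSingleRowGroupFile(const Block & header, const Chunk & chunk, const WriteOptions & options, WriteBuffer & out)
    {
        SchemaElements schema = convertSchema(header, options);

        writeFileHeader(out);                                                /// (1)

        ColumnChunkWriteStates states;                                       /// (3)-(4)
        for (size_t i = 0; i < header.columns(); ++i)
            prepareColumnForWrite(chunk.getColumns()[i], header.getByPosition(i).type,
                                  header.getByPosition(i).name, options, &states);

        std::vector<parquet::format::ColumnChunk> column_chunks;
        for (auto & state : states)                                          /// (5)
        {
            size_t offset_in_file = out.count();                             /// where this chunk's body starts
            writeColumnChunkBody(state, options, out);                       /// (6)
            column_chunks.push_back(
                finalizeColumnChunkAndWriteFooter(offset_in_file, std::move(state), options, out)); /// (7)
        }

        auto row_group = makeRowGroup(std::move(column_chunks), chunk.getNumRows()); /// (8)
        writeFileFooter({row_group}, std::move(schema), options, out);       /// (9)
    }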
+SchemaElements convertSchema(const Block & sample, const WriteOptions & options); + +void prepareColumnForWrite( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema = nullptr); + +void writeFileHeader(WriteBuffer & out); + +/// Encodes a column chunk, without the footer. +/// The ColumnChunkWriteState-s should then passed to finalizeColumnChunkAndWriteFooter(). +void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out); + +/// Unlike most of the column chunk data, the footer (`ColumnMetaData`) needs to know its absolute +/// offset in the file. So we encode it separately, after all previous row groups and column chunks +/// have been encoded. +/// (If you're wondering if the 8-byte offset values can be patched inside the encoded blob - no, +/// they're varint-encoded and can't be padded to a fixed length.) +/// `offset_in_file` is the absolute position in the file where the writeColumnChunkBody()'s output +/// starts. +/// Returns a ColumnChunk to add to the RowGroup. +parquet::format::ColumnChunk finalizeColumnChunkAndWriteFooter( + size_t offset_in_file, ColumnChunkWriteState s, const WriteOptions & options, WriteBuffer & out); + +parquet::format::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows); + +void writeFileFooter(std::vector row_groups, SchemaElements schema, const WriteOptions & options, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 3dde8ad6a6c..902a02130aa 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -59,7 +59,12 @@ ParquetBlockInputFormat::ParquetBlockInputFormat( pool = std::make_unique(CurrentMetrics::ParquetDecoderThreads, CurrentMetrics::ParquetDecoderThreadsActive, max_decoding_threads); } -ParquetBlockInputFormat::~ParquetBlockInputFormat() = default; +ParquetBlockInputFormat::~ParquetBlockInputFormat() +{ + is_stopped = true; + if (pool) + pool->wait(); +} void ParquetBlockInputFormat::initializeIfNeeded() { @@ -143,7 +148,6 @@ void ParquetBlockInputFormat::initializeRowGroupReader(size_t row_group_idx) row_group.arrow_column_to_ch_column = std::make_unique( getPort().getHeader(), "Parquet", - format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, format_settings.null_as_default, format_settings.parquet.case_insensitive_column_matching); @@ -415,7 +419,6 @@ void registerInputFormatParquet(FormatFactory & factory) max_parsing_threads, min_bytes_for_seek); }); - factory.markFormatSupportsSubcolumns("Parquet"); factory.markFormatSupportsSubsetOfColumns("Parquet"); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index a14c51f8b9f..dc14edf2099 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -273,7 +273,7 @@ private: std::unique_ptr pool; BlockMissingValues previous_block_missing_values; - size_t previous_approx_bytes_read_for_chunk; + size_t previous_approx_bytes_read_for_chunk = 0; std::exception_ptr background_exception = nullptr; std::atomic is_stopped{0}; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 
91840cd2c50..fbf8b3a7c87 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -3,14 +3,23 @@ #if USE_PARQUET #include +#include #include #include "ArrowBufferedStreams.h" #include "CHColumnToArrowColumn.h" +namespace CurrentMetrics +{ + extern const Metric ParquetEncoderThreads; + extern const Metric ParquetEncoderThreadsActive; +} + namespace DB { +using namespace Parquet; + namespace ErrorCodes { extern const int UNKNOWN_EXCEPTION; @@ -59,19 +68,229 @@ namespace if (method == FormatSettings::ParquetCompression::GZIP) return parquet::Compression::type::GZIP; - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported parquet compression method"); } - } ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_), format_settings{format_settings_} { + if (format_settings.parquet.use_custom_encoder) + { + if (format_settings.parquet.parallel_encoding && format_settings.max_threads > 1) + pool = std::make_unique( + CurrentMetrics::ParquetEncoderThreads, CurrentMetrics::ParquetEncoderThreadsActive, + format_settings.max_threads); + + using C = FormatSettings::ParquetCompression; + switch (format_settings.parquet.output_compression_method) + { + case C::NONE: options.compression = CompressionMethod::None; break; + case C::SNAPPY: options.compression = CompressionMethod::Snappy; break; + case C::ZSTD: options.compression = CompressionMethod::Zstd; break; + case C::LZ4: options.compression = CompressionMethod::Lz4; break; + case C::GZIP: options.compression = CompressionMethod::Gzip; break; + case C::BROTLI: options.compression = CompressionMethod::Brotli; break; + } + options.output_string_as_string = format_settings.parquet.output_string_as_string; + options.output_fixed_string_as_fixed_byte_array = format_settings.parquet.output_fixed_string_as_fixed_byte_array; + options.data_page_size = format_settings.parquet.data_page_size; + options.write_batch_size = format_settings.parquet.write_batch_size; + + schema = convertSchema(header_, options); + } } -void ParquetBlockOutputFormat::consumeStaged() +ParquetBlockOutputFormat::~ParquetBlockOutputFormat() { - const size_t columns_num = staging_chunks.at(0).getNumColumns(); + if (pool) + { + is_stopped = true; + pool->wait(); + } +} + +void ParquetBlockOutputFormat::consume(Chunk chunk) +{ + /// Poll background tasks. + if (pool) + { + std::unique_lock lock(mutex); + while (true) + { + /// If some row groups are ready to be written to the file, write them. + reapCompletedRowGroups(lock); + + if (background_exception) + std::rethrow_exception(background_exception); + + if (is_stopped) + return; + + /// If there's too much work in flight, wait for some of it to complete. + if (row_groups.size() < 2) + break; + if (bytes_in_flight <= format_settings.parquet.row_group_bytes * 4 && + task_queue.size() <= format_settings.max_threads * 4) + break; + + condvar.wait(lock); + } + } + + /// Do something like SquashingTransform to produce big enough row groups. + /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. + /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more + /// convenient to do the squashing here. It's also parallelized here. 
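The staging logic below flushes once either row_group_rows or row_group_bytes is reached, and when a single flush holds at least twice the target number of rows it splits the data into several row groups whose size is rounded up, so no tiny tail group is produced. A standalone sketch of that arithmetic, with hypothetical numbers:

```cpp
// Standalone illustration of the splitting arithmetic used below (numbers are made up).
// With target_rows = 1'000'000 and staging_rows = 2'500'000:
//   num_row_groups = 2'500'000 / 1'000'000     = 2
//   row_group_size = (2'500'000 - 1) / 2 + 1   = 1'250'000   (rounded up)
// so the flush produces two row groups of 1'250'000 rows instead of
// 1'000'000 + 1'000'000 + a small 500'000-row tail.
#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<size_t> splitIntoRowGroups(size_t staging_rows, size_t target_rows)
{
    std::vector<size_t> sizes;
    if (staging_rows == 0)
        return sizes;

    size_t num_row_groups = std::max<size_t>(1, staging_rows / target_rows);
    size_t row_group_size = (staging_rows - 1) / num_row_groups + 1; // round up

    for (size_t offset = 0; offset < staging_rows; offset += row_group_size)
        sizes.push_back(std::min(row_group_size, staging_rows - offset));
    return sizes;
}
```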
+ + if (chunk.getNumRows() != 0) + { + staging_rows += chunk.getNumRows(); + staging_bytes += chunk.bytes(); + staging_chunks.push_back(std::move(chunk)); + } + + const size_t target_rows = std::max(static_cast(1), format_settings.parquet.row_group_rows); + + if (staging_rows < target_rows && + staging_bytes < format_settings.parquet.row_group_bytes) + return; + + /// In the rare case that more than `row_group_rows` rows arrived in one chunk, split the + /// staging chunk into multiple row groups. + if (staging_rows >= target_rows * 2) + { + /// Increase row group size slightly (by < 2x) to avoid a small row group at the end. + size_t num_row_groups = std::max(static_cast(1), staging_rows / target_rows); + size_t row_group_size = (staging_rows - 1) / num_row_groups + 1; // round up + + Chunk concatenated = std::move(staging_chunks[0]); + for (size_t i = 1; i < staging_chunks.size(); ++i) + concatenated.append(staging_chunks[i]); + staging_chunks.clear(); + + for (size_t offset = 0; offset < staging_rows; offset += row_group_size) + { + size_t count = std::min(row_group_size, staging_rows - offset); + MutableColumns columns = concatenated.cloneEmptyColumns(); + for (size_t i = 0; i < columns.size(); ++i) + columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); + + Chunks piece; + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + writeRowGroup(std::move(piece)); + } + } + else + { + writeRowGroup(std::move(staging_chunks)); + } + + staging_chunks.clear(); + staging_rows = 0; + staging_bytes = 0; +} + +void ParquetBlockOutputFormat::finalizeImpl() +{ + if (!staging_chunks.empty()) + writeRowGroup(std::move(staging_chunks)); + + if (format_settings.parquet.use_custom_encoder) + { + if (pool) + { + std::unique_lock lock(mutex); + + /// Wait for background work to complete. 
+ while (true) + { + reapCompletedRowGroups(lock); + + if (background_exception) + std::rethrow_exception(background_exception); + + if (is_stopped) + return; + + if (row_groups.empty()) + break; + + condvar.wait(lock); + } + } + + if (row_groups_complete.empty()) + { + base_offset = out.count(); + writeFileHeader(out); + } + writeFileFooter(std::move(row_groups_complete), schema, options, out); + } + else + { + if (!file_writer) + { + Block header = materializeBlock(getPort(PortKind::Main).getHeader()); + std::vector chunks; + chunks.push_back(Chunk(header.getColumns(), 0)); + writeRowGroup(std::move(chunks)); + } + + if (file_writer) + { + auto status = file_writer->Close(); + if (!status.ok()) + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); + } + } +} + +void ParquetBlockOutputFormat::resetFormatterImpl() +{ + if (pool) + { + is_stopped = true; + pool->wait(); + is_stopped = false; + } + + background_exception = nullptr; + threads_running = 0; + task_queue.clear(); + row_groups.clear(); + file_writer.reset(); + row_groups_complete.clear(); + staging_chunks.clear(); + staging_rows = 0; + staging_bytes = 0; +} + +void ParquetBlockOutputFormat::onCancel() +{ + is_stopped = true; +} + +void ParquetBlockOutputFormat::writeRowGroup(std::vector chunks) +{ + if (pool) + writeRowGroupInParallel(std::move(chunks)); + else if (!format_settings.parquet.use_custom_encoder) + writeUsingArrow(std::move(chunks)); + else + { + Chunk concatenated = std::move(chunks[0]); + for (size_t i = 1; i < chunks.size(); ++i) + concatenated.append(chunks[i]); + chunks.clear(); + + writeRowGroupInOneThread(std::move(concatenated)); + } +} + +void ParquetBlockOutputFormat::writeUsingArrow(std::vector chunks) +{ + const size_t columns_num = chunks.at(0).getNumColumns(); std::shared_ptr arrow_table; if (!ch_column_to_arrow_column) @@ -85,7 +304,7 @@ void ParquetBlockOutputFormat::consumeStaged() format_settings.parquet.output_fixed_string_as_fixed_byte_array); } - ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, staging_chunks, columns_num); + ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunks, columns_num); if (!file_writer) { @@ -112,64 +331,234 @@ void ParquetBlockOutputFormat::consumeStaged() file_writer = std::move(result.ValueOrDie()); } - // TODO: calculate row_group_size depending on a number of rows and table size - - // allow slightly bigger than row_group_size to avoid a very small tail row group - auto status = file_writer->WriteTable(*arrow_table, std::max(format_settings.parquet.row_group_rows, staging_rows)); + auto status = file_writer->WriteTable(*arrow_table, INT64_MAX); if (!status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while writing a table: {}", status.ToString()); } -void ParquetBlockOutputFormat::consume(Chunk chunk) +void ParquetBlockOutputFormat::writeRowGroupInOneThread(Chunk chunk) { - /// Do something like SquashingTransform to produce big enough row groups. - /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. - /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more - /// convenient to do the squashing here. 
- staging_rows += chunk.getNumRows(); - staging_bytes += chunk.bytes(); - staging_chunks.push_back(std::move(chunk)); - chassert(staging_chunks.back().getNumColumns() == staging_chunks.front().getNumColumns()); - if (staging_rows < format_settings.parquet.row_group_rows && - staging_bytes < format_settings.parquet.row_group_bytes) - { + if (chunk.getNumRows() == 0) return; - } - else + + const Block & header = getPort(PortKind::Main).getHeader(); + Parquet::ColumnChunkWriteStates columns_to_write; + chassert(header.columns() == chunk.getNumColumns()); + for (size_t i = 0; i < header.columns(); ++i) + prepareColumnForWrite( + chunk.getColumns()[i], header.getByPosition(i).type, header.getByPosition(i).name, + options, &columns_to_write); + + if (row_groups_complete.empty()) { - consumeStaged(); - staging_chunks.clear(); - staging_rows = 0; - staging_bytes = 0; + base_offset = out.count(); + writeFileHeader(out); + } + + std::vector column_chunks; + for (auto & s : columns_to_write) + { + size_t offset = out.count() - base_offset; + writeColumnChunkBody(s, options, out); + auto c = finalizeColumnChunkAndWriteFooter(offset, std::move(s), options, out); + column_chunks.push_back(std::move(c)); + } + + auto r = makeRowGroup(std::move(column_chunks), chunk.getNumRows()); + row_groups_complete.push_back(std::move(r)); +} + +void ParquetBlockOutputFormat::writeRowGroupInParallel(std::vector chunks) +{ + std::unique_lock lock(mutex); + + const Block & header = getPort(PortKind::Main).getHeader(); + + RowGroupState & r = row_groups.emplace_back(); + r.column_chunks.resize(header.columns()); + r.tasks_in_flight = r.column_chunks.size(); + + std::vector columnses; + for (auto & chunk : chunks) + { + chassert(header.columns() == chunk.getNumColumns()); + r.num_rows += chunk.getNumRows(); + columnses.push_back(chunk.detachColumns()); + } + + for (size_t i = 0; i < header.columns(); ++i) + { + Task & t = task_queue.emplace_back(&r, i, this); + t.column_type = header.getByPosition(i).type; + t.column_name = header.getByPosition(i).name; + + /// Defer concatenating the columns to the threads. + size_t bytes = 0; + for (size_t j = 0; j < chunks.size(); ++j) + { + auto & col = columnses[j][i]; + bytes += col->allocatedBytes(); + t.column_pieces.push_back(std::move(col)); + } + t.mem.set(bytes); + } + + startMoreThreadsIfNeeded(lock); +} + +void ParquetBlockOutputFormat::reapCompletedRowGroups(std::unique_lock & lock) +{ + while (!row_groups.empty() && row_groups.front().tasks_in_flight == 0 && !is_stopped) + { + RowGroupState & r = row_groups.front(); + + /// Write to the file. + + lock.unlock(); + + if (row_groups_complete.empty()) + { + base_offset = out.count(); + writeFileHeader(out); + } + + std::vector metadata; + for (auto & cols : r.column_chunks) + { + for (ColumnChunk & col : cols) + { + size_t offset = out.count() - base_offset; + + out.write(col.serialized.data(), col.serialized.size()); + auto m = finalizeColumnChunkAndWriteFooter(offset, std::move(col.state), options, out); + + metadata.push_back(std::move(m)); + } + } + + row_groups_complete.push_back(makeRowGroup(std::move(metadata), r.num_rows)); + + lock.lock(); + + row_groups.pop_front(); } } -void ParquetBlockOutputFormat::finalizeImpl() +void ParquetBlockOutputFormat::startMoreThreadsIfNeeded(const std::unique_lock &) { - if (!file_writer && staging_chunks.empty()) + /// Speculate that all current are already working on tasks. 
+ size_t to_add = std::min(task_queue.size(), format_settings.max_threads - threads_running); + for (size_t i = 0; i < to_add; ++i) { - Block header = materializeBlock(getPort(PortKind::Main).getHeader()); + auto job = [this, thread_group = CurrentThread::getGroup()]() + { + if (thread_group) + CurrentThread::attachToGroupIfDetached(thread_group); + SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached();); - consume(Chunk(header.getColumns(), 0)); // this will make staging_chunks non-empty + try + { + setThreadName("ParquetEncoder"); + + threadFunction(); + } + catch (...) + { + std::lock_guard lock(mutex); + background_exception = std::current_exception(); + condvar.notify_all(); + --threads_running; + } + }; + + if (threads_running == 0) + { + /// First thread. We need it to succeed; otherwise we may get stuck. + pool->scheduleOrThrowOnError(job); + ++threads_running; + } + else + { + /// More threads. This may be called from inside the thread pool, so avoid waiting; + /// otherwise it may deadlock. + if (!pool->trySchedule(job)) + break; + } } - - if (!staging_chunks.empty()) - { - consumeStaged(); - staging_chunks.clear(); - staging_rows = 0; - staging_bytes = 0; - } - - auto status = file_writer->Close(); - if (!status.ok()) - throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); } -void ParquetBlockOutputFormat::resetFormatterImpl() +void ParquetBlockOutputFormat::threadFunction() { - file_writer.reset(); + std::unique_lock lock(mutex); + + while (true) + { + if (task_queue.empty() || is_stopped) + { + /// The check and the decrement need to be in the same critical section, to make sure + /// we never get stuck with tasks but no threads. + --threads_running; + return; + } + + auto task = std::move(task_queue.front()); + task_queue.pop_front(); + + if (task.column_type) + { + lock.unlock(); + + IColumn::MutablePtr concatenated = IColumn::mutate(std::move(task.column_pieces[0])); + for (size_t i = 1; i < task.column_pieces.size(); ++i) + { + auto & c = task.column_pieces[i]; + concatenated->insertRangeFrom(*c, 0, c->size()); + c.reset(); + } + task.column_pieces.clear(); + + std::vector subcolumns; + prepareColumnForWrite( + std::move(concatenated), task.column_type, task.column_name, options, &subcolumns); + + lock.lock(); + + for (size_t i = 0; i < subcolumns.size(); ++i) + { + task.row_group->column_chunks[task.column_idx].emplace_back(this); + task.row_group->tasks_in_flight += 1; + + auto & t = task_queue.emplace_back(task.row_group, task.column_idx, this); + t.subcolumn_idx = i; + t.state = std::move(subcolumns[i]); + t.mem.set(t.state.allocatedBytes()); + } + + startMoreThreadsIfNeeded(lock); + } + else + { + lock.unlock(); + + PODArray serialized; + { + WriteBufferFromVector buf(serialized); + writeColumnChunkBody(task.state, options, buf); + } + + lock.lock(); + + auto & c = task.row_group->column_chunks[task.column_idx][task.subcolumn_idx]; + c.state = std::move(task.state); + c.serialized = std::move(serialized); + c.mem.set(c.serialized.size() + c.state.allocatedBytes()); + } + + --task.row_group->tasks_in_flight; + + condvar.notify_all(); + } } void registerOutputFormatParquet(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index 482c778bc52..aededc39dc4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -2,8 +2,11 @@ 
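The scheduling in startMoreThreadsIfNeeded() above is deliberately asymmetric: the first worker is started with scheduleOrThrowOnError(), because without at least one running worker the queued tasks would never be drained, while additional workers use trySchedule(), since this function can be called from inside the pool and must not block. A minimal self-contained sketch of that pattern follows; the pool here is a stub, not ClickHouse's ThreadPool API.

```cpp
// Sketch of the scheduling pattern above. The invariant it preserves:
// if the task queue is non-empty, at least one worker is running.
#include <algorithm>
#include <deque>
#include <functional>
#include <mutex>

struct Pool
{
    bool tryRun(std::function<void()>) { return true; } // stand-in for ThreadPool::trySchedule
    void runOrThrow(std::function<void()>) {}           // stand-in for scheduleOrThrowOnError
};

struct Scheduler
{
    Pool pool;
    std::mutex mutex;
    std::deque<std::function<void()>> task_queue;
    size_t threads_running = 0;

    /// Must be called with `mutex` held, mirroring startMoreThreadsIfNeeded(lock).
    void startMoreThreadsIfNeeded(size_t max_threads, std::function<void()> worker)
    {
        size_t to_add = std::min(task_queue.size(),
                                 max_threads - std::min(max_threads, threads_running));
        for (size_t i = 0; i < to_add; ++i)
        {
            if (threads_running == 0)
            {
                pool.runOrThrow(worker);  // first worker must start, or tasks could sit forever
                ++threads_running;
            }
            else if (pool.tryRun(worker)) // extra workers are best-effort: never wait inside the pool
            {
                ++threads_running;
            }
            else
                break;
        }
    }
};
```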
#include "config.h" #if USE_PARQUET -# include -# include + +#include +#include +#include +#include namespace arrow { @@ -28,25 +31,129 @@ class ParquetBlockOutputFormat : public IOutputFormat { public: ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + ~ParquetBlockOutputFormat() override; String getName() const override { return "ParquetBlockOutputFormat"; } String getContentType() const override { return "application/octet-stream"; } private: - void consumeStaged(); + struct MemoryToken + { + ParquetBlockOutputFormat * parent; + size_t bytes = 0; + + explicit MemoryToken(ParquetBlockOutputFormat * p, size_t b = 0) : parent(p) + { + set(b); + } + + MemoryToken(MemoryToken && t) + : parent(std::exchange(t.parent, nullptr)), bytes(std::exchange(t.bytes, 0)) {} + + MemoryToken & operator=(MemoryToken && t) + { + parent = std::exchange(t.parent, nullptr); + bytes = std::exchange(t.bytes, 0); + return *this; + } + + ~MemoryToken() + { + set(0); + } + + void set(size_t new_size) + { + if (new_size == bytes) + return; + parent->bytes_in_flight += new_size - bytes; // overflow is fine + bytes = new_size; + } + }; + + struct ColumnChunk + { + Parquet::ColumnChunkWriteState state; + PODArray serialized; + + MemoryToken mem; + + ColumnChunk(ParquetBlockOutputFormat * p) : mem(p) {} + }; + + struct RowGroupState + { + size_t tasks_in_flight = 0; + std::vector> column_chunks; + size_t num_rows = 0; + }; + + struct Task + { + RowGroupState * row_group; + size_t column_idx; + size_t subcolumn_idx = 0; + + MemoryToken mem; + + /// If not null, we need to call prepareColumnForWrite(). + /// Otherwise we need to call writeColumnChunkBody(). + DataTypePtr column_type; + std::string column_name; + std::vector column_pieces; + + Parquet::ColumnChunkWriteState state; + + Task(RowGroupState * rg, size_t ci, ParquetBlockOutputFormat * p) + : row_group(rg), column_idx(ci), mem(p) {} + }; + void consume(Chunk) override; void finalizeImpl() override; void resetFormatterImpl() override; + void onCancel() override; + void writeRowGroup(std::vector chunks); + void writeUsingArrow(std::vector chunks); + void writeRowGroupInOneThread(Chunk chunk); + void writeRowGroupInParallel(std::vector chunks); + + void threadFunction(); + void startMoreThreadsIfNeeded(const std::unique_lock & lock); + + /// Called in single-threaded fashion. Writes to the file. + void reapCompletedRowGroups(std::unique_lock & lock); + + const FormatSettings format_settings; + + /// Chunks to squash together to form a row group. std::vector staging_chunks; size_t staging_rows = 0; size_t staging_bytes = 0; - const FormatSettings format_settings; - std::unique_ptr file_writer; std::unique_ptr ch_column_to_arrow_column; + + Parquet::WriteOptions options; + Parquet::SchemaElements schema; + std::vector row_groups_complete; + size_t base_offset = 0; + + + std::mutex mutex; + std::condition_variable condvar; // wakes up consume() + std::unique_ptr pool; + + std::atomic_bool is_stopped{false}; + std::exception_ptr background_exception = nullptr; + + /// Invariant: if there's at least one task then there's at least one thread. 
+ size_t threads_running = 0; + std::atomic bytes_in_flight{0}; + + std::deque task_queue; + std::deque row_groups; }; } diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp index 229a0630328..043e6d2260c 100644 --- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp @@ -180,7 +180,7 @@ Chunk ParquetMetadataInputFormat::generate() else if (name == names[3]) { auto column = types[3]->createColumn(); - /// Version сan be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6). + /// Version can be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6). String version = metadata->version() == parquet::ParquetVersion::PARQUET_1_0 ? "1.0" : "2.6"; assert_cast(*column).insertData(version.data(), version.size()); res.addColumn(std::move(column)); @@ -504,7 +504,6 @@ void registerInputFormatParquetMetadata(FormatFactory & factory) { return std::make_shared(buf, sample, settings); }); - factory.markFormatSupportsSubcolumns("ParquetMetadata"); factory.markFormatSupportsSubsetOfColumns("ParquetMetadata"); } diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp index 6098923a195..4a5ce2705e7 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp @@ -14,7 +14,7 @@ ProtobufListInputFormat::ProtobufListInputFormat( ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool flatten_google_wrappers_) : IRowInputFormat(header_, in_, params_) , reader(std::make_unique(in_)) @@ -22,7 +22,7 @@ ProtobufListInputFormat::ProtobufListInputFormat( header_.getNames(), header_.getDataTypes(), missing_column_indices, - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::Yes), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes), /* with_length_delimiter = */ true, /* with_envelope = */ true, flatten_google_wrappers_, @@ -84,7 +84,7 @@ void registerInputFormatProtobufList(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "Protobuf", true), settings.protobuf.input_flatten_google_wrappers); + ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema), settings.protobuf.input_flatten_google_wrappers); }); factory.markFormatSupportsSubsetOfColumns("ProtobufList"); factory.registerAdditionalInfoForSchemaCacheGetter( diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.h b/src/Processors/Formats/Impl/ProtobufListInputFormat.h index ba2e8014878..09eabb0ee59 100644 --- a/src/Processors/Formats/Impl/ProtobufListInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.h @@ -28,7 +28,7 @@ public: ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool flatten_google_wrappers_); String getName() const override { return "ProtobufListInputFormat"; } diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp index 815b9ebb61d..ae0b9db7357 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp 
+++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp @@ -2,7 +2,6 @@ #if USE_PROTOBUF # include -# include # include # include # include @@ -13,14 +12,14 @@ namespace DB ProtobufListOutputFormat::ProtobufListOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool defaults_for_nullable_google_wrappers_) : IRowOutputFormat(header_, out_) , writer(std::make_unique(out)) , serializer(ProtobufSerializer::create( header_.getNames(), header_.getDataTypes(), - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::Yes), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes), /* with_length_delimiter = */ true, /* with_envelope = */ true, defaults_for_nullable_google_wrappers_, @@ -55,7 +54,7 @@ void registerOutputFormatProtobufList(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared( - buf, header, FormatSchemaInfo(settings, "Protobuf", true), + buf, header, ProtobufSchemaInfo(settings, "Protobuf", header, settings.protobuf.use_autogenerated_schema), settings.protobuf.output_nullables_with_google_wrappers); }); } diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h index d85018c0351..e7765590d51 100644 --- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.h @@ -4,10 +4,10 @@ #if USE_PROTOBUF # include +# include namespace DB { -class FormatSchemaInfo; class ProtobufWriter; class ProtobufSerializer; @@ -26,7 +26,7 @@ public: ProtobufListOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool defaults_for_nullable_google_wrappers_); String getName() const override { return "ProtobufListOutputFormat"; } diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index 126f3673571..771a0790527 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -11,9 +11,9 @@ namespace DB { ProtobufRowInputFormat::ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_) + const ProtobufSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_) : IRowInputFormat(header_, in_, params_) - , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No)) + , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No)) , with_length_delimiter(with_length_delimiter_) , flatten_google_wrappers(flatten_google_wrappers_) { @@ -89,7 +89,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings, "Protobuf", true), + ProtobufSchemaInfo(settings, "Protobuf", sample, settings.protobuf.use_autogenerated_schema), with_length_delimiter, settings.protobuf.input_flatten_google_wrappers); }); diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h 
b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index 5c042f7c5ab..9d7cecd7146 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -33,7 +33,7 @@ public: ReadBuffer & in_, const Block & header_, const Params & params_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, bool with_length_delimiter_, bool flatten_google_wrappers_); diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 129c9ca3156..7b4cc1bf0be 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -3,7 +3,6 @@ #if USE_PROTOBUF # include # include -# include # include # include # include @@ -20,7 +19,7 @@ namespace ErrorCodes ProtobufRowOutputFormat::ProtobufRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, const FormatSettings & settings_, bool with_length_delimiter_) : IRowOutputFormat(header_, out_) @@ -28,7 +27,7 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat( , serializer(ProtobufSerializer::create( header_.getNames(), header_.getDataTypes(), - *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_, ProtobufSchemas::WithEnvelope::No), + *ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No), with_length_delimiter_, /* with_envelope = */ false, settings_.protobuf.output_nullables_with_google_wrappers, @@ -61,7 +60,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared( - buf, header, FormatSchemaInfo(settings, "Protobuf", true), + buf, header, ProtobufSchemaInfo(settings, "Protobuf", header, settings.protobuf.use_autogenerated_schema), settings, with_length_delimiter); }); } diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h index f6ff5bae999..213e1c785fd 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h @@ -4,11 +4,11 @@ #if USE_PROTOBUF # include +# include namespace DB { class DB; -class FormatSchemaInfo; class ProtobufSerializer; class ProtobufWriter; class WriteBuffer; @@ -30,7 +30,7 @@ public: ProtobufRowOutputFormat( WriteBuffer & out_, const Block & header_, - const FormatSchemaInfo & schema_info_, + const ProtobufSchemaInfo & schema_info_, const FormatSettings & settings_, bool with_length_delimiter_); diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index d61e723fd75..3a65a6fe4ea 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -474,6 +475,10 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx context, &found_in_cache, delimiter); + + LOG_TEST(&Poco::Logger::get("ValuesBlockInputFormat"), "Will use an expression template to parse column {}: {}", + column_idx, structure->dumpTemplate()); + templates[column_idx].emplace(structure); if (found_in_cache) ++attempts_to_deduce_template_cached[column_idx]; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h 
b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index d540a24fa70..8f8d44ec088 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -96,7 +96,7 @@ private: Serializations serializations; BlockMissingValues block_missing_values; - size_t approx_bytes_read_for_chunk; + size_t approx_bytes_read_for_chunk = 0; }; class ValuesSchemaReader : public IRowSchemaReader diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index eaedbbb4a1e..fb49779e0af 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -227,7 +227,30 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipField(file_column); if (!is_last_file_column) + { + if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow()) + { + ++file_column; + while (file_column < column_mapping->column_indexes_for_input_fields.size()) + { + const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; + columns[*rem_column_index]->insertDefault(); + ++file_column; + } + } + else + format_reader->skipFieldDelimiter(); + } + } + + if (format_reader->allowVariableNumberOfColumns() && !format_reader->checkForEndOfRow()) + { + do + { format_reader->skipFieldDelimiter(); + format_reader->skipField(1); + } + while (!format_reader->checkForEndOfRow()); } format_reader->skipRowEndDelimiter(); diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index 5648acd392d..b5103d3db39 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -119,6 +119,10 @@ public: /// Check suffix. 
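The readRow() change above implements the variable-number-of-columns policy for formats that opt in via allowVariableNumberOfColumns(): if a row ends before all header columns were read, the remaining columns get default values, and if a row has more fields than the header, the extra fields are skipped up to the end of the row. Below is a standalone sketch of the same policy over a tab-separated line; it is illustrative only, not ClickHouse's reader.

```cpp
// Sketch of the policy: pad missing trailing fields with defaults, skip extra fields.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::string> readRow(const std::string & line, size_t expected_columns,
                                 const std::string & default_value = "")
{
    std::vector<std::string> row;
    std::stringstream ss(line);
    std::string field;

    while (std::getline(ss, field, '\t'))
    {
        if (row.size() < expected_columns)
            row.push_back(field);      // normal field
        // else: extra field, skip it (mirrors skipField() in the loop above)
    }

    while (row.size() < expected_columns)
        row.push_back(default_value);  // missing field, use default (mirrors insertDefault())

    return row;
}

int main()
{
    for (const std::string & f : readRow("1\tAlice", 3))
        std::cout << '[' << f << "] ";
    std::cout << '\n'; // prints: [1] [Alice] []

    for (const std::string & f : readRow("1\tBob\t42\textra", 3))
        std::cout << '[' << f << "] ";
    std::cout << '\n'; // prints: [1] [Bob] [42]
}
```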
virtual bool checkForSuffix() { return in->eof(); } + virtual bool checkForEndOfRow() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method checkForEndOfRow is not implemented"); } + + virtual bool allowVariableNumberOfColumns() { return false; } + const FormatSettings & getFormatSettings() const { return format_settings; } virtual void setReadBuffer(ReadBuffer & in_) { in = &in_; } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 00d5b2ee089..ea3c3c2c1b0 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 34322acb2af..c6bef186877 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -343,6 +343,7 @@ public: uint64_t read_rows = 0; uint64_t read_bytes = 0; uint64_t total_rows_approx = 0; + uint64_t total_bytes = 0; }; struct ReadProgress diff --git a/src/Processors/ISource.h b/src/Processors/ISource.h index 292f79ba348..2593a241c63 100644 --- a/src/Processors/ISource.h +++ b/src/Processors/ISource.h @@ -43,6 +43,7 @@ public: std::optional getReadProgress() final; void addTotalRowsApprox(size_t value) { read_progress.total_rows_approx += value; } + void addTotalBytes(size_t value) { read_progress.total_bytes += value; } }; using SourcePtr = std::shared_ptr; diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 418bf5e3f13..a3ff7a4ef87 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -163,12 +163,8 @@ static bool compareRetentions(const Retention & a, const Retention & b) { return false; } - String error_msg = "age and precision should only grow up: " - + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " - + std::to_string(b.age) + ":" + std::to_string(b.precision); - throw Exception::createDeprecated( - error_msg, - DB::ErrorCodes::BAD_ARGUMENTS); + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Age and precision should only grow up: {}:{} vs {}:{}", + a.age, a.precision, b.age, b.precision); } bool operator==(const Retention & a, const Retention & b) diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp index c54d32c1385..c5b5dad5aa5 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp @@ -141,7 +141,7 @@ void CreateSetAndFilterOnTheFlyStep::transformPipeline(QueryPipelineBuilder & pi /// Add balancing transform auto idx = position == JoinTableSide::Left ? 
PingPongProcessor::First : PingPongProcessor::Second; auto stream_balancer = std::make_shared(input_header, num_ports, max_rows_in_set, idx); - stream_balancer->setDescription(getStepDescription()); + stream_balancer->setDescription("Reads rows from two streams evenly"); /// Regular inputs just bypass data for respective ports connectAllInputs(ports, stream_balancer->getInputs(), num_ports); @@ -163,7 +163,7 @@ void CreateSetAndFilterOnTheFlyStep::transformPipeline(QueryPipelineBuilder & pi { auto & port = *output_it++; auto transform = std::make_shared(port.getHeader(), column_names, filtering_set); - transform->setDescription(this->getStepDescription()); + transform->setDescription("Filter rows using other join table side's set"); connect(port, transform->getInputPort()); result_transforms.emplace_back(std::move(transform)); } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 9b9cc221ca8..2ff50ca4fe3 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -72,14 +72,10 @@ std::unique_ptr createLocalPlan( if (coordinator) { new_context->parallel_reading_coordinator = coordinator; - new_context->getClientInfo().interface = ClientInfo::Interface::LOCAL; - new_context->getClientInfo().collaborate_with_initiator = true; - new_context->getClientInfo().query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; - new_context->getClientInfo().count_participating_replicas = replica_count; - new_context->getClientInfo().number_of_current_replica = replica_num; - new_context->getClientInfo().connection_client_version_major = DBMS_VERSION_MAJOR; - new_context->getClientInfo().connection_client_version_minor = DBMS_VERSION_MINOR; - new_context->getClientInfo().connection_tcp_protocol_version = DBMS_TCP_PROTOCOL_VERSION; + new_context->setClientInterface(ClientInfo::Interface::LOCAL); + new_context->setQueryKind(ClientInfo::QueryKind::SECONDARY_QUERY); + new_context->setReplicaInfo(true, replica_count, replica_num); + new_context->setConnectionClientVersion(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH, DBMS_TCP_PROTOCOL_VERSION); new_context->setParallelReplicasGroupUUID(group_uuid); new_context->setMergeTreeAllRangesCallback([coordinator](InitialAllRangesAnnouncement announcement) { diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index afdff44020f..b132d27670d 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -30,7 +30,7 @@ static Block checkHeaders(const DataStreams & input_streams_) } IntersectOrExceptStep::IntersectOrExceptStep( - DataStreams input_streams_ , Operator operator_ , size_t max_threads_) + DataStreams input_streams_, Operator operator_, size_t max_threads_) : header(checkHeaders(input_streams_)) , current_operator(operator_) , max_threads(max_threads_) diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 2ff8f161e99..63a5eeb51d2 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -2,6 +2,9 @@ #include #include #include +#include +#include +#include #include namespace DB @@ -54,7 +57,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines bool JoinStep::allowPushDownToRight() const { - return join->pipelineType() == JoinPipelineType::YShaped; + return join->pipelineType() == 
JoinPipelineType::YShaped || join->pipelineType() == JoinPipelineType::FillRightFirst; } void JoinStep::describePipeline(FormatSettings & settings) const @@ -62,6 +65,36 @@ void JoinStep::describePipeline(FormatSettings & settings) const IQueryPlanStep::describePipeline(processors, settings); } +void JoinStep::describeActions(FormatSettings & settings) const +{ + String prefix(settings.offset, ' '); + + const auto & table_join = join->getTableJoin(); + settings.out << prefix << "Type: " << toString(table_join.kind()) << '\n'; + settings.out << prefix << "Strictness: " << toString(table_join.strictness()) << '\n'; + settings.out << prefix << "Algorithm: " << join->getName() << '\n'; + + if (table_join.strictness() == JoinStrictness::Asof) + settings.out << prefix << "ASOF inequality: " << toString(table_join.getAsofInequality()) << '\n'; + + if (!table_join.getClauses().empty()) + settings.out << prefix << "Clauses: " << table_join.formatClauses(table_join.getClauses(), true /*short_format*/) << '\n'; +} + +void JoinStep::describeActions(JSONBuilder::JSONMap & map) const +{ + const auto & table_join = join->getTableJoin(); + map.add("Type", toString(table_join.kind())); + map.add("Strictness", toString(table_join.strictness())); + map.add("Algorithm", join->getName()); + + if (table_join.strictness() == JoinStrictness::Asof) + map.add("ASOF inequality", toString(table_join.getAsofInequality())); + + if (!table_join.getClauses().empty()) + map.add("Clauses", table_join.formatClauses(table_join.getClauses(), true /*short_format*/)); +} + void JoinStep::updateInputStream(const DataStream & new_input_stream_, size_t idx) { if (idx == 0) diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h index e7185f36588..369ee9bec8b 100644 --- a/src/Processors/QueryPlan/JoinStep.h +++ b/src/Processors/QueryPlan/JoinStep.h @@ -27,6 +27,9 @@ public: void describePipeline(FormatSettings & settings) const override; + void describeActions(JSONBuilder::JSONMap & map) const override; + void describeActions(FormatSettings & settings) const override; + const JoinPtr & getJoin() const { return join; } bool allowPushDownToRight() const; diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index bc47413cbb5..2230e50425c 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -16,7 +16,7 @@ void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, Query void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Third pass is used to apply filters such as key conditions and skip indexes to the storages that support them. /// After that it add CreateSetsStep for the subqueries that has not be used in the filters. -void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes); +void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Optimization (first pass) is a function applied to QueryPlan::Node. /// It can read and update subtree of specified node. 
@@ -111,9 +111,9 @@ void optimizePrimaryKeyCondition(const Stack & stack); void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes); void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes); void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &); -bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes); +bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections); bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes); -bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes); +bool addPlansForSets(QueryPlan & plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes); /// Enable memory bound merging of aggregation states for remote queries /// in case it was enabled for local plan diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 21c7d362b17..e011fb8ecbe 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -19,6 +19,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.remove_redundant_distinct = from.query_plan_remove_redundant_distinct; settings.optimize_projection = from.optimize_use_projections && from.query_plan_optimize_projection; settings.force_use_projection = settings.optimize_projection && from.force_optimize_projection; + settings.optimize_use_implicit_projections = settings.optimize_projection && from.optimize_use_implicit_projections; return settings; } diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 967cfdaca7f..d98c34ce226 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -41,6 +41,7 @@ struct QueryPlanOptimizationSettings /// If reading from projection can be applied bool optimize_projection = false; bool force_use_projection = false; + bool optimize_use_implicit_projections = false; static QueryPlanOptimizationSettings fromSettings(const Settings & from); static QueryPlanOptimizationSettings fromContext(ContextPtr from); diff --git a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp index c9cf46aaeca..787a106200a 100644 --- a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp +++ b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.cpp @@ -8,7 +8,7 @@ namespace DB { -MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag) +MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag, bool check_monotonicity) { using Parents = std::set; std::unordered_map inner_parents; @@ -75,7 +75,12 @@ MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG } /// A node from found match may be nullptr. /// It means that node is visited, but no match was found. - frame.mapped_children.push_back(it->second.node); + if (it->second.monotonicity) + /// Ignore a match with monotonicity. 
+ frame.mapped_children.push_back(nullptr); + else + frame.mapped_children.push_back(it->second.node); + } if (frame.mapped_children.size() < frame.node->children.size()) @@ -182,7 +187,7 @@ MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG } } - if (!match.node && frame.node->function_base->hasInformationAboutMonotonicity()) + if (!match.node && check_monotonicity && frame.node->function_base->hasInformationAboutMonotonicity()) { size_t num_const_args = 0; const ActionsDAG::Node * monotonic_child = nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h index dd689cba46b..223fc40e33f 100644 --- a/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h +++ b/src/Processors/QueryPlan/Optimizations/actionsDAGUtils.h @@ -39,5 +39,5 @@ struct MatchedTrees using Matches = std::unordered_map; }; -MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag); +MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag, bool check_monotonicity = true); } diff --git a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp index e9100ae9d02..47df05301c9 100644 --- a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp +++ b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp @@ -6,7 +6,7 @@ namespace DB::QueryPlanOptimizations { -bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes) +bool addPlansForSets(QueryPlan & root_plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes) { auto * delayed = typeid_cast(node.step.get()); if (!delayed) @@ -23,7 +23,9 @@ bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes) { input_streams.push_back(plan->getCurrentDataStream()); node.children.push_back(plan->getRootNode()); - nodes.splice(nodes.end(), QueryPlan::detachNodes(std::move(*plan))); + auto [add_nodes, add_resources] = QueryPlan::detachNodesAndResources(std::move(*plan)); + nodes.splice(nodes.end(), std::move(add_nodes)); + root_plan.addResources(std::move(add_resources)); } auto creating_sets = std::make_unique(std::move(input_streams)); diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 4336de41b7b..af47b6ff4cd 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -341,6 +341,10 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind) return 0; + /// There is no ASOF Right join, so we're talking about pushing to the right side + if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof) + return 0; + bool is_left = kind == JoinKind::Left; const auto & input_header = is_left ? 
child->getInputStreams().front().header : child->getInputStreams().back().header; const auto & res_header = child->getOutputStream().header; diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp index c3b03a5385f..34a1fc2bb88 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -28,6 +29,20 @@ const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node) namespace DB::QueryPlanOptimizations { +/// This is a check that output columns does not have the same name +/// This is ok for DAG, but may introduce a bug in a SotringStep cause columns are selected by name. +static bool areOutputsConvertableToBlock(const ActionsDAG::NodeRawConstPtrs & outputs) +{ + std::unordered_set names; + for (const auto & output : outputs) + { + if (!names.emplace(output->result_name).second) + return false; + } + + return true; +} + size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -57,6 +72,9 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan: if (unneeded_for_sorting->trivial()) return 0; + if (!areOutputsConvertableToBlock(needed_for_sorting->getOutputs())) + return 0; + // Sorting (parent_node) -> Expression (child_node) auto & node_with_needed = nodes.emplace_back(); std::swap(node_with_needed.children, child_node->children); diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp index ca8a412bf2e..3352567943a 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp @@ -138,8 +138,11 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes) if (table_expression_modifiers && table_expression_modifiers->hasSampleSizeRatio()) { const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey(); - const auto & sampling_columns = sampling_key.sample_block.getColumnsWithTypeAndName(); - required_columns_after_filter.insert(required_columns_after_filter.end(), sampling_columns.begin(), sampling_columns.end()); + const auto & sampling_source_columns = sampling_key.expression->getRequiredColumnsWithTypes(); + for (const auto & column : sampling_source_columns) + required_columns_after_filter.push_back(ColumnWithTypeAndName(column.type, column.name)); + const auto & sampling_result_columns = sampling_key.sample_block.getColumnsWithTypeAndName(); + required_columns_after_filter.insert(required_columns_after_filter.end(), sampling_result_columns.begin(), sampling_result_columns.end()); } const auto & storage = storage_snapshot->storage; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index e788918703e..0caedff67a5 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -114,6 +114,10 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s while (!stack.empty()) { + /// NOTE: optimizePrewhere can modify the stack. + optimizePrewhere(stack, nodes); + optimizePrimaryKeyCondition(stack); + { /// NOTE: frame cannot be safely used after stack was modified. 
auto & frame = stack.back(); @@ -125,8 +129,10 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.read_in_order) optimizeReadInOrder(*frame.node, nodes); + /// Projection optimization relies on PK optimization if (optimization_settings.optimize_projection) - num_applied_projection += optimizeUseAggregateProjections(*frame.node, nodes); + num_applied_projection + += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections); if (optimization_settings.aggregation_in_order) optimizeAggregationInOrder(*frame.node, nodes); @@ -147,6 +153,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s if (optimization_settings.optimize_projection) { + /// Projection optimization relies on PK optimization if (optimizeUseNormalProjections(stack, nodes)) { ++num_applied_projection; @@ -163,9 +170,6 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s } } - /// NOTE: optimizePrewhere can modify the stack. - optimizePrewhere(stack, nodes); - optimizePrimaryKeyCondition(stack); enableMemoryBoundMerging(*stack.back().node, nodes); stack.pop_back(); @@ -177,7 +181,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s "No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1"); } -void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes) +void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes) { Stack stack; stack.push_back({.node = &root}); @@ -201,7 +205,7 @@ void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes) source_step_with_filter->applyFilters(); } - addPlansForSets(*frame.node, nodes); + addPlansForSets(plan, *frame.node, nodes); stack.pop_back(); } diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 2959178b2e5..eab4d3f5d43 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -69,7 +69,7 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( projection.query_ast, context, Pipe(std::make_shared(metadata_snapshot->getSampleBlock())), - SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreASTOptimizations()); + SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreASTOptimizations().ignoreSettingConstraints()); const auto & analysis_result = interpreter.getAnalysisResult(); const auto & query_analyzer = interpreter.getQueryAnalyzer(); @@ -92,18 +92,6 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( return info; } -static bool hasNullableOrMissingColumn(const DAGIndex & index, const Names & names) -{ - for (const auto & query_name : names) - { - auto jt = index.find(query_name); - if (jt == index.end() || jt->second->result_type->isNullable()) - return true; - } - - return false; -} - struct AggregateFunctionMatch { const AggregateDescription * description = nullptr; @@ -170,20 +158,14 @@ std::optional matchAggregateFunctions( } /// This is a special case for the function count(). - /// We can assume that 'count(expr) == count()' if expr is not nullable. 
- if (typeid_cast(candidate.function.get())) + /// We can assume that 'count(expr) == count()' if expr is not nullable, + /// which can be verified by simply casting to `AggregateFunctionCount *`. + if (typeid_cast(aggregate.function.get())) { - bool has_nullable_or_missing_arg = false; - has_nullable_or_missing_arg |= hasNullableOrMissingColumn(query_index, aggregate.argument_names); - has_nullable_or_missing_arg |= hasNullableOrMissingColumn(proj_index, candidate.argument_names); - - if (!has_nullable_or_missing_arg) - { - /// we can ignore arguments for count() - found_match = true; - res.push_back({&candidate, DataTypes()}); - break; - } + /// we can ignore arguments for count() + found_match = true; + res.push_back({&candidate, DataTypes()}); + break; } /// Now, function names and types matched. @@ -287,7 +269,7 @@ ActionsDAGPtr analyzeAggregateProjection( { auto proj_index = buildDAGIndex(*info.before_aggregation); - MatchedTrees::Matches matches = matchTrees(*info.before_aggregation, *query.dag); + MatchedTrees::Matches matches = matchTrees(*info.before_aggregation, *query.dag, false /* check_monotonicity */); // for (const auto & [node, match] : matches) // { @@ -433,7 +415,8 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( QueryPlan::Node & node, AggregatingStep & aggregating, ReadFromMergeTree & reading, - const std::shared_ptr & max_added_blocks) + const std::shared_ptr & max_added_blocks, + bool allow_implicit_projections) { const auto & keys = aggregating.getParams().keys; const auto & aggregates = aggregating.getParams().aggregates; @@ -453,7 +436,8 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( if (projection.type == ProjectionDescription::Type::Aggregate) agg_projections.push_back(&projection); - bool can_use_minmax_projection = metadata->minmax_count_projection && !reading.getMergeTreeData().has_lightweight_delete_parts.load(); + bool can_use_minmax_projection = allow_implicit_projections && metadata->minmax_count_projection + && !reading.getMergeTreeData().has_lightweight_delete_parts.load(); if (!can_use_minmax_projection && agg_projections.empty()) return candidates; @@ -495,6 +479,9 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( // LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection sample block 2 {}", block.dumpStructure()); + // minmax_count_projection cannot be used used when there is no data to process, because + // it will produce incorrect result during constant aggregation. 
+ // See https://github.com/ClickHouse/ClickHouse/issues/36728 if (block) { MinMaxProjectionCandidate minmax; @@ -543,7 +530,7 @@ static QueryPlan::Node * findReadingStep(QueryPlan::Node & node) return nullptr; } -bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes) +bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections) { if (node.children.size() != 1) return false; @@ -568,7 +555,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & std::shared_ptr max_added_blocks = getMaxAddedBlocks(reading); - auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks); + auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks, allow_implicit_projections); AggregateProjectionCandidate * best_candidate = nullptr; if (candidates.minmax_projection) @@ -623,8 +610,16 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & // candidates.minmax_projection->block.dumpStructure()); Pipe pipe(std::make_shared(std::move(candidates.minmax_projection->block))); - projection_reading = std::make_unique(std::move(pipe)); - + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = candidates.minmax_projection->candidate.projection->name, + }); has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); if (has_ordinary_parts) reading->resetParts(std::move(candidates.minmax_projection->normal_parts)); @@ -656,7 +651,16 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & { auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames()); Pipe pipe(std::make_shared(std::move(header))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index dd7a5d449bc..727afcb1a99 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -92,6 +92,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) break; } + /// Dangling query plan node. This might be generated by StorageMerge. 
+ if (iter->node->step.get() == reading) + return false; + const auto metadata = reading->getStorageMetadata(); const auto & projections = metadata->projections; @@ -105,8 +109,8 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) QueryDAG query; { - auto & clild = iter->node->children[iter->next_child - 1]; - if (!query.build(*clild)) + auto & child = iter->node->children[iter->next_child - 1]; + if (!query.build(*child)) return false; if (query.dag) @@ -183,7 +187,16 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (!projection_reading) { Pipe pipe(std::make_shared(proj_snapshot->getSampleBlockForColumns(required_columns))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp index cb76ffa84ba..7ddda29cad4 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp @@ -131,7 +131,8 @@ bool QueryDAG::buildImpl(QueryPlan::Node & node, ActionsDAG::NodeRawConstPtrs & if (prewhere_info->prewhere_actions) { appendExpression(prewhere_info->prewhere_actions); - if (const auto * filter_expression = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) + if (const auto * filter_expression + = findInOutputs(*dag, prewhere_info->prewhere_column_name, prewhere_info->remove_prewhere_column)) filter_nodes.push_back(filter_expression); else return false; diff --git a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h index 1e9ab67c8fe..35daccad115 100644 --- a/src/Processors/QueryPlan/Optimizations/projectionsCommon.h +++ b/src/Processors/QueryPlan/Optimizations/projectionsCommon.h @@ -38,7 +38,6 @@ std::shared_ptr getMaxAddedBlocks(ReadFromMergeTree * rea /// This is a common DAG which is a merge of DAGs from Filter and Expression steps chain. /// Additionally, for all the Filter steps, we collect filter conditions into filter_nodes. -/// Flag remove_last_filter_node is set in case if the last step is a Filter step and it should remove filter column. 
struct QueryDAG { ActionsDAGPtr dag; diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 533fbde1e13..61c6422de5a 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -254,6 +254,32 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static void reorderColumns(ActionsDAG & dag, const Block & header, const std::string & filter_column) +{ + std::unordered_map inputs_map; + for (const auto * input : dag.getInputs()) + inputs_map[input->result_name] = input; + + for (const auto & col : header) + { + auto & input = inputs_map[col.name]; + if (!input) + input = &dag.addInput(col); + } + + ActionsDAG::NodeRawConstPtrs new_outputs; + new_outputs.reserve(header.columns() + 1); + + new_outputs.push_back(&dag.findInOutputs(filter_column)); + for (const auto & col : header) + { + auto & input = inputs_map[col.name]; + new_outputs.push_back(input); + } + + dag.getOutputs() = std::move(new_outputs); +} + Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, ExpressionActionsPtr sorting_expr, @@ -279,6 +305,7 @@ Pipes buildPipesForReadingByPKRanges( continue; auto syntax_result = TreeRewriter(context).analyze(filter_function, primary_key.expression->getRequiredColumnsWithTypes()); auto actions = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false); + reorderColumns(*actions, pipes[i].getHeader(), filter_function->getColumnName()); ExpressionActionsPtr expression_actions = std::make_shared(std::move(actions)); auto description = fmt::format( "filter values in [{}, {})", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? ::toString(borders[i]) : "+inf"); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 687260441ff..ceda9f97bab 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -482,7 +482,7 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes); - QueryPlanOptimizations::optimizeTreeThirdPass(*root, nodes); + QueryPlanOptimizations::optimizeTreeThirdPass(*this, *root, nodes); updateDataStreams(*root); } @@ -542,9 +542,9 @@ void QueryPlan::explainEstimate(MutableColumns & columns) } } -QueryPlan::Nodes QueryPlan::detachNodes(QueryPlan && plan) +std::pair QueryPlan::detachNodesAndResources(QueryPlan && plan) { - return std::move(plan.nodes); + return {std::move(plan.nodes), std::move(plan.resources)}; } } diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index d89bdc534be..f4a6c9097f2 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -108,7 +108,7 @@ public: using Nodes = std::list; Node * getRootNode() const { return root; } - static Nodes detachNodes(QueryPlan && plan); + static std::pair detachNodesAndResources(QueryPlan && plan); private: QueryPlanResourceHolder resources; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index dc9ce5f0391..0b5eb94dbac 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -137,6 +137,69 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts) return true; } +/// build sort description for 
output stream +static void updateSortDescriptionForOutputStream( + DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info, PrewhereInfoPtr prewhere_info) +{ + /// Updating sort description can be done after PREWHERE actions are applied to the header. + /// Aftert PREWHERE actions are applied, column names in header can differ from storage column names due to aliases + /// To mitigate it, we're trying to build original header and use it to deduce sorting description + /// TODO: this approach is fragile, it'd be more robust to update sorting description for the whole plan during plan optimization + Block original_header = output_stream.header.cloneEmpty(); + if (prewhere_info) + { + if (prewhere_info->prewhere_actions) + { + FindOriginalNodeForOutputName original_column_finder(prewhere_info->prewhere_actions); + for (auto & column : original_header) + { + const auto * original_node = original_column_finder.find(column.name); + if (original_node) + column.name = original_node->result_name; + } + } + + if (prewhere_info->row_level_filter) + { + FindOriginalNodeForOutputName original_column_finder(prewhere_info->row_level_filter); + for (auto & column : original_header) + { + const auto * original_node = original_column_finder.find(column.name); + if (original_node) + column.name = original_node->result_name; + } + } + } + + SortDescription sort_description; + const Block & header = output_stream.header; + for (const auto & sorting_key : sorting_key_columns) + { + const auto it = std::find_if( + original_header.begin(), original_header.end(), [&sorting_key](const auto & column) { return column.name == sorting_key; }); + if (it == original_header.end()) + break; + + const size_t column_pos = std::distance(original_header.begin(), it); + sort_description.emplace_back((header.begin() + column_pos)->name, sort_direction); + } + + if (!sort_description.empty()) + { + if (input_order_info) + { + output_stream.sort_scope = DataStream::SortScope::Stream; + const size_t used_prefix_of_sorting_key_size = input_order_info->used_prefix_of_sorting_key_size; + if (sort_description.size() > used_prefix_of_sorting_key_size) + sort_description.resize(used_prefix_of_sorting_key_size); + } + else + output_stream.sort_scope = DataStream::SortScope::Chunk; + } + + output_stream.sort_description = std::move(sort_description); +} + void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const { @@ -250,33 +313,12 @@ ReadFromMergeTree::ReadFromMergeTree( /// Add explicit description. 
setStepDescription(data.getStorageID().getFullNameNotQuoted()); - { /// build sort description for output stream - SortDescription sort_description; - const Names & sorting_key_columns = metadata_for_reading->getSortingKeyColumns(); - const Block & header = output_stream->header; - const int sort_direction = getSortDirection(); - for (const auto & column_name : sorting_key_columns) - { - if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; }) - == header.end()) - break; - sort_description.emplace_back(column_name, sort_direction); - } - if (!sort_description.empty()) - { - if (query_info.getInputOrderInfo()) - { - output_stream->sort_scope = DataStream::SortScope::Stream; - const size_t used_prefix_of_sorting_key_size = query_info.getInputOrderInfo()->used_prefix_of_sorting_key_size; - if (sort_description.size() > used_prefix_of_sorting_key_size) - sort_description.resize(used_prefix_of_sorting_key_size); - } - else - output_stream->sort_scope = DataStream::SortScope::Chunk; - } - - output_stream->sort_description = std::move(sort_description); - } + updateSortDescriptionForOutputStream( + *output_stream, + storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), + getSortDirection(), + query_info.getInputOrderInfo(), + prewhere_info); } @@ -1566,6 +1608,12 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info prewhere_info_value, data.getPartitionValueType(), virt_column_names)}; + updateSortDescriptionForOutputStream( + *output_stream, + storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), + getSortDirection(), + query_info.getInputOrderInfo(), + prewhere_info); } bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() @@ -1763,6 +1811,10 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id)); } context->getQueryContext()->addQueryAccessInfo(partition_names); + + if (storage_snapshot->projection) + context->getQueryContext()->addQueryAccessInfo( + Context::QualifiedProjectionName{.storage_id = data.getStorageID(), .projection_name = storage_snapshot->projection->name}); } ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index 7446203ec35..a24c4dbe4d0 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -4,14 +4,19 @@ namespace DB { -ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_) +ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, Context::QualifiedProjectionName qualified_projection_name_) : ISourceStep(DataStream{.header = pipe_.getHeader()}) , pipe(std::move(pipe_)) + , context(std::move(context_)) + , qualified_projection_name(std::move(qualified_projection_name_)) { } void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { + if (context && context->hasQueryContext()) + context->getQueryContext()->addQueryAccessInfo(qualified_projection_name); + for (const auto & processor : pipe.getProcessors()) processors.emplace_back(processor); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 05e3ebd5102..2606f501009 100644 --- 
a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -1,4 +1,6 @@ #pragma once + +#include #include #include @@ -9,7 +11,8 @@ namespace DB class ReadFromPreparedSource : public ISourceStep { public: - explicit ReadFromPreparedSource(Pipe pipe_); + explicit ReadFromPreparedSource( + Pipe pipe_, ContextPtr context_ = nullptr, Context::QualifiedProjectionName qualified_projection_name_ = {}); String getName() const override { return "ReadFromPreparedSource"; } @@ -18,6 +21,7 @@ public: protected: Pipe pipe; ContextPtr context; + Context::QualifiedProjectionName qualified_projection_name; }; class ReadFromStorageStep : public ReadFromPreparedSource diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index ed740e3e242..7a99c363232 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -86,9 +86,7 @@ static String formattedAST(const ASTPtr & ast) return {}; WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.hilite = false; - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); ast->format(ast_format_settings); return buf.str(); } @@ -164,7 +162,9 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream if (my_table_func_ptr) try_results = my_shard.shard_info.pool->getManyForTableFunction(timeouts, ¤t_settings, PoolMode::GET_MANY); else - try_results = my_shard.shard_info.pool->getManyChecked(timeouts, ¤t_settings, PoolMode::GET_MANY, my_main_table.getQualifiedName()); + try_results = my_shard.shard_info.pool->getManyChecked( + timeouts, ¤t_settings, PoolMode::GET_MANY, + my_shard.main_table ? my_shard.main_table.getQualifiedName() : my_main_table.getQualifiedName()); } catch (const Exception & ex) { @@ -243,7 +243,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact remote_query_executor->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) - remote_query_executor->setMainTable(main_table); + remote_query_executor->setMainTable(shard.main_table ? 
shard.main_table : main_table); pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); addConvertingActions(pipes.back(), output_stream->header); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index d4005d81f1b..ac869cd89f9 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -22,6 +22,7 @@ using ThrottlerPtr = std::shared_ptr; class ReadFromRemote final : public ISourceStep { public: + /// @param main_table_ if Shards contains main_table then this parameter will be ignored ReadFromRemote( ClusterProxy::SelectStreamFactory::Shards shards_, Block header_, diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index 8167fae9baf..3a8c6fb2bff 100644 --- a/src/Processors/ResizeProcessor.cpp +++ b/src/Processors/ResizeProcessor.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 310a1d33e28..74ab3649068 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -77,6 +77,8 @@ std::optional RemoteSource::tryGenerate() { if (value.total_rows_to_read) addTotalRowsApprox(value.total_rows_to_read); + if (value.total_bytes_to_read) + addTotalBytes(value.total_bytes_to_read); progress(value.read_rows, value.read_bytes); }); diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 3d5a910f054..4e9f7b7601a 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -159,14 +159,14 @@ void AggregatingInOrderTransform::consume(Chunk chunk) if (group_by_key) params->aggregator.mergeOnBlockSmall(variants, key_begin, key_end, aggregate_columns_data, key_columns_raw); else - params->aggregator.mergeOnIntervalWithoutKeyImpl(variants, key_begin, key_end, aggregate_columns_data); + params->aggregator.mergeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_columns_data); } else { if (group_by_key) params->aggregator.executeOnBlockSmall(variants, key_begin, key_end, key_columns_raw, aggregate_function_instructions.data()); else - params->aggregator.executeOnIntervalWithoutKeyImpl(variants, key_begin, key_end, aggregate_function_instructions.data()); + params->aggregator.executeOnIntervalWithoutKey(variants, key_begin, key_end, aggregate_function_instructions.data()); } } diff --git a/src/Processors/Transforms/CheckConstraintsTransform.cpp b/src/Processors/Transforms/CheckConstraintsTransform.cpp index 88f02a3926f..3a6595ea4fb 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.cpp +++ b/src/Processors/Transforms/CheckConstraintsTransform.cpp @@ -73,7 +73,7 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) "Constraint expression returns nullable column that contains null value", backQuote(constraint_ptr->name), table_id.getNameForLogs(), - serializeAST(*(constraint_ptr->expr), true)); + serializeAST(*(constraint_ptr->expr))); result_column = nested_column; } @@ -116,7 +116,7 @@ void CheckConstraintsTransform::onConsume(Chunk chunk) backQuote(constraint_ptr->name), table_id.getNameForLogs(), rows_written + row_idx + 1, - serializeAST(*(constraint_ptr->expr), true), + serializeAST(*(constraint_ptr->expr)), column_values_msg); } } diff --git 
a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 646256d60c0..3dfb9fe178f 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp index 59c4b9a6a87..83a75318d61 100644 --- a/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp +++ b/src/Processors/Transforms/CreateSetAndFilterOnTheFlyTransform.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -35,7 +36,11 @@ Columns getColumnsByIndices(const Chunk & chunk, const std::vector & ind Columns columns; const Columns & all_cols = chunk.getColumns(); for (const auto & index : indices) - columns.push_back(all_cols.at(index)); + { + auto col = recursiveRemoveSparse(all_cols.at(index)); + columns.push_back(std::move(col)); + } + return columns; } @@ -149,7 +154,7 @@ IProcessor::Status FilterBySetOnTheFlyTransform::prepare() LOG_DEBUG(log, "Finished {} by [{}]: consumed {} rows in total, {} rows bypassed, result {} rows, {:.2f}% filtered", Poco::toLower(getDescription()), fmt::join(column_names, ", "), stat.consumed_rows, stat.consumed_rows_before_set, stat.result_rows, - 100 - 100.0 * stat.result_rows / stat.consumed_rows); + stat.consumed_rows > 0 ? (100 - 100.0 * stat.result_rows / stat.consumed_rows) : 0); } else { diff --git a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp index 28a3260d742..91806a90083 100644 --- a/src/Processors/Transforms/DistinctSortedChunkTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedChunkTransform.cpp @@ -5,6 +5,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int SET_SIZE_LIMIT_EXCEEDED; } @@ -126,9 +127,20 @@ bool DistinctSortedChunkTransform::isKey(const size_t key_pos, const size_t row_ bool DistinctSortedChunkTransform::isLatestKeyFromPrevChunk(const size_t row_pos) const { - for (size_t i = 0; i < sorted_columns.size(); ++i) + for (size_t i = 0, s = sorted_columns.size(); i < s; ++i) { - const int res = prev_chunk_latest_key[i]->compareAt(0, row_pos, *sorted_columns[i], sorted_columns_descr[i].nulls_direction); + const auto & sorted_column = *sorted_columns[i]; + /// temporary hardening due to suspious crashes in sqlancer tests + if (unlikely(sorted_column.size() <= row_pos)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected size of a sorted column: size {}, row_pos {}, column position {}, type {}", + sorted_column.size(), + row_pos, + i, + sorted_column.getFamilyName()); + + const int res = prev_chunk_latest_key[i]->compareAt(0, row_pos, sorted_column, sorted_columns_descr[i].nulls_direction); if (res != 0) return false; } diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.cpp b/src/Processors/Transforms/ExceptionKeepingTransform.cpp index 3c40c078225..b50f66b0240 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.cpp +++ b/src/Processors/Transforms/ExceptionKeepingTransform.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace DB { diff --git a/src/Processors/Transforms/ExtractColumnsTransform.cpp b/src/Processors/Transforms/ExtractColumnsTransform.cpp new file mode 100644 index 00000000000..44bf5582290 --- /dev/null +++ b/src/Processors/Transforms/ExtractColumnsTransform.cpp @@ -0,0 +1,35 @@ 
+#include +#include + +namespace DB +{ + +ExtractColumnsTransform::ExtractColumnsTransform(const Block & header_, const NamesAndTypesList & requested_columns_) + : ISimpleTransform(header_, transformHeader(header_, requested_columns_), false), requested_columns(requested_columns_) +{ + +} + +Block ExtractColumnsTransform::transformHeader(Block header, const NamesAndTypesList & requested_columns_) +{ + ColumnsWithTypeAndName columns; + columns.reserve(requested_columns_.size()); + for (const auto & required_column : requested_columns_) + columns.emplace_back(getColumnFromBlock(header, required_column), required_column.type, required_column.name); + + return Block(std::move(columns)); +} + +void ExtractColumnsTransform::transform(Chunk & chunk) +{ + size_t num_rows = chunk.getNumRows(); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + Columns columns; + columns.reserve(requested_columns.size()); + for (const auto & required_column : requested_columns) + columns.emplace_back(getColumnFromBlock(block, required_column)); + + chunk.setColumns(std::move(columns), num_rows); +} + +} diff --git a/src/Processors/Transforms/ExtractColumnsTransform.h b/src/Processors/Transforms/ExtractColumnsTransform.h new file mode 100644 index 00000000000..f8b3d803736 --- /dev/null +++ b/src/Processors/Transforms/ExtractColumnsTransform.h @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace DB +{ + +/// Extracts required columns and subcolumns from the block. +class ExtractColumnsTransform final : public ISimpleTransform +{ +public: + ExtractColumnsTransform( + const Block & header_, + const NamesAndTypesList & requested_columns_); + + String getName() const override { return "ExtractColumnsTransform"; } + + static Block transformHeader(Block header, const NamesAndTypesList & requested_columns_); + +protected: + void transform(Chunk & chunk) override; + +private: + const NamesAndTypesList requested_columns; +}; + +} diff --git a/src/Processors/Transforms/FilterSortedStreamByRange.h b/src/Processors/Transforms/FilterSortedStreamByRange.h index e1141ebd299..e3d3f6f10ef 100644 --- a/src/Processors/Transforms/FilterSortedStreamByRange.h +++ b/src/Processors/Transforms/FilterSortedStreamByRange.h @@ -27,6 +27,9 @@ public: true) , filter_transform(header_, expression_, filter_column_name_, remove_filter_column_, on_totals_) { + assertBlocksHaveEqualStructure( + header_, getOutputPort().getHeader(), + "Expression for FilterSortedStreamByRange should not change header"); } String getName() const override { return "FilterSortedStreamByRange"; } diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index 05fddc35e15..63a9c3924a2 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -35,9 +35,20 @@ FinishSortingTransform::FinishSortingTransform( "Can't finish sorting. SortDescription " "of already sorted stream is not prefix of SortDescription needed to sort"); + /// Remove constants from description_sorted_. 
+ SortDescription description_sorted_without_constants; + description_sorted_without_constants.reserve(description_sorted_.size()); + size_t num_columns = const_columns_to_remove.size(); + for (const auto & column_description : description_sorted_) + { + auto pos = header.getPositionByName(column_description.column_name); + + if (pos < num_columns && !const_columns_to_remove[pos]) + description_sorted_without_constants.push_back(column_description); + } /// The target description is modified in SortingTransform constructor. /// To avoid doing the same actions with description_sorted just copy it from prefix of target description. - for (const auto & column_sort_desc : description_sorted_) + for (const auto & column_sort_desc : description_sorted_without_constants) description_with_positions.emplace_back(column_sort_desc, header_without_constants.getPositionByName(column_sort_desc.column_name)); } diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 49b90d04b81..4e7868ea1c2 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -189,7 +189,6 @@ void JoiningTransform::transform(Chunk & chunk) } else block = readExecute(chunk); - auto num_rows = block.rows(); chunk.setColumns(block.getColumns(), num_rows); } @@ -311,8 +310,11 @@ void FillingRightJoinSideTransform::work() } -DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform(Block output_header) - : IProcessor(InputPorts{Block()}, OutputPorts{output_header}) +DelayedJoinedBlocksWorkerTransform::DelayedJoinedBlocksWorkerTransform( + Block output_header_, + NonJoinedStreamBuilder non_joined_stream_builder_) + : IProcessor(InputPorts{Block()}, OutputPorts{output_header_}) + , non_joined_stream_builder(std::move(non_joined_stream_builder_)) { } @@ -365,6 +367,7 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() if (!data.chunk.hasChunkInfo()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); + task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else @@ -372,7 +375,8 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() input.setNotNeeded(); } - if (task->finished) + // When delayed_blocks is nullptr, it means that all buckets have been joined. + if (!task->delayed_blocks) { input.close(); output.finish(); @@ -387,11 +391,21 @@ void DelayedJoinedBlocksWorkerTransform::work() if (!task) return; - Block block = task->delayed_blocks->next(); - + Block block; + /// All joined and non-joined rows from left stream are emitted, only right non-joined rows are left + if (!task->delayed_blocks->isFinished()) + { + block = task->delayed_blocks->next(); + if (!block) + block = nextNonJoinedBlock(); + } + else + { + block = nextNonJoinedBlock(); + } if (!block) { - task.reset(); + resetTask(); return; } @@ -400,6 +414,30 @@ void DelayedJoinedBlocksWorkerTransform::work() output_chunk.setColumns(block.getColumns(), rows); } +void DelayedJoinedBlocksWorkerTransform::resetTask() +{ + task.reset(); + non_joined_delayed_stream = nullptr; +} + +Block DelayedJoinedBlocksWorkerTransform::nextNonJoinedBlock() +{ + // Before read from non-joined stream, all blocks in left file reader must have been joined. + // For example, in HashJoin, it may return invalid mismatch rows from non-joined stream before + // the all blocks in left file reader have been finished, since the used flags are incomplete. 
+ // To make only one processor could read from non-joined stream seems be a easy way. + if (!non_joined_delayed_stream && task && task->left_delayed_stream_finish_counter->isLast()) + { + non_joined_delayed_stream = non_joined_stream_builder(); + } + + if (non_joined_delayed_stream) + { + return non_joined_delayed_stream->next(); + } + return {}; +} + DelayedJoinedBlocksTransform::DelayedJoinedBlocksTransform(size_t num_streams, JoinPtr join_) : IProcessor(InputPorts{}, OutputPorts(num_streams, Block())) , join(std::move(join_)) @@ -433,6 +471,9 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (finished) { + // Since have memory limit, cannot handle all buckets parallelly by different + // DelayedJoinedBlocksWorkerTransform. So send the same task to all outputs. + // Wait for all DelayedJoinedBlocksWorkerTransform be idle before getting next bucket. for (auto & output : outputs) { if (output.isFinished()) @@ -448,10 +489,14 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (delayed_blocks) { + // This counter is used to ensure that only the last DelayedJoinedBlocksWorkerTransform + // could read right non-joined blocks from the join. + auto left_delayed_stream_finished_counter = std::make_shared(outputs.size()); for (auto & output : outputs) { Chunk chunk; - chunk.setChunkInfo(std::make_shared(delayed_blocks)); + auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); + chunk.setChunkInfo(task); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index e7edff40c56..a308af03662 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -115,12 +115,16 @@ class DelayedBlocksTask : public ChunkInfo { public: - explicit DelayedBlocksTask() : finished(true) {} - explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_) : delayed_blocks(std::move(delayed_blocks_)) {} + DelayedBlocksTask() = default; + explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) + : delayed_blocks(std::move(delayed_blocks_)) + , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) + { + } IBlocksStreamPtr delayed_blocks = nullptr; + JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter = nullptr; - bool finished = false; }; using DelayedBlocksTaskPtr = std::shared_ptr; @@ -147,7 +151,10 @@ private: class DelayedJoinedBlocksWorkerTransform : public IProcessor { public: - explicit DelayedJoinedBlocksWorkerTransform(Block output_header); + using NonJoinedStreamBuilder = std::function; + explicit DelayedJoinedBlocksWorkerTransform( + Block output_header_, + NonJoinedStreamBuilder non_joined_stream_builder_); String getName() const override { return "DelayedJoinedBlocksWorkerTransform"; } @@ -157,8 +164,12 @@ public: private: DelayedBlocksTaskPtr task; Chunk output_chunk; + /// For building a block stream to access the non-joined rows. 
+ NonJoinedStreamBuilder non_joined_stream_builder; + IBlocksStreamPtr non_joined_delayed_stream = nullptr; - bool finished = false; + void resetTask(); + Block nextNonJoinedBlock(); }; } diff --git a/src/Processors/Transforms/LimitByTransform.cpp b/src/Processors/Transforms/LimitByTransform.cpp index cb2804007bd..5e6d7dc816a 100644 --- a/src/Processors/Transforms/LimitByTransform.cpp +++ b/src/Processors/Transforms/LimitByTransform.cpp @@ -33,14 +33,11 @@ void LimitByTransform::transform(Chunk & chunk) for (UInt64 row = 0; row < num_rows; ++row) { - UInt128 key{}; SipHash hash; - for (auto position : key_positions) columns[position]->updateHashWithValue(row, hash); - hash.get128(key); - + const auto key = hash.get128(); auto count = keys_counts[key]++; if (count >= group_offset && (group_length > std::numeric_limits::max() - group_offset || count < group_length + group_offset)) diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 33c2d870b76..3fc9a4e71db 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -159,7 +159,7 @@ void PartialSortingTransform::transform(Chunk & chunk) { MutableColumnPtr sort_description_threshold_column_updated = raw_block_columns[i]->cloneEmpty(); sort_description_threshold_column_updated->insertFrom(*raw_block_columns[i], min_row_to_compare); - sort_description_threshold_columns_updated[i] = std::move(sort_description_threshold_column_updated); + sort_description_threshold_columns_updated[i] = sort_description_threshold_column_updated->convertToFullColumnIfSparse(); } sort_description_threshold_columns = std::move(sort_description_threshold_columns_updated); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index e89aec31655..7de9538e435 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index a785d52bf65..be76971ddcd 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -148,7 +148,7 @@ static int compareValuesWithOffsetFloat(const IColumn * _compared_column, const auto * reference_column = assert_cast( _reference_column); const auto offset = _offset.get(); - assert(offset >= 0); + chassert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); assert(compared_value_data.size == sizeof(typename ColumnType::ValueType)); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 7f7f9058f1b..f70ebcf27b8 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -72,7 +72,7 @@ struct ViewsData std::atomic_bool has_exception = false; std::exception_ptr first_exception; - ViewsData(ThreadStatusesHolderPtr thread_status_holder_, ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_ , StoragePtr source_storage_) + ViewsData(ThreadStatusesHolderPtr thread_status_holder_, ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_, StoragePtr source_storage_) : 
thread_status_holder(std::move(thread_status_holder_)) , context(std::move(context_)) , source_storage_id(std::move(source_storage_id_)) @@ -281,7 +281,7 @@ Chain buildPushingToViewsChain( /// and switch back to the original thread_status. auto * original_thread = current_thread; SCOPE_EXIT({ current_thread = original_thread; }); - + current_thread = nullptr; std::unique_ptr view_thread_status_ptr = std::make_unique(/*check_current_thread_on_destruction=*/ false); /// Copy of a ThreadStatus should be internal. view_thread_status_ptr->setInternalThread(); diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index dedf85e409c..553b18dd57b 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -491,7 +491,10 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe if (delayed_root) { // Process delayed joined blocks when all JoiningTransform are finished. - auto delayed = std::make_shared(joined_header); + auto delayed = std::make_shared( + joined_header, + [left_header, joined_header, max_block_size, join]() + { return join->getNonJoinedBlocks(left_header, joined_header, max_block_size); }); if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output"); diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp index 0f50d56f1a5..4d7c7aa0f2a 100644 --- a/src/QueryPipeline/ReadProgressCallback.cpp +++ b/src/QueryPipeline/ReadProgressCallback.cpp @@ -63,6 +63,18 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c process_list_elem->updateProgressIn(total_rows_progress); } + size_t bytes = 0; + if ((bytes = total_bytes.exchange(0)) != 0) + { + Progress total_bytes_progress = {0, 0, 0, bytes}; + + if (progress_callback) + progress_callback(total_bytes_progress); + + if (process_list_elem) + process_list_elem->updateProgressIn(total_bytes_progress); + } + Progress value {read_rows, read_bytes}; if (progress_callback) diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h index 08f2f9fc99b..5dbf3344bdf 100644 --- a/src/QueryPipeline/ReadProgressCallback.h +++ b/src/QueryPipeline/ReadProgressCallback.h @@ -23,6 +23,7 @@ public: void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback) { progress_callback = callback; } void addTotalRowsApprox(size_t value) { total_rows_approx += value; } + void addTotalBytes(size_t value) { total_bytes += value; } /// Skip updating profile events. /// For merges in mutations it may need special logic, it's done inside ProgressCallback. @@ -37,6 +38,8 @@ private: /// The approximate total number of rows to read. For progress bar. std::atomic_size_t total_rows_approx = 0; + /// The total number of bytes to read. For progress bar. + std::atomic_size_t total_bytes = 0; std::mutex limits_and_quotas_mutex; Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index cd6f65b7b43..198c3265a84 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -591,8 +591,8 @@ void RemoteQueryExecutor::finish() /// Send the request to abort the execution of the request, if not already sent. 
tryCancel("Cancelling query because enough data has been read"); - /// If connections weren't created yet or query wasn't sent, nothing to do. - if (!connections || !sent_query) + /// If connections weren't created yet, query wasn't sent or was already finished, nothing to do. + if (!connections || !sent_query || finished) return; /// Get the remaining packets so that there is no out of sync in the connections to the replicas. diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index bf9ba20a5cf..3370a8c009b 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -798,7 +798,7 @@ namespace /// Authentication. session.emplace(iserver.context(), ClientInfo::Interface::GRPC); session->authenticate(user, password, user_address); - session->getClientInfo().quota_key = quota_key; + session->setQuotaClientKey(quota_key); ClientInfo client_info = session->getClientInfo(); @@ -833,7 +833,7 @@ namespace { settings_changes.push_back({key, value}); } - query_context->checkSettingsConstraints(settings_changes); + query_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); query_context->applySettingsChanges(settings_changes); query_context->setCurrentQueryId(query_info.query_id()); @@ -1118,7 +1118,7 @@ namespace SettingsChanges settings_changes; for (const auto & [key, value] : external_table.settings()) settings_changes.push_back({key, value}); - external_table_context->checkSettingsConstraints(settings_changes); + external_table_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); external_table_context->applySettingsChanges(settings_changes); } auto in = external_table_context->getInputFormat( diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h index f5b7a70dc79..236a56e2323 100644 --- a/src/Server/HTTP/HTTPServerResponse.h +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -5,9 +5,9 @@ #include #include -#include #include + namespace DB { diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index c8015cfd185..9da24cfd9cb 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -3,7 +3,7 @@ #include #include #include - +#include namespace DB { @@ -29,28 +29,31 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders() } } -void WriteBufferFromHTTPServerResponse::writeHeaderSummary() +void WriteBufferFromHTTPServerResponse::writeHeaderProgressImpl(const char * header_name) { if (headers_finished_sending) return; WriteBufferFromOwnString progress_string_writer; - accumulated_progress.writeJSON(progress_string_writer); + + writeCString("{", progress_string_writer); + accumulated_progress.writeJSON(progress_string_writer, false); + writeCString(",\"peak_memory_usage\":\"", progress_string_writer); + writeText(peak_memory_usage, progress_string_writer); + writeCString("\"}", progress_string_writer); if (response_header_ostr) - *response_header_ostr << "X-ClickHouse-Summary: " << progress_string_writer.str() << "\r\n" << std::flush; + *response_header_ostr << header_name << progress_string_writer.str() << "\r\n" << std::flush; +} + +void WriteBufferFromHTTPServerResponse::writeHeaderSummary() +{ + writeHeaderProgressImpl("X-ClickHouse-Summary: "); } void WriteBufferFromHTTPServerResponse::writeHeaderProgress() { - if (headers_finished_sending) - return; - - WriteBufferFromOwnString progress_string_writer; - 
accumulated_progress.writeJSON(progress_string_writer); - - if (response_header_ostr) - *response_header_ostr << "X-ClickHouse-Progress: " << progress_string_writer.str() << "\r\n" << std::flush; + writeHeaderProgressImpl("X-ClickHouse-Progress: "); } void WriteBufferFromHTTPServerResponse::writeExceptionCode() @@ -149,7 +152,7 @@ WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( } -void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress) +void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress, Int64 peak_memory_usage_) { std::lock_guard lock(mutex); @@ -158,7 +161,7 @@ void WriteBufferFromHTTPServerResponse::onProgress(const Progress & progress) return; accumulated_progress.incrementPiecewiseAtomically(progress); - + peak_memory_usage = peak_memory_usage_; if (send_progress && progress_watch.elapsed() >= send_progress_interval_ms * 1000000) { progress_watch.restart(); diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index ce677616755..b4c66357d3b 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -43,7 +43,7 @@ public: ~WriteBufferFromHTTPServerResponse() override; /// Writes progress in repeating HTTP headers. - void onProgress(const Progress & progress); + void onProgress(const Progress & progress, Int64 peak_memory_usage_); /// Turn compression on or off. /// The setting has any effect only if HTTP headers haven't been sent yet. @@ -89,6 +89,8 @@ private: /// but not finish them with \r\n, allowing to send more headers subsequently. void startSendHeaders(); + // Used for write the header X-ClickHouse-Progress / X-ClickHouse-Summary + void writeHeaderProgressImpl(const char * header_name); // Used for write the header X-ClickHouse-Progress void writeHeaderProgress(); // Used for write the header X-ClickHouse-Summary @@ -126,6 +128,8 @@ private: int exception_code = 0; + Int64 peak_memory_usage = 0; + std::mutex mutex; /// progress callback could be called from different threads. }; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 42459340c57..10d5e7a0242 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -474,7 +474,6 @@ bool HTTPHandler::authenticateUser( } /// Set client info. It will be used for quota accounting parameters in 'setUser' method. - ClientInfo & client_info = session->getClientInfo(); ClientInfo::HTTPMethod http_method = ClientInfo::HTTPMethod::UNKNOWN; if (request.getMethod() == HTTPServerRequest::HTTP_GET) @@ -482,15 +481,13 @@ bool HTTPHandler::authenticateUser( else if (request.getMethod() == HTTPServerRequest::HTTP_POST) http_method = ClientInfo::HTTPMethod::POST; - client_info.http_method = http_method; - client_info.http_user_agent = request.get("User-Agent", ""); - client_info.http_referer = request.get("Referer", ""); - client_info.forwarded_for = request.get("X-Forwarded-For", ""); - client_info.quota_key = quota_key; + session->setHttpClientInfo(http_method, request.get("User-Agent", ""), request.get("Referer", "")); + session->setForwardedFor(request.get("X-Forwarded-For", "")); + session->setQuotaClientKey(quota_key); /// Extract the last entry from comma separated list of forwarded_for addresses. /// Only the last proxy can be trusted (if any). 
- String forwarded_address = client_info.getLastForwardedFor(); + String forwarded_address = session->getClientInfo().getLastForwardedFor(); try { if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false)) @@ -564,8 +561,7 @@ void HTTPHandler::processQuery( session->makeSessionContext(); } - auto client_info = session->getClientInfo(); - auto context = session->makeQueryContext(std::move(client_info)); + auto context = session->makeQueryContext(); /// This parameter is used to tune the behavior of output formats (such as Native) for compatibility. if (params.has("client_protocol_version")) @@ -641,7 +637,7 @@ void HTTPHandler::processQuery( throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected MemoryWriteBuffer"); auto rdbuf = prev_memory_buffer->tryGetReadBuffer(); - copyData(*rdbuf , *next_buffer); + copyData(*rdbuf, *next_buffer); return next_buffer; }; @@ -767,7 +763,7 @@ void HTTPHandler::processQuery( context->setDefaultFormat(default_format); /// For external data we also want settings - context->checkSettingsConstraints(settings_changes); + context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); context->applySettingsChanges(settings_changes); /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. @@ -818,7 +814,11 @@ void HTTPHandler::processQuery( /// While still no data has been sent, we will report about query execution progress by sending HTTP headers. /// Note that we add it unconditionally so the progress is available for `X-ClickHouse-Summary` - append_callback([&used_output](const Progress & progress) { used_output.out->onProgress(progress); }); + append_callback([&used_output](const Progress & progress) + { + const auto& thread_group = CurrentThread::getGroup(); + used_output.out->onProgress(progress, thread_group->memory_tracker.getPeak()); + }); if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) { @@ -904,10 +904,9 @@ try /// Destroy CascadeBuffer to actualize buffers' positions and reset extra references if (used_output.hasDelayed()) { - if (used_output.out_maybe_delayed_and_compressed) - { - used_output.out_maybe_delayed_and_compressed->finalize(); - } + /// do not call finalize here for CascadeWriteBuffer used_output.out_maybe_delayed_and_compressed, + /// exception is written into used_output.out_maybe_compressed later + /// HTTPHandler::trySendExceptionToClient is called with exception context, it is Ok to destroy buffers used_output.out_maybe_delayed_and_compressed.reset(); } @@ -989,22 +988,22 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse } // Parse the OpenTelemetry traceparent header. 
- ClientInfo& client_info = session->getClientInfo(); + auto & client_trace_context = session->getClientTraceContext(); if (request.has("traceparent")) { std::string opentelemetry_traceparent = request.get("traceparent"); std::string error; - if (!client_info.client_trace_context.parseTraceparentHeader(opentelemetry_traceparent, error)) + if (!client_trace_context.parseTraceparentHeader(opentelemetry_traceparent, error)) { LOG_DEBUG(log, "Failed to parse OpenTelemetry traceparent header '{}': {}", opentelemetry_traceparent, error); } - client_info.client_trace_context.tracestate = request.get("tracestate", ""); + client_trace_context.tracestate = request.get("tracestate", ""); } // Setup tracing context for this thread auto context = session->sessionOrGlobalContext(); thread_trace_context = std::make_unique("HTTPHandler", - client_info.client_trace_context, + client_trace_context, context->getSettingsRef(), context->getOpenTelemetrySpanLog()); thread_trace_context->root_span.kind = OpenTelemetry::SERVER; diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 78e374ee9e0..1c911034da1 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -132,21 +132,25 @@ void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IS auto ping_handler = std::make_shared>(server, ping_response_expression); ping_handler->attachStrictPath("/ping"); ping_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/ping"); factory.addHandler(ping_handler); auto replicas_status_handler = std::make_shared>(server); replicas_status_handler->attachNonStrictPath("/replicas_status"); replicas_status_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/replicas_status"); factory.addHandler(replicas_status_handler); auto play_handler = std::make_shared>(server); play_handler->attachNonStrictPath("/play"); play_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/play"); factory.addHandler(play_handler); auto dashboard_handler = std::make_shared>(server); dashboard_handler->attachNonStrictPath("/dashboard"); dashboard_handler->allowGetAndHeadRequest(); + factory.addPathToHints("/dashboard"); factory.addHandler(dashboard_handler); auto js_handler = std::make_shared>(server); diff --git a/src/Server/HTTPPathHints.cpp b/src/Server/HTTPPathHints.cpp new file mode 100644 index 00000000000..51ef3eabffe --- /dev/null +++ b/src/Server/HTTPPathHints.cpp @@ -0,0 +1,16 @@ +#include + +namespace DB +{ + +void HTTPPathHints::add(const String & http_path) +{ + http_paths.push_back(http_path); +} + +std::vector HTTPPathHints::getAllRegisteredNames() const +{ + return http_paths; +} + +} diff --git a/src/Server/HTTPPathHints.h b/src/Server/HTTPPathHints.h new file mode 100644 index 00000000000..708816ebf07 --- /dev/null +++ b/src/Server/HTTPPathHints.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class HTTPPathHints : public IHints<1, HTTPPathHints> +{ +public: + std::vector getAllRegisteredNames() const override; + void add(const String & http_path); + +private: + std::vector http_paths; +}; + +using HTTPPathHintsPtr = std::shared_ptr; + +} diff --git a/src/Server/HTTPRequestHandlerFactoryMain.cpp b/src/Server/HTTPRequestHandlerFactoryMain.cpp index 61a2909d30f..5481bcd5083 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.cpp +++ b/src/Server/HTTPRequestHandlerFactoryMain.cpp @@ -29,7 +29,7 @@ std::unique_ptr HTTPRequestHandlerFactoryMain::createRequest || request.getMethod() == 
Poco::Net::HTTPRequest::HTTP_HEAD || request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { - return std::unique_ptr(new NotFoundHandler); + return std::unique_ptr(new NotFoundHandler(hints.getHints(request.getURI()))); } return nullptr; diff --git a/src/Server/HTTPRequestHandlerFactoryMain.h b/src/Server/HTTPRequestHandlerFactoryMain.h index b0e57bd6b3b..07b278d831c 100644 --- a/src/Server/HTTPRequestHandlerFactoryMain.h +++ b/src/Server/HTTPRequestHandlerFactoryMain.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -15,11 +16,14 @@ public: void addHandler(HTTPRequestHandlerFactoryPtr child_factory) { child_factories.emplace_back(child_factory); } + void addPathToHints(const std::string & http_path) { hints.add(http_path); } + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: Poco::Logger * log; std::string name; + HTTPPathHints hints; std::vector child_factories; }; diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index e3edc281e83..58d227a5ae5 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -175,10 +175,13 @@ struct SocketInterruptablePollWrapper } while (rc < 0 && errno == POCO_EINTR); - if (rc >= 1 && poll_buf[0].revents & POLLIN) - socket_ready = true; - if (rc >= 2 && poll_buf[1].revents & POLLIN) - fd_ready = true; + if (rc >= 1) + { + if (poll_buf[0].revents & POLLIN) + socket_ready = true; + if (poll_buf[1].revents & POLLIN) + fd_ready = true; + } #endif } diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 7318b0ad89b..f98b86e6cf8 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -94,7 +94,7 @@ void MySQLHandler::run() session = std::make_unique(server.context(), ClientInfo::Interface::MYSQL); SCOPE_EXIT({ session.reset(); }); - session->getClientInfo().connection_id = connection_id; + session->setClientConnectionId(connection_id); in = std::make_shared(socket()); out = std::make_shared(socket()); diff --git a/src/Server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp index 3181708b9b7..5b1db508551 100644 --- a/src/Server/NotFoundHandler.cpp +++ b/src/Server/NotFoundHandler.cpp @@ -10,7 +10,8 @@ void NotFoundHandler::handleRequest(HTTPServerRequest & request, HTTPServerRespo try { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); - *response.send() << "There is no handle " << request.getURI() << "\n\n" + *response.send() << "There is no handle " << request.getURI() + << (!hints.empty() ? fmt::format(". 
Maybe you meant {}.", hints.front()) : "") << "\n\n" << "Use / or /ping for health checks.\n" << "Or /replicas_status for more sophisticated health checks.\n\n" << "Send queries from your program with POST method or GET /?query=...\n\n" diff --git a/src/Server/NotFoundHandler.h b/src/Server/NotFoundHandler.h index 749ac388c4d..1cbfcd57f8f 100644 --- a/src/Server/NotFoundHandler.h +++ b/src/Server/NotFoundHandler.h @@ -9,7 +9,10 @@ namespace DB class NotFoundHandler : public HTTPRequestHandler { public: + NotFoundHandler(std::vector hints_) : hints(std::move(hints_)) {} void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; +private: + std::vector hints; }; } diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 36b05932979..7b078154252 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -58,7 +58,7 @@ void PostgreSQLHandler::run() session = std::make_unique(server.context(), ClientInfo::Interface::POSTGRESQL); SCOPE_EXIT({ session.reset(); }); - session->getClientInfo().connection_id = connection_id; + session->setClientConnectionId(connection_id); try { diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp new file mode 100644 index 00000000000..29ba7224c75 --- /dev/null +++ b/src/Server/ServerType.cpp @@ -0,0 +1,132 @@ +#include + +#include +#include + +#include + + +namespace DB +{ + +namespace +{ + std::vector getTypeIndexToTypeName() + { + constexpr std::size_t types_size = magic_enum::enum_count(); + + std::vector type_index_to_type_name; + type_index_to_type_name.resize(types_size); + + auto entries = magic_enum::enum_entries(); + for (const auto & [entry, str] : entries) + { + auto str_copy = String(str); + std::replace(str_copy.begin(), str_copy.end(), '_', ' '); + type_index_to_type_name[static_cast(entry)] = std::move(str_copy); + } + + return type_index_to_type_name; + } +} + +const char * ServerType::serverTypeToString(ServerType::Type type) +{ + /** During parsing if SystemQuery is not parsed properly it is added to Expected variants as description check IParser.h. + * Description string must be statically allocated. + */ + static std::vector type_index_to_type_name = getTypeIndexToTypeName(); + const auto & type_name = type_index_to_type_name[static_cast(type)]; + return type_name.data(); +} + +bool ServerType::shouldStart(Type server_type, const std::string & server_custom_name) const +{ + if (type == Type::QUERIES_ALL) + return true; + + if (type == Type::QUERIES_DEFAULT) + { + switch (server_type) + { + case Type::TCP: + case Type::TCP_WITH_PROXY: + case Type::TCP_SECURE: + case Type::HTTP: + case Type::HTTPS: + case Type::MYSQL: + case Type::GRPC: + case Type::POSTGRESQL: + case Type::PROMETHEUS: + case Type::INTERSERVER_HTTP: + case Type::INTERSERVER_HTTPS: + return true; + default: + return false; + } + } + + if (type == Type::QUERIES_CUSTOM) + { + switch (server_type) + { + case Type::CUSTOM: + return true; + default: + return false; + } + } + + if (type == Type::CUSTOM) + return server_type == type && server_custom_name == "protocols." 
+ custom_name + ".port"; + + return server_type == type; +} + +bool ServerType::shouldStop(const std::string & port_name) const +{ + Type port_type; + + if (port_name == "http_port") + port_type = Type::HTTP; + + else if (port_name == "https_port") + port_type = Type::HTTPS; + + else if (port_name == "tcp_port") + port_type = Type::TCP; + + else if (port_name == "tcp_with_proxy_port") + port_type = Type::TCP_WITH_PROXY; + + else if (port_name == "tcp_port_secure") + port_type = Type::TCP_SECURE; + + else if (port_name == "mysql_port") + port_type = Type::MYSQL; + + else if (port_name == "postgresql_port") + port_type = Type::POSTGRESQL; + + else if (port_name == "grpc_port") + port_type = Type::GRPC; + + else if (port_name == "prometheus.port") + port_type = Type::PROMETHEUS; + + else if (port_name == "interserver_http_port") + port_type = Type::INTERSERVER_HTTP; + + else if (port_name == "interserver_https_port") + port_type = Type::INTERSERVER_HTTPS; + + else if (port_name.starts_with("protocols.") && port_name.ends_with(".port")) + port_type = Type::CUSTOM; + + else + return false; + + return shouldStart(port_type, port_name); +} + +} diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h new file mode 100644 index 00000000000..eafe4f941dd --- /dev/null +++ b/src/Server/ServerType.h @@ -0,0 +1,44 @@ +#pragma once + +#include + +namespace DB +{ + +class ServerType +{ +public: + enum Type + { + TCP, + TCP_WITH_PROXY, + TCP_SECURE, + HTTP, + HTTPS, + MYSQL, + GRPC, + POSTGRESQL, + PROMETHEUS, + CUSTOM, + INTERSERVER_HTTP, + INTERSERVER_HTTPS, + QUERIES_ALL, + QUERIES_DEFAULT, + QUERIES_CUSTOM, + END + }; + + ServerType() = default; + explicit ServerType(Type type_, const std::string & custom_name_ = "") : type(type_), custom_name(custom_name_) {} + + static const char * serverTypeToString(Type type); + + /// Checks whether provided in the arguments type should be started or stopped based on current server type. + bool shouldStart(Type server_type, const std::string & server_custom_name = "") const; + bool shouldStop(const std::string & port_name) const; + + Type type; + std::string custom_name; +}; + +} diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 36566832ebc..ac3928b4abe 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -184,14 +184,17 @@ void TCPHandler::runImpl() try { receiveHello(); + + /// In interserver mode queries are executed without a session context. + if (!is_interserver_mode) + session->makeSessionContext(); + sendHello(); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); - if (!is_interserver_mode) /// In interserver mode queries are executed without a session context. + if (!is_interserver_mode) { - session->makeSessionContext(); - /// If session created, then settings in session context has been updated. /// So it's better to update the connection settings for flexibility. 
extractConnectionSettingsFromContext(session->sessionContext()); @@ -260,6 +263,17 @@ void TCPHandler::runImpl() std::unique_ptr exception; bool network_error = false; bool query_duration_already_logged = false; + auto log_query_duration = [this, &query_duration_already_logged]() + { + if (query_duration_already_logged) + return; + query_duration_already_logged = true; + auto elapsed_sec = state.watch.elapsedSeconds(); + /// We already logged more detailed info if we read some rows + if (elapsed_sec < 1.0 && state.progress.read_rows) + return; + LOG_DEBUG(log, "Processed in {} sec.", elapsed_sec); + }; try { @@ -489,9 +503,7 @@ void TCPHandler::runImpl() /// Do it before sending end of stream, to have a chance to show log message in client. query_scope->logPeakMemoryUsage(); - - LOG_DEBUG(log, "Processed in {} sec.", state.watch.elapsedSeconds()); - query_duration_already_logged = true; + log_query_duration(); if (state.is_connection_closed) break; @@ -613,10 +625,7 @@ void TCPHandler::runImpl() LOG_WARNING(log, "Can't skip data packets after query failure."); } - if (!query_duration_already_logged) - { - LOG_DEBUG(log, "Processed in {} sec.", state.watch.elapsedSeconds()); - } + log_query_duration(); /// QueryState should be cleared before QueryScope, since otherwise /// the MemoryTracker will be wrong for possible deallocations. @@ -1177,21 +1186,11 @@ std::unique_ptr TCPHandler::makeSession() auto res = std::make_unique(server.context(), interface, socket().secure(), certificate); - auto & client_info = res->getClientInfo(); - client_info.forwarded_for = forwarded_for; - client_info.client_name = client_name; - client_info.client_version_major = client_version_major; - client_info.client_version_minor = client_version_minor; - client_info.client_version_patch = client_version_patch; - client_info.client_tcp_protocol_version = client_tcp_protocol_version; - - client_info.connection_client_version_major = client_version_major; - client_info.connection_client_version_minor = client_version_minor; - client_info.connection_client_version_patch = client_version_patch; - client_info.connection_tcp_protocol_version = client_tcp_protocol_version; - - client_info.quota_key = quota_key; - client_info.interface = interface; + res->setForwardedFor(forwarded_for); + res->setClientName(client_name); + res->setClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); + res->setConnectionClientVersion(client_version_major, client_version_minor, client_version_patch, client_tcp_protocol_version); + res->setClientInterface(interface); return res; } @@ -1253,7 +1252,7 @@ void TCPHandler::receiveHello() } session = makeSession(); - auto & client_info = session->getClientInfo(); + const auto & client_info = session->getClientInfo(); #if USE_SSL /// Authentication with SSL user certificate @@ -1283,11 +1282,10 @@ void TCPHandler::receiveHello() void TCPHandler::receiveAddendum() { if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY) - { readStringBinary(quota_key, *in); - if (!is_interserver_mode) - session->getClientInfo().quota_key = quota_key; - } + + if (!is_interserver_mode) + session->setQuotaClientKey(quota_key); } @@ -1311,16 +1309,16 @@ void TCPHandler::receiveUnexpectedHello() void TCPHandler::sendHello() { writeVarUInt(Protocol::Server::Hello, *out); - writeStringBinary(DBMS_NAME, *out); - writeVarUInt(DBMS_VERSION_MAJOR, *out); - writeVarUInt(DBMS_VERSION_MINOR, *out); + writeStringBinary(VERSION_NAME, *out); + 
writeVarUInt(VERSION_MAJOR, *out); + writeVarUInt(VERSION_MINOR, *out); writeVarUInt(DBMS_TCP_PROTOCOL_VERSION, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE) writeStringBinary(DateLUT::instance().getTimeZone(), *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME) writeStringBinary(server_display_name, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) - writeVarUInt(DBMS_VERSION_PATCH, *out); + writeVarUInt(VERSION_PATCH, *out); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { auto rules = server.context()->getAccessControl().getPasswordComplexityRules(); @@ -1600,12 +1598,12 @@ void TCPHandler::receiveQuery() if (query_kind == ClientInfo::QueryKind::INITIAL_QUERY) { /// Throw an exception if the passed settings violate the constraints. - query_context->checkSettingsConstraints(settings_changes); + query_context->checkSettingsConstraints(settings_changes, SettingSource::QUERY); } else { /// Quietly clamp to the constraints if it's not an initial query. - query_context->clampToSettingsConstraints(settings_changes); + query_context->clampToSettingsConstraints(settings_changes, SettingSource::QUERY); } query_context->applySettingsChanges(settings_changes); diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 3997e0f19b6..6fa1d65de42 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -6,10 +6,18 @@ #include #include -#include #include +#include + +#include "config.h" + +/// Embedded HTML pages +INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html"); +INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html"); +INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js"); + namespace DB { @@ -34,13 +42,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR if (request.getURI().starts_with("/play")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("play.html"); + *response.send() << std::string_view(reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize); } else if (request.getURI().starts_with("/dashboard")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - std::string html(getResource("dashboard.html")); + std::string html(reinterpret_cast(gresource_dashboard_htmlData), gresource_dashboard_htmlSize); /// Replace a link to external JavaScript file to embedded file. /// This allows to open the HTML without running a server and to host it on server. @@ -55,7 +63,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR else if (request.getURI() == "/js/uplot.js") { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("js/uplot.js"); + *response.send() << std::string_view(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); } else { diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index a9247f9b898..96dcd74f947 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1077,9 +1077,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (!command.if_exists) { - String exception_message = fmt::format("Wrong column. 
Cannot find column {} to modify", backQuote(column_name)); - all_columns.appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Wrong column. Cannot find column {} to modify{}", + backQuote(column_name), all_columns.getHintsMessage(column_name)); } else continue; diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 045afd7e6e6..0c918bda5fd 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -232,9 +232,7 @@ void ColumnsDescription::remove(const String & column_name) auto range = getNameRange(columns, column_name); if (range.first == range.second) { - String exception_message = fmt::format("There is no column {} in table", column_name); - appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table{}", column_name, getHintsMessage(column_name)); } for (auto list_it = range.first; list_it != range.second;) @@ -249,9 +247,8 @@ void ColumnsDescription::rename(const String & column_from, const String & colum auto it = columns.get<1>().find(column_from); if (it == columns.get<1>().end()) { - String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_from); - appendHintsMessage(exception_message, column_from); - throw Exception::createDeprecated(exception_message, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find column {} in ColumnsDescription{}", + column_from, getHintsMessage(column_from)); } columns.get<1>().modify_key(it, [&column_to] (String & old_name) diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index 365a999673e..fb1eeed3127 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -160,9 +160,8 @@ public: auto it = columns.get<1>().find(column_name); if (it == columns.get<1>().end()) { - String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_name); - appendHintsMessage(exception_message, column_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find column {} in ColumnsDescription{}", + column_name, getHintsMessage(column_name)); } removeSubcolumns(it->name); diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index db37ac7c4c3..249ed8be428 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -35,7 +35,7 @@ String ConstraintsDescription::toString() const for (const auto & constraint : constraints) list.children.push_back(constraint); - return serializeAST(list, true); + return serializeAST(list); } ConstraintsDescription ConstraintsDescription::parse(const String & str) diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 309aa54909a..1172a40627d 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -281,7 +281,6 @@ struct DeltaLakeMetadataParser::Impl ArrowColumnToCHColumn column_reader( header, "Parquet", - format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, /* 
null_as_default */true, /* case_insensitive_column_matching */false); diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp index a1f35a5ae42..78d69c83989 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/DataLakes/HudiMetadataParser.cpp @@ -67,7 +67,8 @@ struct HudiMetadataParser::Impl { auto key_file = std::filesystem::path(key); Strings file_parts; - splitInto<'_'>(file_parts, key_file.stem()); + const String stem = key_file.stem(); + splitInto<'_'>(file_parts, stem); if (file_parts.size() != 3) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp index f62c440bc2f..ac472c190e4 100644 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ b/src/Storages/DataLakes/S3MetadataReader.cpp @@ -57,8 +57,8 @@ std::vector S3DataLakeMetadataReadHelper::listFiles( { outcome = client->ListObjectsV2(request); if (!outcome.IsSuccess()) - throw Exception( - ErrorCodes::S3_ERROR, + throw S3Exception( + outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", quoteString(bucket), quoteString(base_configuration.url.key), diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d8d9a0c9d1e..51839ad973a 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -419,7 +419,7 @@ catch (...) throw; } -void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_path) +void DistributedAsyncInsertDirectoryQueue::processFile(std::string & file_path) { OpenTelemetry::TracingContextHolderPtr thread_trace_context; @@ -459,7 +459,7 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ if (isDistributedSendBroken(e.code(), e.isRemoteException())) { markAsBroken(file_path); - current_file.clear(); + file_path.clear(); } throw; } @@ -473,8 +473,8 @@ void DistributedAsyncInsertDirectoryQueue::processFile(const std::string & file_ auto dir_sync_guard = getDirectorySyncGuard(relative_path); markAsSend(file_path); - current_file.clear(); LOG_TRACE(log, "Finished processing `{}` (took {} ms)", file_path, watch.elapsedMilliseconds()); + file_path.clear(); } struct DistributedAsyncInsertDirectoryQueue::BatchHeader diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index 9a8a235e265..45c355bb64e 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -100,7 +100,7 @@ private: void addFile(const std::string & file_path); void initializeFilesFromDisk(); void processFiles(); - void processFile(const std::string & file_path); + void processFile(std::string & file_path); void processFilesWithBatching(); void markAsBroken(const std::string & file_path); diff --git a/src/Storages/Distributed/DistributedSettings.h b/src/Storages/Distributed/DistributedSettings.h index 86b77707dee..85e64cabfd7 100644 --- a/src/Storages/Distributed/DistributedSettings.h +++ b/src/Storages/Distributed/DistributedSettings.h @@ -26,6 +26,7 @@ class ASTStorage; M(UInt64, monitor_split_batch_on_failure, 0, "Default - distributed_directory_monitor_split_batch_on_failure", 
0) \ M(Milliseconds, monitor_sleep_time_ms, 0, "Default - distributed_directory_monitor_sleep_time_ms", 0) \ M(Milliseconds, monitor_max_sleep_time_ms, 0, "Default - distributed_directory_monitor_max_sleep_time_ms", 0) \ + M(Bool, flush_on_detach, true, "Flush data to remote nodes on DETACH/DROP/server shutdown", 0) \ DECLARE_SETTINGS_TRAITS(DistributedSettingsTraits, LIST_OF_DISTRIBUTED_SETTINGS) diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 875764f7633..0dcdae01ba9 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -132,7 +132,7 @@ DistributedSink::DistributedSink( const auto & settings = context->getSettingsRef(); if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); - context->getClientInfo().distributed_depth += 1; + context->increaseDistributedDepth(); random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; } diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/HDFS/HDFSCommon.h index 4588480602a..23f9e4d8f12 100644 --- a/src/Storages/HDFS/HDFSCommon.h +++ b/src/Storages/HDFS/HDFSCommon.h @@ -57,7 +57,23 @@ public: ~HDFSBuilderWrapper() { hdfsFreeBuilder(hdfs_builder); } HDFSBuilderWrapper(const HDFSBuilderWrapper &) = delete; - HDFSBuilderWrapper(HDFSBuilderWrapper &&) = default; + HDFSBuilderWrapper & operator=(const HDFSBuilderWrapper &) = delete; + + HDFSBuilderWrapper(HDFSBuilderWrapper && other) noexcept + { + *this = std::move(other); + } + + HDFSBuilderWrapper & operator=(HDFSBuilderWrapper && other) noexcept + { + std::swap(hdfs_builder, other.hdfs_builder); + config_stor = std::move(other.config_stor); + hadoop_kerberos_keytab = std::move(other.hadoop_kerberos_keytab); + hadoop_kerberos_principal = std::move(other.hadoop_kerberos_principal); + hadoop_security_kerberos_ticket_cache_path = std::move(other.hadoop_security_kerberos_ticket_cache_path); + need_kinit = std::move(other.need_kinit); + return *this; + } hdfsBuilder * get() { return hdfs_builder; } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index ee8e0764db0..9455afe7833 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -3,6 +3,7 @@ #if USE_HDFS #include #include +#include #include #include #include @@ -41,6 +42,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory file_size_) : BufferWithOwnMemory(use_external_buffer_ ? 
0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) @@ -63,6 +66,22 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(file_info->mSize); + hdfsFreeFileInfo(file_info, 1); + } } ~ReadBufferFromHDFSImpl() override @@ -72,10 +91,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemorymSize; + return file_size; } bool nextImpl() override @@ -89,12 +105,16 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory {})", file_offset, read_until_position - 1); - num_bytes_to_read = read_until_position - file_offset; + num_bytes_to_read = std::min(read_until_position - file_offset, internal_buffer.size()); } else { num_bytes_to_read = internal_buffer.size(); } + if (file_size != 0 && file_offset >= file_size) + { + return false; + } ResourceGuard rlock(read_settings.resource_link, num_bytes_to_read); int bytes_read; @@ -156,10 +176,11 @@ ReadBufferFromHDFS::ReadBufferFromHDFS( const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_, - bool use_external_buffer_) + bool use_external_buffer_, + std::optional file_size_) : ReadBufferFromFileBase(read_settings_.remote_fs_buffer_size, nullptr, 0) , impl(std::make_unique( - hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_)) + hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_, file_size_)) , use_external_buffer(use_external_buffer_) { } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index 6aed3ddff26..64adbc62789 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -29,7 +29,8 @@ public: const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_ = 0, - bool use_external_buffer = false); + bool use_external_buffer = false, + std::optional file_size = std::nullopt); ~ReadBufferFromHDFS() override; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index a41c65cdb2e..aa99917d533 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include @@ -64,23 +64,131 @@ namespace ErrorCodes } namespace { + /// Forward-declared to use in LSWithFoldedRegexpMatching w/o circular dependency. + std::vector LSWithRegexpMatching(const String & path_for_ls, + const HDFSFSPtr & fs, + const String & for_match); + + /* + * When `{...}` has any `/`s, it must be processed in a different way: + * Basically, a path with globs is processed by LSWithRegexpMatching. In case it detects multi-dir glob {.../..., .../...}, + * LSWithFoldedRegexpMatching is in charge from now on. + * It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob. + * Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob. + * StorageFile.cpp has the same logic. 
+     */
+    std::vector<StorageHDFS::PathWithInfo> LSWithFoldedRegexpMatching(const String & path_for_ls,
+        const HDFSFSPtr & fs,
+        const String & processed_suffix,
+        const String & suffix_with_globs,
+        re2::RE2 & matcher,
+        const size_t max_depth,
+        const size_t next_slash_after_glob_pos)
+    {
+        /// We don't need to go all the way down every directory once max_depth is reached,
+        /// as it is the upper limit of depth obtained by simply counting `/`s in the curly braces.
+        if (!max_depth)
+            return {};
+
+        HDFSFileInfo ls;
+        ls.file_info = hdfsListDirectory(fs.get(), path_for_ls.data(), &ls.length);
+        if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
+        {
+            // Ignore the file-not-found exception but keep throwing other exceptions; libhdfs3 doesn't have a function to get the exception type, so use errno.
+            throw Exception(
+                ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", path_for_ls, String(hdfsGetLastError()));
+        }
+
+        std::vector<StorageHDFS::PathWithInfo> result;
+
+        if (!ls.file_info && ls.length > 0)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
+
+        for (int i = 0; i < ls.length; ++i)
+        {
+            const String full_path = String(ls.file_info[i].mName);
+            const size_t last_slash = full_path.rfind('/');
+            const String dir_or_file_name = full_path.substr(last_slash);
+            const bool is_directory = ls.file_info[i].mKind == 'D';
+
+            if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
+            {
+                if (next_slash_after_glob_pos == std::string::npos)
+                {
+                    result.emplace_back(StorageHDFS::PathWithInfo{
+                        String(ls.file_info[i].mName),
+                        StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}});
+                }
+                else
+                {
+                    std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(
+                        fs::path(full_path) / "" , fs, suffix_with_globs.substr(next_slash_after_glob_pos));
+                    std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
+                }
+            }
+            else if (is_directory)
+            {
+                std::vector<StorageHDFS::PathWithInfo> result_part = LSWithFoldedRegexpMatching(
+                    fs::path(full_path), fs, processed_suffix + dir_or_file_name,
+                    suffix_with_globs, matcher, max_depth - 1, next_slash_after_glob_pos);
+                std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
+            }
+        }
+        return result;
+    }
+
     /* Recursive directory listing with matched paths as a result.
      * Have the same method in StorageFile.
      */
-    std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match)
+    std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(
+        const String & path_for_ls,
+        const HDFSFSPtr & fs,
+        const String & for_match)
     {
-        const size_t first_glob = for_match.find_first_of("*?{");
+        const size_t first_glob_pos = for_match.find_first_of("*?{");
+        const bool has_glob = first_glob_pos != std::string::npos;
 
-        const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/');
+        const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
         const String suffix_with_globs = for_match.substr(end_of_path_without_globs);   /// begin with '/'
         const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
 
-        const size_t next_slash = suffix_with_globs.find('/', 1);
-        re2::RE2 matcher(makeRegexpPatternFromGlobs(suffix_with_globs.substr(0, next_slash)));
+        size_t slashes_in_glob = 0;
+        const size_t next_slash_after_glob_pos = [&]()
+        {
+            if (!has_glob)
+                return suffix_with_globs.find('/', 1);
+
+            size_t in_curly = 0;
+            for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
+            {
+                if (*it == '{')
+                    ++in_curly;
+                else if (*it == '/')
+                {
+                    if (in_curly)
+                        ++slashes_in_glob;
+                    else
+                        return size_t(std::distance(suffix_with_globs.begin(), it));
+                }
+                else if (*it == '}')
+                    --in_curly;
+            }
+            return std::string::npos;
+        }();
+
+        const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);
+
+        re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob));
         if (!matcher.ok())
             throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
                 "Cannot compile regex from glob ({}): {}", for_match, matcher.error());
 
+        if (slashes_in_glob)
+        {
+            return LSWithFoldedRegexpMatching(fs::path(prefix_without_globs), fs, "", suffix_with_globs,
+                matcher, slashes_in_glob, next_slash_after_glob_pos);
+        }
+
         HDFSFileInfo ls;
         ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length);
         if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
@@ -97,7 +205,7 @@ namespace
             const String full_path = String(ls.file_info[i].mName);
             const size_t last_slash = full_path.rfind('/');
             const String file_name = full_path.substr(last_slash);
-            const bool looking_for_directory = next_slash != std::string::npos;
+            const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos;
             const bool is_directory = ls.file_info[i].mKind == 'D';
             /// Condition with type of current file_info means what kind of path is it in current iteration of ls
             if (!is_directory && !looking_for_directory)
@@ -111,7 +219,7 @@ namespace
             {
                 if (re2::RE2::FullMatch(file_name, matcher))
                 {
-                    std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash));
+                    std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash_after_glob_pos));
                     /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
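[Editor's note: illustrative aside, not part of the patch.] The immediately-invoked lambda above decides where the current glob component ends: a '/' inside "{...}" belongs to a multi-directory glob such as {2023/01,2023/02}, so it is only counted (slashes_in_glob later bounds how deep LSWithFoldedRegexpMatching descends), while the first '/' outside the braces terminates the component. A minimal standalone sketch of that scan, with a hypothetical helper name (splitGlobSuffix is not in the patch):

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <utility>

    /// Hypothetical helper mirroring the brace-aware scan in LSWithRegexpMatching:
    /// returns {position of the first '/' outside curly braces, number of '/' inside braces}.
    std::pair<std::size_t, std::size_t> splitGlobSuffix(const std::string & suffix_with_globs)
    {
        std::size_t in_curly = 0;
        std::size_t slashes_in_glob = 0;
        for (std::size_t i = 1; i < suffix_with_globs.size(); ++i)  /// the suffix always starts with '/'
        {
            const char c = suffix_with_globs[i];
            if (c == '{')
                ++in_curly;
            else if (c == '}')
                --in_curly;
            else if (c == '/')
            {
                if (in_curly)
                    ++slashes_in_glob;          /// slash belongs to the glob itself
                else
                    return {i, slashes_in_glob}; /// slash ends the current component
            }
        }
        return {std::string::npos, slashes_in_glob};
    }

    int main()
    {
        /// "{2023/01,2023/02}" stays one glob component: the two inner slashes are only
        /// counted, and the component ends at the first slash after the closing brace.
        const auto [next_slash, inner_slashes] = splitGlobSuffix("/{2023/01,2023/02}/*.parquet");
        assert(next_slash == 18);
        assert(inner_slashes == 2);

        /// A plain single-level glob has no inner slashes and no further component boundary.
        const auto [pos2, inner2] = splitGlobSuffix("/*.parquet");
        assert(pos2 == std::string::npos);
        assert(inner2 == 0);
        return 0;
    }

In the patch itself the scan runs inline and feeds slashes_in_glob to LSWithFoldedRegexpMatching as the maximum descent depth; the sketch only illustrates the splitting rule.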
std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); } @@ -259,8 +367,13 @@ public: { const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); uris = getPathsList(path_from_uri, uri_without_path, context_); + auto file_progress_callback = context_->getFileProgressCallback(); for (auto & elem : uris) + { elem.path = uri_without_path + elem.path; + if (file_progress_callback && elem.info) + file_progress_callback(FileProgress(0, elem.info->size)); + } uris_iter = uris.begin(); } @@ -281,37 +394,54 @@ private: std::vector::iterator uris_iter; }; -class HDFSSource::URISIterator::Impl +class HDFSSource::URISIterator::Impl : WithContext { public: - explicit Impl(const std::vector & uris_, ContextPtr context) + explicit Impl(const std::vector & uris_, ContextPtr context_) + : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris_[0]); - HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - for (const auto & uri : uris_) + if (!uris.empty()) { - path_and_uri = getPathFromUriAndUriWithoutPath(uri); - if (!hdfsExists(fs.get(), path_and_uri.first.c_str())) - uris.push_back(uri); + auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); + builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); + fs = createHDFSFS(builder.get()); } - uris_iter = uris.begin(); } StorageHDFS::PathWithInfo next() { - std::lock_guard lock(mutex); - if (uris_iter == uris.end()) - return {"", {}}; - auto key = *uris_iter; - ++uris_iter; - return {key, {}}; + String uri; + hdfsFileInfo * hdfs_info; + do + { + size_t current_index = index.fetch_add(1); + if (current_index >= uris.size()) + return {"", {}}; + + uri = uris[current_index]; + auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); + hdfs_info = hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str()); + } + /// Skip non-existed files. 
+ while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); + + std::optional info; + if (hdfs_info) + { + info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; + if (file_progress_callback) + file_progress_callback(FileProgress(0, hdfs_info->mSize)); + } + + return {uri, info}; } private: - std::mutex mutex; + std::atomic_size_t index = 0; Strings uris; - Strings::iterator uris_iter; + HDFSBuilderWrapper builder; + HDFSFSPtr fs; + std::function file_progress_callback; }; HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri) @@ -332,30 +462,21 @@ StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() return pimpl->next(); } -Block HDFSSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - HDFSSource::HDFSSource( + const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - const Block & block_for_format_, - const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, - std::shared_ptr file_iterator_, - ColumnsDescription columns_description_) - : ISource(getHeader(block_for_format_, requested_virtual_columns_)) + std::shared_ptr file_iterator_) + : ISource(info.source_header, false) , WithContext(context_) , storage(std::move(storage_)) - , block_for_format(block_for_format_) - , requested_virtual_columns(requested_virtual_columns_) + , block_for_format(info.format_header) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) , max_block_size(max_block_size_) , file_iterator(file_iterator_) - , columns_description(std::move(columns_description_)) + , columns_description(info.columns_description) { initialize(); } @@ -374,13 +495,17 @@ bool HDFSSource::initialize() continue; current_path = path_with_info.path; + std::optional file_size; + if (path_with_info.info) + file_size = path_with_info.info->size; const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); + uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); if (!skip_empty_files || !impl->eof()) { + impl->setProgressCallback(getContext()); const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); break; @@ -389,14 +514,6 @@ bool HDFSSource::initialize() current_path = path_with_info.path; - if (path_with_info.info && path_with_info.info->size) - { - /// Adjust total_rows_approx_accumulated with new total size. 
- if (total_files_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_files_size + path_with_info.info->size) / total_files_size * total_rows_approx_accumulated)); - total_files_size += path_with_info.info->size; - } - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); QueryPipelineBuilder builder; @@ -408,6 +525,14 @@ bool HDFSSource::initialize() return std::make_shared(header, columns_description, *input_format, getContext()); }); } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); return true; @@ -434,14 +559,8 @@ Chunk HDFSSource::generate() { Columns columns = chunk.getColumns(); UInt64 num_rows = chunk.getNumRows(); - - if (num_rows && total_files_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } + size_t chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); for (const auto & virtual_column : requested_virtual_columns) { @@ -602,7 +721,7 @@ private: bool StorageHDFS::supportsSubsetOfColumns() const { - return format_name != "Distributed" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name); } Pipe StorageHDFS::read( @@ -640,50 +759,17 @@ Pipe StorageHDFS::read( }); } - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); Pipes pipes; auto this_ptr = std::static_pointer_cast(shared_from_this()); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( + read_from_format_info, this_ptr, - block_for_format, - requested_virtual_columns, context_, max_block_size, - iterator_wrapper, - columns_description)); + iterator_wrapper)); } return 
Pipe::unitePipes(std::move(pipes)); } diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 74801b68f73..626d22ad33f 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace DB @@ -29,6 +30,8 @@ public: struct PathWithInfo { + PathWithInfo() = default; + PathWithInfo(const String & path_, const std::optional & info_) : path(path_), info(info_) {} String path; std::optional info; }; @@ -74,6 +77,8 @@ public: /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. bool supportsSubsetOfColumns() const override; + bool supportsSubcolumns() const override { return true; } + static ColumnsDescription getTableStructureFromData( const String & format, const String & uri, @@ -140,16 +145,12 @@ public: using IteratorWrapper = std::function; using StorageHDFSPtr = std::shared_ptr; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - HDFSSource( + const ReadFromFormatInfo & info, StorageHDFSPtr storage_, - const Block & block_for_format_, - const std::vector & requested_virtual_columns_, ContextPtr context_, UInt64 max_block_size_, - std::shared_ptr file_iterator_, - ColumnsDescription columns_description_); + std::shared_ptr file_iterator_); String getName() const override; @@ -158,7 +159,8 @@ public: private: StorageHDFSPtr storage; Block block_for_format; - std::vector requested_virtual_columns; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; UInt64 max_block_size; std::shared_ptr file_iterator; ColumnsDescription columns_description; @@ -169,11 +171,6 @@ private: std::unique_ptr reader; String current_path; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_files_size = 0; - /// Recreate ReadBuffer and PullingPipelineExecutor for each file. bool initialize(); }; diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 350051ab089..1dbf2f3c4e2 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -37,6 +37,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + private: void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 8cf708acd8b..ae7659e074f 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -71,15 +71,12 @@ TableLockHolder IStorage::tryLockForShare(const String & query_id, const std::ch return result; } -IStorage::AlterLockHolder IStorage::lockForAlter(const std::chrono::milliseconds & acquire_timeout) +std::optional IStorage::tryLockForAlter(const std::chrono::milliseconds & acquire_timeout) { AlterLockHolder lock{alter_lock, std::defer_lock}; if (!lock.try_lock_for(acquire_timeout)) - throw Exception(ErrorCodes::DEADLOCK_AVOIDED, - "Locking attempt for ALTER on \"{}\" has timed out! ({} ms) " - "Possible deadlock avoided. 
Client should retry.", - getStorageID().getFullTableName(), acquire_timeout.count()); + return {}; if (is_dropped || is_detached) throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is dropped or detached", getStorageID()); @@ -87,6 +84,18 @@ IStorage::AlterLockHolder IStorage::lockForAlter(const std::chrono::milliseconds return lock; } +IStorage::AlterLockHolder IStorage::lockForAlter(const std::chrono::milliseconds & acquire_timeout) +{ + + if (auto lock = tryLockForAlter(acquire_timeout); lock == std::nullopt) + throw Exception(ErrorCodes::DEADLOCK_AVOIDED, + "Locking attempt for ALTER on \"{}\" has timed out! ({} ms) " + "Possible deadlock avoided. Client should retry.", + getStorageID().getFullTableName(), acquire_timeout.count()); + else + return std::move(*lock); +} + TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, const std::chrono::milliseconds & acquire_timeout) { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index b262d88db57..ec92f57aeda 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -254,6 +254,9 @@ public: /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } + /// Return true if the trivial count query could be optimized without reading the data at all. + virtual bool supportsTrivialCountOptimization() const { return false; } + private: StorageID storage_id; @@ -283,6 +286,7 @@ public: /// sure, that we execute only one simultaneous alter. Doesn't affect share lock. using AlterLockHolder = std::unique_lock; AlterLockHolder lockForAlter(const std::chrono::milliseconds & acquire_timeout); + std::optional tryLockForAlter(const std::chrono::milliseconds & acquire_timeout); /// Lock table exclusively. This lock must be acquired if you want to be /// sure, that no other thread (SELECT, merge, ALTER, etc.) 
doing something @@ -549,15 +553,15 @@ public: /** * If the storage requires some complicated work on destroying, * then you have two virtual methods: - * - flush() + * - flushAndPrepareForShutdown() * - shutdown() * * @see shutdown() - * @see flush() + * @see flushAndPrepareForShutdown() */ void flushAndShutdown() { - flush(); + flushAndPrepareForShutdown(); shutdown(); } @@ -570,7 +574,7 @@ public: /// Called before shutdown() to flush data to underlying storage /// Data in memory need to be persistent - virtual void flush() {} + virtual void flushAndPrepareForShutdown() {} /// Asks table to stop executing some action identified by action_type /// If table does not support such type of lock, and empty lock is returned diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index a93ac248c98..06518a52c61 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -11,6 +11,7 @@ #include #include +#include "Common/Exception.h" namespace DB @@ -89,8 +90,16 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast result.type = Poco::toLower(index_definition->type->name); result.granularity = index_definition->granularity; - ASTPtr expr_list = extractKeyExpressionList(index_definition->expr->clone()); - result.expression_list_ast = expr_list->clone(); + ASTPtr expr_list; + if (index_definition->expr) + { + expr_list = extractKeyExpressionList(index_definition->expr->clone()); + result.expression_list_ast = expr_list->clone(); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expression is not set"); + } auto syntax = TreeRewriter(context).analyze(expr_list, columns.getAllPhysical()); result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true); @@ -142,7 +151,7 @@ String IndicesDescription::toString() const for (const auto & index : *this) list.children.push_back(index.definition_ast); - return serializeAST(list, true); + return serializeAST(list); } diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 2aba76c1a3f..a7315eb51ea 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -250,15 +250,16 @@ StorageKafka::StorageKafka( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , kafka_settings(std::move(kafka_settings_)) - , topics(parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value))) - , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value)) - , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value)) + , macros_info{.table_id = table_id_} + , topics(parseTopics(getContext()->getMacros()->expand(kafka_settings->kafka_topic_list.value, macros_info))) + , brokers(getContext()->getMacros()->expand(kafka_settings->kafka_broker_list.value, macros_info)) + , group(getContext()->getMacros()->expand(kafka_settings->kafka_group_name.value, macros_info)) , client_id( kafka_settings->kafka_client_id.value.empty() ? 
getDefaultClientId(table_id_) - : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value)) + : getContext()->getMacros()->expand(kafka_settings->kafka_client_id.value, macros_info)) , format_name(getContext()->getMacros()->expand(kafka_settings->kafka_format.value)) , max_rows_per_message(kafka_settings->kafka_max_rows_per_message.value) - , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value)) + , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value, macros_info)) , num_consumers(kafka_settings->kafka_num_consumers.value) , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) , semaphore(0, static_cast(num_consumers)) diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 09aa091ef18..72875e77b12 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -79,6 +80,7 @@ public: private: // Configuration and state std::unique_ptr kafka_settings; + Macros::MacroExpansionInfo macros_info; const Names topics; const String brokers; const String group; diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index e163400f2af..792133ced64 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -32,11 +32,8 @@ public: void onFinish() override { - UInt128 key; - String key_str; - - new_hash->get128(key); - key_str = getHexUIntLowercase(key); + const auto key = new_hash->get128(); + const auto key_str = getHexUIntLowercase(key); std::lock_guard lock(storage.mutex); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 173bb128c4a..5719529533e 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -681,7 +681,6 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) { SipHash hash; - UInt128 key; BlocksPtr new_blocks = std::make_shared(); BlocksMetadataPtr new_blocks_metadata = std::make_shared(); @@ -713,7 +712,7 @@ bool StorageLiveView::getNewBlocks(const std::lock_guard & lock) new_blocks->push_back(block); } - hash.get128(key); + const auto key = hash.get128(); /// Update blocks only if hash keys do not match /// NOTE: hash could be different for the same result diff --git a/src/Storages/MarkCache.h b/src/Storages/MarkCache.h index 0f4af57fc8e..495cebb080f 100644 --- a/src/Storages/MarkCache.h +++ b/src/Storages/MarkCache.h @@ -50,13 +50,9 @@ public: /// Calculate key from path to file and offset. 
static UInt128 hash(const String & path_to_file) { - UInt128 key; - SipHash hash; hash.update(path_to_file.data(), path_to_file.size() + 1); - hash.get128(key); - - return key; + return hash.get128(); } template diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp index 5d77fc080a4..aa8b437263a 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp @@ -62,9 +62,10 @@ ColumnsDescription StorageMeiliSearch::getTableStructureFromData(const MeiliSear String convertASTtoStr(ASTPtr ptr) { WriteBufferFromOwnString out; - IAST::FormatSettings settings(out, true); - settings.identifier_quoting_style = IdentifierQuotingStyle::BackticksMySQL; - settings.always_quote_identifiers = IdentifierQuotingStyle::BackticksMySQL != IdentifierQuotingStyle::None; + IAST::FormatSettings settings( + out, /*one_line*/ true, /*hilite*/ false, + /*always_quote_identifiers*/ IdentifierQuotingStyle::BackticksMySQL != IdentifierQuotingStyle::None, + /*identifier_quoting_style*/ IdentifierQuotingStyle::BackticksMySQL); ptr->format(settings); return out.str(); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index e1921f45eda..618dbd845ae 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -331,6 +331,7 @@ void DataPartStorageOnDiskBase::backup( const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const @@ -350,7 +351,7 @@ void DataPartStorageOnDiskBase::backup( temp_dir_it = temp_dirs->emplace(disk, std::make_shared(disk, "tmp/")).first; temp_dir_owner = temp_dir_it->second; - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); temp_part_dir = temp_dir / part_path_in_backup.relative_path(); disk->createDirectories(temp_part_dir); } @@ -382,7 +383,7 @@ void DataPartStorageOnDiskBase::backup( if (files_without_checksums.contains(filepath)) { - backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, copy_encrypted)); + backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk, read_settings, copy_encrypted)); continue; } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 648bc908f59..6c2987e4971 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -55,6 +55,7 @@ public: const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const override; diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 23bbc1c7f9d..4545b2b98ae 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -203,6 +203,8 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write sendPartFromMemory(part, out, send_projections); else sendPartFromDisk(part, out, client_protocol_version, false, send_projections); + + 
data.addLastSentPart(part->info); } catch (const NetException &) { @@ -353,8 +355,14 @@ MergeTreeData::DataPartPtr Service::findPart(const String & name) { /// It is important to include Outdated parts here because remote replicas cannot reliably /// determine the local state of the part, so queries for the parts in these states are completely normal. - auto part = data.getPartIfExists( - name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + MergeTreeData::DataPartPtr part; + + /// Ephemeral zero-copy lock may be lost for PreActive parts + bool zero_copy_enabled = data.getSettings()->allow_remote_fs_zero_copy_replication; + if (zero_copy_enabled) + part = data.getPartIfExists(name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); + else + part = data.getPartIfExists(name, {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); if (part) return part; diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp index aa0c1fccbc3..5b798ecc8a5 100644 --- a/src/Storages/MergeTree/GinIndexStore.cpp +++ b/src/Storages/MergeTree/GinIndexStore.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -243,6 +242,15 @@ void GinIndexStore::finalize() { if (!current_postings.empty()) writeSegment(); + + if (metadata_file_stream) + metadata_file_stream->finalize(); + + if (dict_file_stream) + dict_file_stream->finalize(); + + if (postings_file_stream) + postings_file_stream->finalize(); } void GinIndexStore::initFileStreams() @@ -319,13 +327,8 @@ void GinIndexStore::writeSegment() current_segment.segment_id = getNextSegmentID(); metadata_file_stream->sync(); - metadata_file_stream->finalize(); - dict_file_stream->sync(); - dict_file_stream->finalize(); - postings_file_stream->sync(); - postings_file_stream->finalize(); } GinIndexStoreDeserializer::GinIndexStoreDeserializer(const GinIndexStorePtr & store_) diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 8dbf5caa168..19af6085547 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -221,6 +221,7 @@ public: const NameSet & files_without_checksums, const String & path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, bool make_temporary_hard_links, BackupEntries & backup_entries, TemporaryFilesOnDisks * temp_dirs) const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index ec157a01c21..6564fc67767 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,5 +1,6 @@ #include "IMergeTreeDataPart.h" -#include "Storages/MergeTree/IDataPartStorage.h" +#include +#include #include #include @@ -1673,8 +1674,8 @@ std::pair IMergeTreeDataPart::canRemovePart() const void IMergeTreeDataPart::initializePartMetadataManager() { #if USE_ROCKSDB - if (use_metadata_cache) - metadata_manager = std::make_shared(this, storage.getContext()->getMergeTreeMetadataCache()); + if (auto metadata_cache = storage.getContext()->tryGetMergeTreeMetadataCache(); metadata_cache && use_metadata_cache) + metadata_manager = std::make_shared(this, metadata_cache); else metadata_manager = std::make_shared(this); #else @@ -1779,7 +1780,8 @@ void IMergeTreeDataPart::renameToDetached(const String & prefix) part_is_probably_removed_from_disk = true; } -DataPartStoragePtr 
IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/) const +DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/, + const DiskTransactionPtr & disk_transaction) const { /// Avoid unneeded duplicates of broken parts if we try to detach the same broken part multiple times. /// Otherwise it may pollute detached/ with dirs with _tryN suffix and we will fail to remove broken part after 10 attempts. @@ -1794,7 +1796,8 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix IDataPartStorage::ClonePartParams params { .copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication, - .make_source_readonly = true + .make_source_readonly = true, + .external_transaction = disk_transaction }; return getDataPartStorage().freeze( storage.relative_data_path, @@ -1816,6 +1819,22 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); } +UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const +{ + auto metadata_snapshot = storage.getInMemoryMetadataPtr(); + if (parent_part) + metadata_snapshot = metadata_snapshot->projections.get(name).metadata; + const auto & pk = metadata_snapshot->getPrimaryKey(); + if (!pk.column_names.empty()) + { + String file = "primary" + getIndexExtension(false); + if (checksums.files.contains("primary" + getIndexExtension(true))) + file = "primary" + getIndexExtension(true); + return getFileSizeOrZero(file); + } + return 0; +} + void IMergeTreeDataPart::checkConsistencyBase() const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); @@ -1966,6 +1985,12 @@ IndexSize IMergeTreeDataPart::getSecondaryIndexSize(const String & secondary_ind return ColumnSize{}; } +bool IMergeTreeDataPart::hasSecondaryIndex(const String & index_name) const +{ + auto file_name = INDEX_FILE_PREFIX + index_name; + return checksums.has(file_name + ".idx") || checksums.has(file_name + ".idx2"); +} + void IMergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) const { for (const auto & [column_name, size] : columns_sizes) @@ -2035,14 +2060,8 @@ String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const hash.update(token.data(), token.size()); } - union - { - char bytes[16]; - UInt64 words[2]; - } hash_value; - hash.get128(hash_value.bytes); - - return info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]); + const auto hash_value = hash.get128(); + return info.partition_id + "_" + toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); } IMergeTreeDataPart::uint128 IMergeTreeDataPart::getActualChecksumByFile(const String & file_name) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 2c0cf37b3a5..a8e053a9c7b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -89,7 +89,7 @@ public: virtual MergeTreeReaderPtr getReader( const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -122,6 +122,9 @@ public: /// Otherwise return 
information about secondary index size on disk. IndexSize getSecondaryIndexSize(const String & secondary_index_name) const; + /// Returns true if there is materialized index with specified name in part. + bool hasSecondaryIndex(const String & index_name) const; + /// Return information about column size on disk for all columns in part ColumnSize getTotalColumnsSize() const { return total_columns_size; } @@ -353,6 +356,7 @@ public: UInt64 getIndexSizeInBytes() const; UInt64 getIndexSizeInAllocatedBytes() const; UInt64 getMarksCount() const; + UInt64 getIndexSizeFromFile() const; UInt64 getBytesOnDisk() const { return bytes_on_disk; } void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } @@ -367,7 +371,8 @@ public: virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists); /// Makes clone of a part in detached/ directory via hard links - virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const; + virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const; /// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const; @@ -500,7 +505,7 @@ public: mutable std::atomic removal_state = DataPartRemovalState::NOT_ATTEMPTED; - mutable std::atomic last_removal_attemp_time = 0; + mutable std::atomic last_removal_attempt_time = 0; protected: diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index f9b97a6a05d..4bb8c400691 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes IMergeTreeReader::IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, const MarkRanges & all_mark_ranges_, @@ -35,7 +35,7 @@ IMergeTreeReader::IMergeTreeReader( , uncompressed_cache(uncompressed_cache_) , mark_cache(mark_cache_) , settings(settings_) - , metadata_snapshot(metadata_snapshot_) + , storage_snapshot(storage_snapshot_) , all_mark_ranges(all_mark_ranges_) , alter_conversions(data_part_info_for_read->getAlterConversions()) /// For wide parts convert plain arrays of Nested to subcolumns @@ -71,7 +71,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e res_columns, num_rows, Nested::convertToSubcolumns(requested_columns), Nested::convertToSubcolumns(available_columns), - partially_read_columns, metadata_snapshot); + partially_read_columns, storage_snapshot->metadata); should_evaluate_missing_defaults = std::any_of( res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; }); @@ -110,7 +110,10 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns } auto dag = DB::evaluateMissingDefaults( - additional_columns, requested_columns, metadata_snapshot->getColumns(), data_part_info_for_read->getContext()); + additional_columns, requested_columns, + storage_snapshot->metadata->getColumns(), + data_part_info_for_read->getContext()); + if (dag) { dag->addMaterializingOutputActions(); @@ 
-216,7 +219,7 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const } } -IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const +IMergeTreeReader::ColumnNameLevel IMergeTreeReader::findColumnForOffsets(const NameAndTypePair & required_column) const { auto get_offsets_streams = [](const auto & serialization, const auto & name_in_storage) { @@ -238,11 +241,11 @@ IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(con auto required_offsets_streams = get_offsets_streams(getSerializationInPart(required_column), required_name_in_storage); size_t max_matched_streams = 0; - ColumnPositionLevel position_level; + ColumnNameLevel name_level; /// Find column that has maximal number of matching /// offsets columns with required_column. - for (const auto & part_column : data_part_info_for_read->getColumns()) + for (const auto & part_column : Nested::convertToSubcolumns(data_part_info_for_read->getColumns())) { auto name_in_storage = Nested::extractTableName(part_column.name); if (name_in_storage != required_name_in_storage) @@ -261,14 +264,14 @@ IMergeTreeReader::ColumnPositionLevel IMergeTreeReader::findColumnForOffsets(con it = current_it; } - if (i && (!position_level || i > max_matched_streams)) + if (i && (!name_level || i > max_matched_streams)) { max_matched_streams = i; - position_level.emplace(*data_part_info_for_read->getColumnPosition(part_column.name), it->second); + name_level.emplace(part_column.name, it->second); } } - return position_level; + return name_level; } void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index fcab35fb4c2..dafe7b2d4ba 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -24,7 +24,7 @@ public: IMergeTreeReader( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, const NamesAndTypesList & columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, const MarkRanges & all_mark_ranges_, @@ -92,22 +92,23 @@ protected: MergeTreeReaderSettings settings; - StorageMetadataPtr metadata_snapshot; + StorageSnapshotPtr storage_snapshot; MarkRanges all_mark_ranges; /// Position and level (of nesting). - using ColumnPositionLevel = std::optional>; + using ColumnNameLevel = std::optional>; + /// In case of part of the nested column does not exists, offsets should be /// read, but only the offsets for the current column, that is why it /// returns pair of size_t, not just one. - ColumnPositionLevel findColumnForOffsets(const NameAndTypePair & column) const; + ColumnNameLevel findColumnForOffsets(const NameAndTypePair & column) const; NameSet partially_read_columns; -private: /// Alter conversions, which must be applied on fly if required AlterConversionsPtr alter_conversions; +private: /// Columns that are requested to read. 
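The findColumnForOffsets change above keeps the name (plus nesting level) of the column whose offsets can be reused, instead of its position, and scans the part columns converted to Nested subcolumns. A much-simplified standalone sketch of the underlying idea, reusing a sibling subcolumn of the same Nested table to size a missing subcolumn; the real code also counts matching offset streams and tracks the substream level:

#include <iostream>
#include <optional>
#include <string>
#include <vector>

/// "n.b" -> "n"; plain columns have no nested table name.
std::string extractNestedTableName(const std::string & column_name)
{
    auto pos = column_name.find('.');
    return pos == std::string::npos ? std::string{} : column_name.substr(0, pos);
}

/// Much-simplified stand-in for findColumnForOffsets: if a requested Nested subcolumn
/// is missing from the part, return the *name* of a sibling subcolumn of the same
/// Nested table whose offsets can be reused.
std::optional<std::string> findColumnForOffsetsSketch(
    const std::string & required_column,
    const std::vector<std::string> & part_columns)
{
    const std::string required_table = extractNestedTableName(required_column);
    if (required_table.empty())
        return std::nullopt;

    for (const auto & part_column : part_columns)
        if (extractNestedTableName(part_column) == required_table && part_column != required_column)
            return part_column;

    return std::nullopt;
}

int main()
{
    // The part has "n.a" but not "n.b": reuse the offsets of "n.a" to size "n.b".
    auto sibling = findColumnForOffsetsSketch("n.b", {"x", "n.a"});
    std::cout << (sibling ? *sibling : std::string("<none>")) << '\n';  // n.a
}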
NamesAndTypesList requested_columns; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index f909d854cf6..3f02a6b197e 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -564,7 +564,17 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown( } case (ActionsDAG::ActionType::COLUMN): { - res = &inverted_dag.addColumn({node.column, node.result_type, node.result_name}); + String name; + if (const auto * column_const = typeid_cast(node.column.get())) + /// Re-generate column name for constant. + /// DAG from query (with enabled analyzer) uses suffixes for constants, like 1_UInt8. + /// DAG from PK does not use it. This sometimes breaks matching by column name. + /// Ideally, we should not compare names, but DAG subtrees instead. + name = ASTLiteral(column_const->getDataColumn()[0]).getColumnName(); + else + name = node.result_name; + + res = &inverted_dag.addColumn({node.column, node.result_type, name}); break; } case (ActionsDAG::ActionType::ALIAS): diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index f694ecab8e3..2e48892563b 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -36,7 +36,7 @@ void checkNoOldLeaders(Poco::Logger * log, ZooKeeper & zookeeper, const String p if (code == Coordination::Error::ZNONODE) return; else if (code != Coordination::Error::ZOK) - throw KeeperException(code, path); + throw KeeperException::fromPath(code, path); Coordination::Requests ops; diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index c6c197919f4..bd8546f04cc 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -83,12 +83,12 @@ size_t MarkRanges::getNumberOfMarks() const void MarkRanges::serialize(WriteBuffer & out) const { - writeIntBinary(this->size(), out); + writeBinaryLittleEndian(this->size(), out); for (const auto & [begin, end] : *this) { - writeIntBinary(begin, out); - writeIntBinary(end, out); + writeBinaryLittleEndian(begin, out); + writeBinaryLittleEndian(end, out); } } @@ -100,13 +100,13 @@ String MarkRanges::describe() const void MarkRanges::deserialize(ReadBuffer & in) { size_t size = 0; - readIntBinary(size, in); + readBinaryLittleEndian(size, in); this->resize(size); for (size_t i = 0; i < size; ++i) { - readIntBinary((*this)[i].begin, in); - readIntBinary((*this)[i].end, in); + readBinaryLittleEndian((*this)[i].begin, in); + readBinaryLittleEndian((*this)[i].end, in); } } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 9f54c554c85..883cfee89c8 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -230,7 +230,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() /// the fast replica is not overloaded because amount of executing merges doesn't affect the ability to acquire locks for new merges. /// /// So here we trying to solve it with the simplest solution -- sleep random time up to 500ms for 1GB part and up to 7 seconds for 300GB part. - /// It can sound too much, but we are trying to aquite these locks in background tasks which can be scheduled each 5 seconds or so. + /// It can sound too much, but we are trying to acquire these locks in background tasks which can be scheduled each 5 seconds or so.
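The MarkRange.cpp hunk above swaps writeIntBinary/readIntBinary for the explicitly little-endian helpers so that the serialized form is identical on big-endian hosts (e.g. the s390x build added in this PR). A standalone sketch of the same framing, element count followed by (begin, end) pairs, written byte by byte in little-endian order:

#include <cstddef>
#include <cstdint>
#include <vector>

struct MarkRangeSketch { uint64_t begin = 0; uint64_t end = 0; };

void writeLE64(std::vector<uint8_t> & out, uint64_t value)
{
    for (int i = 0; i < 8; ++i)
        out.push_back(static_cast<uint8_t>(value >> (8 * i)));  // least significant byte first
}

uint64_t readLE64(const std::vector<uint8_t> & in, size_t & pos)
{
    uint64_t value = 0;
    for (int i = 0; i < 8; ++i)
        value |= static_cast<uint64_t>(in[pos++]) << (8 * i);
    return value;
}

std::vector<uint8_t> serialize(const std::vector<MarkRangeSketch> & ranges)
{
    std::vector<uint8_t> out;
    writeLE64(out, ranges.size());  // element count first, as in MarkRanges::serialize
    for (const auto & r : ranges)
    {
        writeLE64(out, r.begin);
        writeLE64(out, r.end);
    }
    return out;
}

std::vector<MarkRangeSketch> deserialize(const std::vector<uint8_t> & in)
{
    size_t pos = 0;
    std::vector<MarkRangeSketch> ranges(readLE64(in, pos));
    for (auto & r : ranges)
    {
        r.begin = readLE64(in, pos);
        r.end = readLE64(in, pos);
    }
    return ranges;
}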
double start_to_sleep_seconds = std::logf(storage_settings_ptr->zero_copy_merge_mutation_min_parts_size_sleep_before_lock.value); uint64_t right_border_to_sleep_ms = static_cast((std::log(estimated_space_for_merge) - start_to_sleep_seconds + 0.5) * 1000); uint64_t time_to_sleep_milliseconds = std::min(10000UL, std::uniform_int_distribution(1, 1 + right_border_to_sleep_ms)(rng)); @@ -245,7 +245,11 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() if (!zero_copy_lock || !zero_copy_lock->isLocked()) { - LOG_DEBUG(log, "Merge of part {} started by some other replica, will wait it and fetch merged part", entry.new_part_name); + LOG_DEBUG( + log, + "Merge of part {} started by some other replica, will wait for it and fetch merged part. Number of tries {}", + entry.new_part_name, + entry.num_tries); storage.watchZeroCopyLock(entry.new_part_name, disk); /// Don't check for missing part -- it's missing because other replica still not /// finished merge. diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 6eab4337162..e497a799274 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -269,7 +269,7 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) try { ALLOW_ALLOCATIONS_IN_SCOPE; - item->task->getQueryId(); + query_id = item->task->getQueryId(); need_execute_again = item->task->executeStep(); } catch (...) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 48adf36e678..d7836ac01b0 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -145,9 +145,6 @@ bool IMergeTreeSelectAlgorithm::getNewTask() ChunkAndProgress IMergeTreeSelectAlgorithm::read() { - size_t num_read_rows = 0; - size_t num_read_bytes = 0; - while (!is_cancelled) { try @@ -178,10 +175,6 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read() ordered_columns.push_back(res.block.getByName(name).column); } - /// Account a progress from previous empty chunks. 
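The back-off described in the zero-copy merge comment above can be sanity-checked with a few lines of arithmetic: with zero_copy_merge_mutation_min_parts_size_sleep_before_lock at 1 GB, a 1 GB merge sleeps up to roughly 0.5 s and a 300 GB merge up to roughly 6 s, capped at 10 s. A standalone sketch roughly following the formula in prepare():

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

/// Upper bound of the random sleep, in ms:
/// (log(estimated_space) - log(min_size) + 0.5) * 1000, capped at 10000 ms as in the code.
uint64_t maxSleepMs(double estimated_space_for_merge, double min_parts_size_sleep_before_lock)
{
    double start_to_sleep_seconds = std::log(min_parts_size_sleep_before_lock);
    double right_border_ms = (std::log(estimated_space_for_merge) - start_to_sleep_seconds + 0.5) * 1000;
    return std::min<uint64_t>(10000, static_cast<uint64_t>(right_border_ms));
}

int main()
{
    std::cout << maxSleepMs(1e9, 1e9) << " ms\n";   // 500 ms for a 1 GB part
    std::cout << maxSleepMs(3e11, 1e9) << " ms\n";  // ~6200 ms for a 300 GB part
}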
- res.num_read_rows += num_read_rows; - res.num_read_bytes += num_read_bytes; - return ChunkAndProgress{ .chunk = Chunk(ordered_columns, res.row_count), .num_read_rows = res.num_read_rows, @@ -194,11 +187,10 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read() } } - return {Chunk(), num_read_rows, num_read_bytes, true}; + return {Chunk(), 0, 0, true}; } void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask( - const StorageMetadataPtr & metadata_snapshot, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback) { @@ -213,7 +205,7 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask( else { reader = task->data_part->getReader( - task->task_columns.columns, metadata_snapshot, task->mark_ranges, + task->task_columns.columns, storage_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), task->alter_conversions, reader_settings, value_size_map, profile_callback); } @@ -229,8 +221,8 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask( { initializeMergeTreePreReadersForPart( task->data_part, task->alter_conversions, - task->task_columns, metadata_snapshot, - task->mark_ranges, value_size_map, profile_callback); + task->task_columns, task->mark_ranges, + value_size_map, profile_callback); } } @@ -238,18 +230,17 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForPart( const MergeTreeData::DataPartPtr & data_part, const AlterConversionsPtr & alter_conversions, const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback) { reader = data_part->getReader( - task_columns.columns, metadata_snapshot, mark_ranges, + task_columns.columns, storage_snapshot, mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), alter_conversions, reader_settings, value_size_map, profile_callback); initializeMergeTreePreReadersForPart( - data_part, alter_conversions, task_columns, metadata_snapshot, + data_part, alter_conversions, task_columns, mark_ranges, value_size_map, profile_callback); } @@ -257,7 +248,6 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreePreReadersForPart( const MergeTreeData::DataPartPtr & data_part, const AlterConversionsPtr & alter_conversions, const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback) @@ -269,7 +259,7 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreePreReadersForPart( { pre_reader_for_step.push_back( data_part->getReader( - {LightweightDeleteDescription::FILTER_COLUMN}, metadata_snapshot, + {LightweightDeleteDescription::FILTER_COLUMN}, storage_snapshot, mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), alter_conversions, reader_settings, value_size_map, profile_callback)); } @@ -278,7 +268,7 @@ void IMergeTreeSelectAlgorithm::initializeMergeTreePreReadersForPart( { pre_reader_for_step.push_back( data_part->getReader( - pre_columns_per_step, metadata_snapshot, mark_ranges, + pre_columns_per_step, storage_snapshot, mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), alter_conversions, reader_settings, value_size_map, profile_callback)); } diff --git 
a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 7b6dc50060a..42043c03e85 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -120,7 +120,6 @@ protected: /// Sets up data readers for each step of prewhere and where void initializeMergeTreeReadersForCurrentTask( - const StorageMetadataPtr & metadata_snapshot, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback); @@ -128,7 +127,6 @@ protected: const MergeTreeData::DataPartPtr & data_part, const AlterConversionsPtr & alter_conversions, const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback); @@ -207,7 +205,6 @@ private: const MergeTreeData::DataPartPtr & data_part, const AlterConversionsPtr & alter_conversions, const MergeTreeReadTaskColumns & task_columns, - const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, const IMergeTreeReader::ValueSizeMap & value_size_map, const ReadBufferFromFileBase::ProfileCallback & profile_callback); diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index c7434eab05d..48779aa0df7 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -328,11 +328,24 @@ MergeTreeReadTaskColumns getReadTaskColumns( NameSet columns_from_previous_steps; auto add_step = [&](const PrewhereExprStep & step) { - Names step_column_names = step.actions->getActionsDAG().getRequiredColumnsNames(); + Names step_column_names; - injectRequiredColumns( - data_part_info_for_reader, storage_snapshot, with_subcolumns, step_column_names); + /// Computation results from previous steps might be used in the current step as well. In such a case these + /// computed columns will be present in the current step inputs. They don't need to be read from the disk so + /// exclude them from the list of columns to read. This filtering must be done before injecting required + /// columns to avoid adding unnecessary columns or failing to find required columns that are computation + /// results from previous steps. + /// Example: step1: sin(a)>b, step2: sin(a)>c + for (const auto & name : step.actions->getActionsDAG().getRequiredColumnsNames()) + if (!columns_from_previous_steps.contains(name)) + step_column_names.push_back(name); + if (!step_column_names.empty()) + injectRequiredColumns( + data_part_info_for_reader, storage_snapshot, + with_subcolumns, step_column_names); + + /// More columns could have been added, filter them as well by the list of columns from previous steps. Names columns_to_read_in_step; for (const auto & name : step_column_names) { @@ -343,6 +356,10 @@ MergeTreeReadTaskColumns getReadTaskColumns( columns_from_previous_steps.insert(name); } + /// Add results of the step to the list of already "known" columns so that we don't read or compute them again. 
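A standalone sketch of the bookkeeping described in the comment above: each PREWHERE step reads only inputs that earlier steps have not already read or produced, and the step's own outputs are added to the set of known columns so that later steps (step1: sin(a) > b, step2: sin(a) > c) neither re-read nor recompute them:

#include <iostream>
#include <set>
#include <string>
#include <vector>

struct StepSketch
{
    std::vector<std::string> required_inputs;  // what the step's expression needs
    std::vector<std::string> outputs;          // what the step's expression produces
};

int main()
{
    // step1: sin(a) > b   needs a, b and produces sin(a)
    // step2: sin(a) > c   needs sin(a), c, but sin(a) is already known after step1
    std::vector<StepSketch> steps = {
        {{"a", "b"}, {"sin(a)", "greater(sin(a), b)"}},
        {{"sin(a)", "c"}, {"greater(sin(a), c)"}},
    };

    std::set<std::string> known_from_previous_steps;
    for (size_t i = 0; i < steps.size(); ++i)
    {
        std::vector<std::string> to_read;
        for (const auto & name : steps[i].required_inputs)
            if (!known_from_previous_steps.contains(name))  // skip results of earlier steps
                to_read.push_back(name);

        std::cout << "step" << i + 1 << " reads:";
        for (const auto & name : to_read)
            std::cout << ' ' << name;
        std::cout << '\n';  // step1 reads: a b; step2 reads: c

        for (const auto & name : to_read)
            known_from_previous_steps.insert(name);
        for (const auto & name : steps[i].outputs)
            known_from_previous_steps.insert(name);
    }
}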
+ for (const auto & name : step.actions->getActionsDAG().getNames()) + columns_from_previous_steps.insert(name); + result.pre_columns.push_back(storage_snapshot->getColumnsByNames(options, columns_to_read_in_step)); }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ec8ce3f5e3d..a0dc154f7ef 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -187,8 +187,8 @@ static void checkSuspiciousIndices(const ASTFunction * index_function) std::unordered_set unique_index_expression_hashes; for (const auto & child : index_function->arguments->children) { - IAST::Hash hash = child->getTreeHash(); - UInt64 first_half_of_hash = hash.first; + const IAST::Hash hash = child->getTreeHash(); + const auto & first_half_of_hash = hash.low64; if (!unique_index_expression_hashes.emplace(first_half_of_hash).second) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -382,7 +382,7 @@ MergeTreeData::MergeTreeData( checkTTLExpressions(metadata_, metadata_); String reason; - if (!canUsePolymorphicParts(*settings, &reason) && !reason.empty()) + if (!canUsePolymorphicParts(*settings, reason) && !reason.empty()) LOG_WARNING(log, "{} Settings 'min_rows_for_wide_part'and 'min_bytes_for_wide_part' will be ignored.", reason); #if !USE_ROCKSDB @@ -465,9 +465,10 @@ void MergeTreeData::checkProperties( const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach, + bool allow_empty_sorting_key, ContextPtr local_context) const { - if (!new_metadata.sorting_key.definition_ast) + if (!new_metadata.sorting_key.definition_ast && !allow_empty_sorting_key) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); KeyDescription new_sorting_key = new_metadata.sorting_key; @@ -580,6 +581,9 @@ void MergeTreeData::checkProperties( if (projections_names.find(projection.name) != projections_names.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection with name {} already exists", backQuote(projection.name)); + /// We cannot alter a projection so far. So here we do not try to find a projection in old metadata. + bool is_aggregate = projection.type == ProjectionDescription::Type::Aggregate; + checkProperties(*projection.metadata, *projection.metadata, attach, is_aggregate, local_context); projections_names.insert(projection.name); } } @@ -593,7 +597,7 @@ void MergeTreeData::setProperties( bool attach, ContextPtr local_context) { - checkProperties(new_metadata, old_metadata, attach, local_context); + checkProperties(new_metadata, old_metadata, attach, false, local_context); setInMemoryMetadata(new_metadata); } @@ -2187,7 +2191,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) const DataPartPtr & part = *it; - part->last_removal_attemp_time.store(time_now, std::memory_order_relaxed); + part->last_removal_attempt_time.store(time_now, std::memory_order_relaxed); /// Do not remove outdated part if it may be visible for some transaction if (!part->version.canBeRemoved()) @@ -2324,7 +2328,7 @@ size_t MergeTreeData::clearOldPartsFromFilesystem(bool force) removePartsFinally(parts_to_remove); /// This is needed to close files to avoid they reside on disk after being deleted. /// NOTE: we can drop files from cache more selectively but this is good enough. 
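checkSuspiciousIndices above now takes the low64 half of the 128-bit AST tree hash and rejects an index whose argument list contains the same expression twice. A standalone sketch of the idea, with std::hash over a normalized expression string standing in for IAST::getTreeHash():

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

/// Returns true if any expression occurs more than once, e.g. INDEX idx (a, a) TYPE minmax.
bool hasDuplicateExpressions(const std::vector<std::string> & normalized_expressions)
{
    std::unordered_set<uint64_t> unique_hashes;
    for (const auto & expression : normalized_expressions)
    {
        uint64_t first_half_of_hash = std::hash<std::string>{}(expression);  // stand-in for tree_hash.low64
        if (!unique_hashes.insert(first_half_of_hash).second)
            return true;
    }
    return false;
}

int main()
{
    std::cout << hasDuplicateExpressions({"a", "b"}) << '\n';  // 0
    std::cout << hasDuplicateExpressions({"a", "a"}) << '\n';  // 1
}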
- getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); return parts_to_remove.size(); } @@ -2615,8 +2619,50 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (detached_parts.empty()) return 0; - PartsTemporaryRename renamed_parts(*this, "detached/"); + auto get_last_touched_time = [&](const DetachedPartInfo & part_info) -> time_t + { + auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name; + time_t last_change_time = part_info.disk->getLastChanged(path); + time_t last_modification_time = part_info.disk->getLastModified(path).epochTime(); + return std::max(last_change_time, last_modification_time); + }; + time_t ttl_seconds = getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds; + + size_t unfinished_deleting_parts = 0; + time_t current_time = time(nullptr); + for (const auto & part_info : detached_parts) + { + if (!part_info.dir_name.starts_with("deleting_")) + continue; + + time_t startup_time = current_time - static_cast(Context::getGlobalContextInstance()->getUptimeSeconds()); + time_t last_touch_time = get_last_touched_time(part_info); + + /// Maybe it's being deleted right now (for example, in ALTER DROP DETACHED) + bool had_restart = last_touch_time < startup_time; + bool ttl_expired = last_touch_time + ttl_seconds <= current_time; + if (!had_restart && !ttl_expired) + continue; + + /// We were trying to delete this detached part but did not finish deleting, probably because the server crashed + LOG_INFO(log, "Removing detached part {} that we failed to remove previously", part_info.dir_name); + try + { + removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / part_info.dir_name / "", part_info.dir_name); + ++unfinished_deleting_parts; + } + catch (...) 
+ { + tryLogCurrentException(log); + } + } + + if (!getSettings()->merge_tree_enable_clear_old_broken_detached) + return unfinished_deleting_parts; + + const auto full_path = fs::path(relative_data_path) / "detached"; + size_t removed_count = 0; for (const auto & part_info : detached_parts) { if (!part_info.valid_name || part_info.prefix.empty()) @@ -2631,31 +2677,24 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (!can_be_removed_by_timeout) continue; - time_t current_time = time(nullptr); - ssize_t threshold = current_time - getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds; - auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name; - time_t last_change_time = part_info.disk->getLastChanged(path); - time_t last_modification_time = part_info.disk->getLastModified(path).epochTime(); - time_t last_touch_time = std::max(last_change_time, last_modification_time); + ssize_t threshold = current_time - ttl_seconds; + time_t last_touch_time = get_last_touched_time(part_info); if (last_touch_time == 0 || last_touch_time >= threshold) continue; - renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name, part_info.disk); - } + const String & old_name = part_info.dir_name; + String new_name = "deleting_" + part_info.dir_name; + part_info.disk->moveFile(fs::path(full_path) / old_name, fs::path(full_path) / new_name); - LOG_INFO(log, "Will clean up {} detached parts", renamed_parts.old_and_new_names.size()); - - renamed_parts.tryRenameAll(); - - for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) - { - removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); + removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); LOG_WARNING(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); - old_name.clear(); + ++removed_count; } - return renamed_parts.old_and_new_names.size(); + LOG_INFO(log, "Cleaned up {} detached parts", removed_count); + + return removed_count + unfinished_deleting_parts; } size_t MergeTreeData::clearOldWriteAheadLogs() @@ -2795,7 +2834,7 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ } if (!getStorageID().hasUUID()) - getContext()->dropCaches(); + getContext()->clearCaches(); /// TODO: remove const_cast for (const auto & part : data_parts_by_info) @@ -2836,9 +2875,9 @@ void MergeTreeData::dropAllData() } /// Tables in atomic databases have UUID and stored in persistent locations. - /// No need to drop caches (that are keyed by filesystem path) because collision is not possible. + /// No need to clear caches (that are keyed by filesystem path) because collision is not possible. if (!getStorageID().hasUUID()) - getContext()->dropCaches(); + getContext()->clearCaches(); /// Removing of each data part before recursive removal of directory is to speed-up removal, because there will be less number of syscalls. 
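The reworked clearOldBrokenPartsFromDetachedDirectory above first reclaims deleting_ directories left behind by an interrupted removal: a directory is removed if the server has restarted since it was last touched, or if the broken-part TTL has expired. A standalone sketch of that decision:

#include <algorithm>
#include <ctime>

/// Mirrors the condition above: the last-touch time is the max of change and modification time;
/// reclaim if the directory predates the current server start or its TTL has expired.
bool shouldRemoveLeftoverDeletingDir(
    std::time_t last_change_time,
    std::time_t last_modification_time,
    std::time_t server_startup_time,
    std::time_t ttl_seconds,
    std::time_t current_time)
{
    std::time_t last_touch_time = std::max(last_change_time, last_modification_time);
    bool had_restart = last_touch_time < server_startup_time;  // nobody can still be deleting it
    bool ttl_expired = last_touch_time + ttl_seconds <= current_time;
    return had_restart || ttl_expired;
}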
NameSet part_names_failed; @@ -3162,6 +3201,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } + if (command.type == AlterCommand::MODIFY_QUERY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "ALTER MODIFY QUERY is not supported by MergeTree engines family"); + if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -3286,7 +3329,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - checkProperties(new_metadata, old_metadata, false, local_context); + checkProperties(new_metadata, old_metadata, false, false, local_context); checkTTLExpressions(new_metadata, old_metadata); if (!columns_to_check_conversion.empty()) @@ -3319,7 +3362,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context MergeTreeSettings copy = *getSettings(); copy.applyChange(changed_setting); String reason; - if (!canUsePolymorphicParts(copy, &reason) && !reason.empty()) + if (!canUsePolymorphicParts(copy, reason) && !reason.empty()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't change settings. Reason: {}", reason); } @@ -3344,7 +3387,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context auto copy = getDefaultSettings(); copy->applyChanges(new_changes); String reason; - if (!canUsePolymorphicParts(*copy, &reason) && !reason.empty()) + if (!canUsePolymorphicParts(*copy, reason) && !reason.empty()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't change settings. Reason: {}", reason); } @@ -3386,8 +3429,9 @@ MergeTreeDataPartFormat MergeTreeData::choosePartFormat(size_t bytes_uncompresse using PartType = MergeTreeDataPartType; using PartStorageType = MergeTreeDataPartStorageType; - const auto settings = getSettings(); - if (!canUsePolymorphicParts(*settings)) + String out_reason; + const auto settings = getSettings(); + if (!canUsePolymorphicParts(*settings, out_reason)) return {PartType::Wide, PartStorageType::Full}; auto satisfies = [&](const auto & min_bytes_for, const auto & min_rows_for) @@ -4026,7 +4070,7 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part_to_detach) { LOG_INFO(log, "Cloning part {} to unexpected_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); - part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr()); + part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); DataPartsLock lock = lockParts(); part_to_detach->is_unexpected_local_part = true; @@ -5143,12 +5187,12 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, + const ReadSettings & read_settings, const ContextPtr & local_context) { MergeTreeData::PartsBackupEntries res; std::map> temp_dirs; TableLockHolder table_lock; - ReadSettings read_settings = local_context->getBackupReadSettings(); for (const auto & part : data_parts) { @@ -5182,6 +5226,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( part->getFileNamesWithoutChecksums(), data_path_in_backup, backup_settings, + read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs); @@ -5194,6 +5239,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts( 
projection_part->getFileNamesWithoutChecksums(), fs::path{data_path_in_backup} / part->name, backup_settings, + read_settings, make_temporary_hard_links, backup_entries_from_part, &temp_dirs); @@ -5261,7 +5307,7 @@ public: auto it = temp_dirs.find(disk); if (it == temp_dirs.end()) it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/")).first; - return it->second->getPath(); + return it->second->getRelativePath(); } private: @@ -5688,6 +5734,10 @@ bool MergeTreeData::supportsLightweightDelete() const auto lock = lockParts(); for (const auto & part : data_parts_by_info) { + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + if (!part->supportLightweightDeleteMutate()) return false; } @@ -5782,18 +5832,21 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const { const String source_dir = "detached/"; - std::map name_to_disk; - /// Let's compose a list of parts that should be added. if (attach_part) { const String part_id = partition->as().value.safeGet(); validateDetachedPartName(part_id); - auto disk = getDiskForDetachedPart(part_id); - renamed_parts.addPart(part_id, "attaching_" + part_id, disk); - - if (MergeTreePartInfo::tryParsePartName(part_id, format_version)) - name_to_disk[part_id] = getDiskForDetachedPart(part_id); + if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_id)) + { + LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " + "probably it's being detached right now", part_id); + } + else + { + auto disk = getDiskForDetachedPart(part_id); + renamed_parts.addPart(part_id, "attaching_" + part_id, disk); + } } else { @@ -5810,6 +5863,12 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const for (const auto & part_info : detached_parts) { + if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_info.dir_name)) + { + LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " + "probably it's being detached right now", part_info.dir_name); + continue; + } LOG_DEBUG(log, "Found part {}", part_info.dir_name); active_parts.add(part_info.dir_name); } @@ -5820,6 +5879,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const for (const auto & part_info : detached_parts) { const String containing_part = active_parts.getContainingPart(part_info.dir_name); + if (containing_part.empty()) + continue; LOG_DEBUG(log, "Found containing part {} for part {}", containing_part, part_info.dir_name); @@ -6994,7 +7055,8 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg ProjectionCandidate * selected_candidate = nullptr; size_t min_sum_marks = std::numeric_limits::max(); - if (metadata_snapshot->minmax_count_projection && !has_lightweight_delete_parts.load(std::memory_order_relaxed)) /// Disable ReadFromStorage for parts with lightweight. + if (settings.optimize_use_implicit_projections && metadata_snapshot->minmax_count_projection + && !has_lightweight_delete_parts.load(std::memory_order_relaxed)) /// Disable ReadFromStorage for parts with lightweight. add_projection_candidate(*metadata_snapshot->minmax_count_projection, true); std::optional minmax_count_projection_candidate; if (!candidates.empty()) @@ -7026,7 +7088,9 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg max_added_blocks.get(), query_context); - // minmax_count_projection should not be used when there is no data to process. 
+ // minmax_count_projection cannot be used when there is no data to process, because + // it will produce an incorrect result during constant aggregation. + // See https://github.com/ClickHouse/ClickHouse/issues/36728 if (!query_info.minmax_count_projection_block) return; @@ -7763,7 +7827,7 @@ try LOG_WARNING(log, "Profile counters are not set"); } - part_log->add(part_log_elem); + part_log->add(std::move(part_log_elem)); } catch (...) { @@ -8003,22 +8067,23 @@ bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const bool MergeTreeData::canUsePolymorphicParts() const { - return canUsePolymorphicParts(*getSettings(), nullptr); + String unused; + return canUsePolymorphicParts(*getSettings(), unused); } -bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason) const +bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, String & out_reason) const { if (!canUseAdaptiveGranularity()) { - if (out_reason && (settings.min_rows_for_wide_part != 0 || settings.min_bytes_for_wide_part != 0 + if ((settings.min_rows_for_wide_part != 0 || settings.min_bytes_for_wide_part != 0 || settings.min_rows_for_compact_part != 0 || settings.min_bytes_for_compact_part != 0)) { - *out_reason = fmt::format( - "Table can't create parts with adaptive granularity, but settings" - " min_rows_for_wide_part = {}" - ", min_bytes_for_wide_part = {}" - ". Parts with non-adaptive granularity can be stored only in Wide (default) format.", - settings.min_rows_for_wide_part, settings.min_bytes_for_wide_part); + out_reason = fmt::format( + "Table can't create parts with adaptive granularity, but settings" + " min_rows_for_wide_part = {}" + ", min_bytes_for_wide_part = {}" + ". Parts with non-adaptive granularity can be stored only in Wide (default) format.", + settings.min_rows_for_wide_part, settings.min_bytes_for_wide_part); } return false; @@ -8416,7 +8481,7 @@ void MergeTreeData::incrementMergedPartsProfileEvent(MergeTreeDataPartType type) } } -MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( +std::pair MergeTreeData::createEmptyPart( MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, const String & new_part_name, const MergeTreeTransactionPtr & txn) { @@ -8435,6 +8500,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( ReservationPtr reservation = reserveSpacePreferringTTLRules(metadata_snapshot, 0, move_ttl_infos, time(nullptr), 0, true); VolumePtr data_part_volume = createVolumeFromReservation(reservation, volume); + auto tmp_dir_holder = getTemporaryPartDirectoryHolder(EMPTY_PART_TMP_PREFIX + new_part_name); auto new_data_part = getDataPartBuilder(new_part_name, data_part_volume, EMPTY_PART_TMP_PREFIX + new_part_name) .withBytesAndRowsOnDisk(0, 0) .withPartInfo(new_part_info) @@ -8494,7 +8560,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( out.finalizePart(new_data_part, sync_on_insert); new_data_part_storage->precommitTransaction(); - return new_data_part; + return std::make_pair(std::move(new_data_part), std::move(tmp_dir_holder)); } bool MergeTreeData::allowRemoveStaleMovingParts() const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8b5b50b1841..e4801cffa36 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -434,6 +434,8 @@ public: bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } + bool
supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); } + NamesAndTypesList getVirtuals() const override; bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override; @@ -934,7 +936,9 @@ public: WriteAheadLogPtr getWriteAheadLog(); constexpr static auto EMPTY_PART_TMP_PREFIX = "tmp_empty_"; - MergeTreeData::MutableDataPartPtr createEmptyPart(MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, const String & new_part_name, const MergeTreeTransactionPtr & txn); + std::pair createEmptyPart( + MergeTreePartInfo & new_part_info, const MergeTreePartition & partition, + const String & new_part_name, const MergeTreeTransactionPtr & txn); MergeTreeDataFormatVersion format_version; @@ -1229,7 +1233,7 @@ protected: /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; - void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr) const; + void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach, bool allow_empty_sorting_key, ContextPtr local_context) const; void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr); @@ -1334,7 +1338,7 @@ protected: using PartsBackupEntries = std::vector; /// Makes backup entries to backup the parts of this table. - PartsBackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ContextPtr & local_context); + PartsBackupEntries backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const BackupSettings & backup_settings, const ReadSettings & read_settings, const ContextPtr & local_context); class RestoredPartsHolder; @@ -1484,7 +1488,7 @@ private: /// Check selected parts for movements. Used by ALTER ... MOVE queries. 
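The canUsePolymorphicParts and MergeTreeDataMergerMutator hunks in this part of the diff replace the optional String * out_reason with a mandatory String &, so callers that do not need the reason pass a throwaway string instead of nullptr and the callee never has to test a pointer. A generic sketch of the resulting calling convention; the names are illustrative, not the real ClickHouse signatures:

#include <iostream>
#include <string>

/// After the refactor: the reason is always written on failure, no nullptr checks inside.
bool canDoSomething(bool precondition, std::string & out_reason)
{
    if (!precondition)
    {
        out_reason = "precondition is not satisfied";
        return false;
    }
    return true;
}

int main()
{
    std::string reason;
    if (!canDoSomething(false, reason))
        std::cout << reason << '\n';

    std::string unused;  // caller that does not care about the reason
    bool ok = canDoSomething(true, unused);
    std::cout << ok << '\n';
}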
CurrentlyMovingPartsTaggerPtr checkPartsForMove(const DataPartsVector & parts, SpacePtr space); - bool canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason = nullptr) const; + bool canUsePolymorphicParts(const MergeTreeSettings & settings, String & out_reason) const; std::mutex write_ahead_log_mutex; WriteAheadLogPtr write_ahead_log; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index c0acc401506..e89cd8da232 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -136,7 +136,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( const AllowedMergingPredicate & can_merge_callback, bool merge_with_ttl_allowed, const MergeTreeTransactionPtr & txn, - String * out_disable_reason, + String & out_disable_reason, const PartitionIdsHint * partitions_hint) { MergeTreeData::DataPartsVector data_parts = getDataPartsToSelectMergeFrom(txn, partitions_hint); @@ -145,8 +145,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (data_parts.empty()) { - if (out_disable_reason) - *out_disable_reason = "There are no parts in the table"; + out_disable_reason = "There are no parts in the table"; return SelectPartsDecision::CANNOT_SELECT; } @@ -154,8 +153,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (info.parts_selected_precondition == 0) { - if (out_disable_reason) - *out_disable_reason = "No parts satisfy preconditions for merge"; + out_disable_reason = "No parts satisfy preconditions for merge"; return SelectPartsDecision::CANNOT_SELECT; } @@ -179,8 +177,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /*optimize_skip_merged_partitions=*/true); } - if (out_disable_reason) - *out_disable_reason = "There is no need to merge parts according to merge selector algorithm"; + out_disable_reason = "There is no need to merge parts according to merge selector algorithm"; return SelectPartsDecision::CANNOT_SELECT; } @@ -197,7 +194,8 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart auto metadata_snapshot = data.getInMemoryMetadataPtr(); - MergeSelectingInfo info = getPossibleMergeRanges(data_parts, can_merge_callback, txn); + String out_reason; + MergeSelectingInfo info = getPossibleMergeRanges(data_parts, can_merge_callback, txn, out_reason); if (info.parts_selected_precondition == 0) return res; @@ -227,7 +225,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart /// This method should have been const, but something went wrong... 
it's const with dry_run = true auto status = const_cast(this)->selectPartsToMergeFromRanges( future_part, /*aggressive*/ false, max_total_size_to_merge, merge_with_ttl_allowed, - metadata_snapshot, ranges_per_partition[i], info.current_time, &out_disable_reason, + metadata_snapshot, ranges_per_partition[i], info.current_time, out_disable_reason, /* dry_run */ true); if (status == SelectPartsDecision::SELECTED) res.insert(all_partition_ids[i]); @@ -331,7 +329,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo const MergeTreeData::DataPartsVector & data_parts, const AllowedMergingPredicate & can_merge_callback, const MergeTreeTransactionPtr & txn, - String * out_disable_reason) const + String & out_disable_reason) const { MergeSelectingInfo res; @@ -444,7 +442,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( const StorageMetadataPtr & metadata_snapshot, const IMergeSelector::PartsRanges & parts_ranges, const time_t & current_time, - String * out_disable_reason, + String & out_disable_reason, bool dry_run) { const auto data_settings = data.getSettings(); @@ -515,8 +513,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( if (parts_to_merge.empty()) { - if (out_disable_reason) - *out_disable_reason = "Did not find any parts to merge (with usual merge selectors)"; + out_disable_reason = "Did not find any parts to merge (with usual merge selectors)"; return SelectPartsDecision::CANNOT_SELECT; } } @@ -563,22 +560,20 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti bool final, const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn, - String * out_disable_reason, + String & out_disable_reason, bool optimize_skip_merged_partitions) { MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id); if (parts.empty()) { - if (out_disable_reason) - *out_disable_reason = "There are no parts inside partition"; + out_disable_reason = "There are no parts inside partition"; return SelectPartsDecision::CANNOT_SELECT; } if (!final && parts.size() == 1) { - if (out_disable_reason) - *out_disable_reason = "There is only one part inside partition"; + out_disable_reason = "There is only one part inside partition"; return SelectPartsDecision::CANNOT_SELECT; } @@ -587,8 +582,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti if (final && optimize_skip_merged_partitions && parts.size() == 1 && parts[0]->info.level > 0 && (!metadata_snapshot->hasAnyTTL() || parts[0]->checkAllTTLCalculated(metadata_snapshot))) { - if (out_disable_reason) - *out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions"; + out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions"; return SelectPartsDecision::NOTHING_TO_MERGE; } @@ -629,9 +623,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti static_cast((DISK_USAGE_COEFFICIENT_TO_SELECT - 1.0) * 100)); } - if (out_disable_reason) - *out_disable_reason = fmt::format("Insufficient available disk space, required {}", ReadableSize(required_disk_space)); - + out_disable_reason = fmt::format("Insufficient available disk space, required {}", ReadableSize(required_disk_space)); return SelectPartsDecision::CANNOT_SELECT; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 428161ea71e..6eab0ee0c37 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -43,7 +43,7 @@ public: using AllowedMergingPredicate = std::function; + String &)>; explicit MergeTreeDataMergerMutator(MergeTreeData & data_); @@ -92,7 +92,7 @@ public: const MergeTreeData::DataPartsVector & data_parts, const AllowedMergingPredicate & can_merge_callback, const MergeTreeTransactionPtr & txn, - String * out_disable_reason = nullptr) const; + String & out_disable_reason) const; /// The third step of selecting parts to merge: takes ranges that we can merge, and selects parts that we want to merge SelectPartsDecision selectPartsToMergeFromRanges( @@ -103,7 +103,7 @@ public: const StorageMetadataPtr & metadata_snapshot, const IMergeSelector::PartsRanges & parts_ranges, const time_t & current_time, - String * out_disable_reason = nullptr, + String & out_disable_reason, bool dry_run = false); String getBestPartitionToOptimizeEntire(const PartitionsInfo & partitions_info) const; @@ -129,7 +129,7 @@ public: const AllowedMergingPredicate & can_merge, bool merge_with_ttl_allowed, const MergeTreeTransactionPtr & txn, - String * out_disable_reason = nullptr, + String & out_disable_reason, const PartitionIdsHint * partitions_hint = nullptr); /** Select all the parts in the specified partition for merge, if possible. @@ -144,7 +144,7 @@ public: bool final, const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn, - String * out_disable_reason = nullptr, + String & out_disable_reason, bool optimize_skip_merged_partitions = false); /** Creates a task to merge parts. diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 6628cd68eaf..b4d405312e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -187,15 +187,15 @@ bool MergeTreeDataPartChecksums::readV3(ReadBuffer & in) String name; Checksum sum; - readBinary(name, in); + readStringBinary(name, in); readVarUInt(sum.file_size, in); - readPODBinary(sum.file_hash, in); - readBinary(sum.is_compressed, in); + readBinaryLittleEndian(sum.file_hash, in); + readBinaryLittleEndian(sum.is_compressed, in); if (sum.is_compressed) { readVarUInt(sum.uncompressed_size, in); - readPODBinary(sum.uncompressed_hash, in); + readBinaryLittleEndian(sum.uncompressed_hash, in); } files.emplace(std::move(name), sum); @@ -223,15 +223,15 @@ void MergeTreeDataPartChecksums::write(WriteBuffer & to) const const String & name = it.first; const Checksum & sum = it.second; - writeBinary(name, out); + writeStringBinary(name, out); writeVarUInt(sum.file_size, out); - writePODBinary(sum.file_hash, out); - writeBinary(sum.is_compressed, out); + writeBinaryLittleEndian(sum.file_hash, out); + writeBinaryLittleEndian(sum.is_compressed, out); if (sum.is_compressed) { writeVarUInt(sum.uncompressed_size, out); - writePODBinary(sum.uncompressed_hash, out); + writeBinaryLittleEndian(sum.uncompressed_hash, out); } } } @@ -323,9 +323,7 @@ MergeTreeDataPartChecksums::Checksum::uint128 MergeTreeDataPartChecksums::getTot hash_of_all_files.update(checksum.file_hash); } - MergeTreeDataPartChecksums::Checksum::uint128 ret; - hash_of_all_files.get128(reinterpret_cast(&ret)); - return ret; + return getSipHash128AsPair(hash_of_all_files); } void MinimalisticDataPartChecksums::serialize(WriteBuffer & to) const @@ -339,9 +337,9 @@ void MinimalisticDataPartChecksums::serializeWithoutHeader(WriteBuffer & to) con 
writeVarUInt(num_compressed_files, to); writeVarUInt(num_uncompressed_files, to); - writePODBinary(hash_of_all_files, to); - writePODBinary(hash_of_uncompressed_files, to); - writePODBinary(uncompressed_hash_of_compressed_files, to); + writeBinaryLittleEndian(hash_of_all_files, to); + writeBinaryLittleEndian(hash_of_uncompressed_files, to); + writeBinaryLittleEndian(uncompressed_hash_of_compressed_files, to); } String MinimalisticDataPartChecksums::getSerializedString() const @@ -382,9 +380,9 @@ void MinimalisticDataPartChecksums::deserializeWithoutHeader(ReadBuffer & in) readVarUInt(num_compressed_files, in); readVarUInt(num_uncompressed_files, in); - readPODBinary(hash_of_all_files, in); - readPODBinary(hash_of_uncompressed_files, in); - readPODBinary(uncompressed_hash_of_compressed_files, in); + readBinaryLittleEndian(hash_of_all_files, in); + readBinaryLittleEndian(hash_of_uncompressed_files, in); + readBinaryLittleEndian(uncompressed_hash_of_compressed_files, in); } void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPartChecksums & full_checksums_) @@ -415,14 +413,9 @@ void MinimalisticDataPartChecksums::computeTotalChecksums(const MergeTreeDataPar } } - auto get_hash = [] (SipHash & hash, uint128 & data) - { - hash.get128(data); - }; - - get_hash(hash_of_all_files_state, hash_of_all_files); - get_hash(hash_of_uncompressed_files_state, hash_of_uncompressed_files); - get_hash(uncompressed_hash_of_compressed_files_state, uncompressed_hash_of_compressed_files); + hash_of_all_files = getSipHash128AsPair(hash_of_all_files_state); + hash_of_uncompressed_files = getSipHash128AsPair(hash_of_uncompressed_files_state); + uncompressed_hash_of_compressed_files = getSipHash128AsPair(uncompressed_hash_of_compressed_files_state); } String MinimalisticDataPartChecksums::getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 9c47608e364..c6d059498ff 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -30,7 +30,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -43,7 +43,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( auto * load_marks_threadpool = reader_settings.read_settings.load_marks_asynchronously ? 
&read_info->getContext()->getLoadMarksThreadpool() : nullptr; return std::make_unique( - read_info, columns_to_read, metadata_snapshot, uncompressed_cache, + read_info, columns_to_read, storage_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, load_marks_threadpool, avg_value_size_hints, profile_callback); } @@ -115,7 +115,7 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( { marks_reader->ignore(columns_count * sizeof(MarkInCompressedFile)); size_t granularity; - readIntBinary(granularity, *marks_reader); + readBinaryLittleEndian(granularity, *marks_reader); index_granularity_.appendMark(granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 08764eedb43..2bbac766c8e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -30,7 +30,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 468747a6c36..f04e08838a9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -17,6 +17,7 @@ namespace DB namespace ErrorCodes { extern const int DIRECTORY_ALREADY_EXISTS; + extern const int NOT_IMPLEMENTED; } MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( @@ -32,7 +33,7 @@ MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * /* uncompressed_cache */, MarkCache * /* mark_cache */, @@ -45,7 +46,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( - read_info, ptr, columns_to_read, metadata_snapshot, mark_ranges, reader_settings); + read_info, ptr, columns_to_read, storage_snapshot, mark_ranges, reader_settings); } IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( @@ -138,8 +139,12 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & return new_data_part_storage; } -DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const +DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, + const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const { + if (disk_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "InMemory parts are not compatible with disk transactions"); String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); return flushToDisk(detached_path, metadata_snapshot); } @@ -167,7 +172,7 @@ IMergeTreeDataPart::Checksum MergeTreeDataPartInMemory::calculateBlockChecksum() column.column->updateHashFast(hash); checksum.uncompressed_size = block.bytes(); - hash.get128(checksum.uncompressed_hash); + checksum.uncompressed_hash = getSipHash128AsPair(hash); return checksum; } diff --git 
a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 2698b69b38e..95f7b796f9a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -19,7 +19,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -42,7 +42,8 @@ public: bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; - DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; + DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const override; std::optional getColumnModificationTime(const String & /* column_name */) const override { return {}; } MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 2d886e2058b..20600909ce4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -29,7 +29,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( const NamesAndTypesList & columns_to_read, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -41,7 +41,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( auto read_info = std::make_shared(shared_from_this(), alter_conversions); return std::make_unique( read_info, columns_to_read, - metadata_snapshot, uncompressed_cache, + storage_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, avg_value_size_hints, profile_callback); } @@ -130,13 +130,13 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl( MarkInCompressedFile mark; size_t granularity; - readBinary(mark.offset_in_compressed_file, *marks_reader); - readBinary(mark.offset_in_decompressed_block, *marks_reader); + readBinaryLittleEndian(mark.offset_in_compressed_file, *marks_reader); + readBinaryLittleEndian(mark.offset_in_decompressed_block, *marks_reader); ++marks_count; if (index_granularity_info_.mark_type.adaptive) { - readIntBinary(granularity, *marks_reader); + readBinaryLittleEndian(granularity, *marks_reader); index_granularity_.appendMark(granularity); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 0b2ffeb4b18..2076a1ec028 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -25,7 +25,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, - const StorageMetadataPtr & metadata_snapshot, + const StorageSnapshotPtr & storage_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * 
mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 5e1da21da5b..3475130bf24 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -228,8 +228,8 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G }; - writeIntBinary(plain_hashing.count(), marks_out); - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(plain_hashing.count(), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); writeColumnSingleGranule( block.getByName(name_and_type->name), data_part->getSerialization(name_and_type->name), @@ -239,7 +239,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G prev_stream->hashing_buf.next(); } - writeIntBinary(granule.rows_to_write, marks_out); + writeBinaryLittleEndian(granule.rows_to_write, marks_out); } } @@ -270,10 +270,10 @@ void MergeTreeDataPartWriterCompact::fillDataChecksums(IMergeTreeDataPart::Check { for (size_t i = 0; i < columns_list.size(); ++i) { - writeIntBinary(plain_hashing.count(), marks_out); - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(plain_hashing.count(), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); } - writeIntBinary(static_cast(0), marks_out); + writeBinaryLittleEndian(static_cast(0), marks_out); } for (const auto & [_, stream] : streams_by_codec) @@ -365,8 +365,9 @@ void MergeTreeDataPartWriterCompact::addToChecksums(MergeTreeDataPartChecksums & { uncompressed_size += stream->hashing_buf.count(); auto stream_hash = stream->hashing_buf.getHash(); + transformEndianness(stream_hash); uncompressed_hash = CityHash_v1_0_2::CityHash128WithSeed( - reinterpret_cast(&stream_hash), sizeof(stream_hash), uncompressed_hash); + reinterpret_cast(&stream_hash), sizeof(stream_hash), uncompressed_hash); } checksums.files[data_file_name].is_compressed = true; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index f57ffa5ee14..79b72d4ae39 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -313,13 +313,13 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block if (stream.compressed_hashing.offset() >= settings.min_compress_block_size) stream.compressed_hashing.next(); - writeIntBinary(stream.plain_hashing.count(), marks_out); - writeIntBinary(stream.compressed_hashing.offset(), marks_out); + writeBinaryLittleEndian(stream.plain_hashing.count(), marks_out); + writeBinaryLittleEndian(stream.compressed_hashing.offset(), marks_out); /// Actually this numbers is redundant, but we have to store them /// to be compatible with the normal .mrk2 file format if (settings.can_use_adaptive_granularity) - writeIntBinary(1UL, marks_out); + writeBinaryLittleEndian(1UL, marks_out); } size_t pos = granule.start_row; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index f9fe6f2c8ab..bcf340e0f55 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -275,10 +275,10 @@ void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stre Stream & stream = *column_streams[stream_with_mark.stream_name]; WriteBuffer & marks_out = 
stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing; - writeIntBinary(stream_with_mark.mark.offset_in_compressed_file, marks_out); - writeIntBinary(stream_with_mark.mark.offset_in_decompressed_block, marks_out); + writeBinaryLittleEndian(stream_with_mark.mark.offset_in_compressed_file, marks_out); + writeBinaryLittleEndian(stream_with_mark.mark.offset_in_decompressed_block, marks_out); if (settings.can_use_adaptive_granularity) - writeIntBinary(rows_in_mark, marks_out); + writeBinaryLittleEndian(rows_in_mark, marks_out); } StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( @@ -452,10 +452,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai "Incorrect number of marks in memory {}, on disk (at least) {}", index_granularity.getMarksCount(), mark_num + 1); - DB::readBinary(offset_in_compressed_file, *mrk_in); - DB::readBinary(offset_in_decompressed_block, *mrk_in); + readBinaryLittleEndian(offset_in_compressed_file, *mrk_in); + readBinaryLittleEndian(offset_in_decompressed_block, *mrk_in); if (settings.can_use_adaptive_granularity) - DB::readBinary(index_granularity_rows, *mrk_in); + readBinaryLittleEndian(index_granularity_rows, *mrk_in); else index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ee515106591..d4374611757 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1031,6 +1031,10 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd }; size_t num_threads = std::min(num_streams, parts.size()); + if (settings.max_threads_for_indexes) + { + num_threads = std::min(num_streams, settings.max_threads_for_indexes); + } if (num_threads <= 1) { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 7e306880e9c..d6ba7cb3093 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -39,6 +39,7 @@ namespace ProfileEvents extern const Event MergeTreeDataProjectionWriterRows; extern const Event MergeTreeDataProjectionWriterUncompressedBytes; extern const Event MergeTreeDataProjectionWriterCompressedBytes; + extern const Event RejectedInserts; } namespace DB @@ -58,7 +59,8 @@ void buildScatterSelector( const ColumnRawPtrs & columns, PODArray & partition_num_to_first_row, IColumn::Selector & selector, - size_t max_parts) + size_t max_parts, + ContextPtr context) { /// Use generic hashed variant since partitioning is unlikely to be a bottleneck. using Data = HashMap; @@ -66,6 +68,8 @@ void buildScatterSelector( size_t num_rows = columns[0]->size(); size_t partitions_count = 0; + size_t throw_on_limit = context->getSettingsRef().throw_on_max_partitions_per_insert_block; + for (size_t i = 0; i < num_rows; ++i) { Data::key_type key = hash128(i, columns.size(), columns); @@ -75,7 +79,9 @@ void buildScatterSelector( if (inserted) { - if (max_parts && partitions_count >= max_parts) + if (max_parts && partitions_count >= max_parts && throw_on_limit) + { + ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception(ErrorCodes::TOO_MANY_PARTS, "Too many partitions for single INSERT block (more than {}). " "The limit is controlled by 'max_partitions_per_insert_block' setting. 
" @@ -85,6 +91,7 @@ void buildScatterSelector( "for a table is under 1000..10000. Please note, that partitioning is not intended " "to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). " "Partitions are intended for data manipulation (DROP PARTITION, etc).", max_parts); + } partition_num_to_first_row.push_back(i); it->getMapped() = partitions_count; @@ -102,6 +109,18 @@ void buildScatterSelector( if (partitions_count > 1) selector[i] = it->getMapped(); } + // Checking partitions per insert block again here outside the loop above + // so we can log the total number of partitions that would have parts created + if (max_parts && partitions_count >= max_parts && !throw_on_limit) + { + const auto & client_info = context->getClientInfo(); + Poco::Logger * log = &Poco::Logger::get("MergeTreeDataWriter"); + + LOG_WARNING(log, "INSERT query from initial_user {} (query ID: {}) inserted a block " + "that created parts in {} partitions. This is being logged " + "rather than throwing an exception as throw_on_max_partitions_per_insert_block=false.", + client_info.initial_user, client_info.initial_query_id, partitions_count); + } } /// Computes ttls and updates ttl infos @@ -171,23 +190,23 @@ void MergeTreeDataWriter::TemporaryPart::finalize() projection->getDataPartStorage().precommitTransaction(); } -std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num) +std::vector scatterAsyncInsertInfoBySelector(AsyncInsertInfoPtr async_insert_info, const IColumn::Selector & selector, size_t partition_num) { - if (nullptr == chunk_offsets) + if (nullptr == async_insert_info) { return {}; } if (selector.empty()) { - return {chunk_offsets}; + return {async_insert_info}; } - std::vector result(partition_num); + std::vector result(partition_num); std::vector last_row_for_partition(partition_num, -1); size_t offset_idx = 0; for (size_t i = 0; i < selector.size(); ++i) { ++last_row_for_partition[selector[i]]; - if (i + 1 == chunk_offsets->offsets[offset_idx]) + if (i + 1 == async_insert_info->offsets[offset_idx]) { for (size_t part_id = 0; part_id < last_row_for_partition.size(); ++part_id) { @@ -196,9 +215,12 @@ std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offs continue; size_t offset = static_cast(last_row + 1); if (result[part_id] == nullptr) - result[part_id] = std::make_shared(); + result[part_id] = std::make_shared(); if (result[part_id]->offsets.empty() || offset > *result[part_id]->offsets.rbegin()) + { result[part_id]->offsets.push_back(offset); + result[part_id]->tokens.push_back(async_insert_info->tokens[offset_idx]); + } } ++offset_idx; } @@ -207,7 +229,7 @@ std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offs } BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( - const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, ChunkOffsetsPtr chunk_offsets) + const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info) { BlocksWithPartition result; if (!block || !block.rows()) @@ -218,8 +240,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( if (!metadata_snapshot->hasPartitionKey()) /// Table is not partitioned. 
{ result.emplace_back(Block(block), Row{}); - if (chunk_offsets != nullptr) - result[0].offsets = std::move(chunk_offsets->offsets); + if (async_insert_info != nullptr) + { + result[0].offsets = std::move(async_insert_info->offsets); + result[0].tokens = std::move(async_insert_info->tokens); + } return result; } @@ -234,9 +259,9 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( PODArray partition_num_to_first_row; IColumn::Selector selector; - buildScatterSelector(partition_columns, partition_num_to_first_row, selector, max_parts); + buildScatterSelector(partition_columns, partition_num_to_first_row, selector, max_parts, context); - auto chunk_offsets_with_partition = scatterOffsetsBySelector(chunk_offsets, selector, partition_num_to_first_row.size()); + auto async_insert_info_with_partition = scatterAsyncInsertInfoBySelector(async_insert_info, selector, partition_num_to_first_row.size()); size_t partitions_count = partition_num_to_first_row.size(); result.reserve(partitions_count); @@ -255,8 +280,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( /// NOTE: returning a copy of the original block so that calculated partition key columns /// do not interfere with possible calculated primary key columns of the same name. result.emplace_back(Block(block), get_partition(0)); - if (!chunk_offsets_with_partition.empty()) - result[0].offsets = std::move(chunk_offsets_with_partition[0]->offsets); + if (!async_insert_info_with_partition.empty()) + { + result[0].offsets = std::move(async_insert_info_with_partition[0]->offsets); + result[0].tokens = std::move(async_insert_info_with_partition[0]->tokens); + } return result; } @@ -270,8 +298,11 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( result[i].block.getByPosition(col).column = std::move(scattered[i]); } - for (size_t i = 0; i < chunk_offsets_with_partition.size(); ++i) - result[i].offsets = std::move(chunk_offsets_with_partition[i]->offsets); + for (size_t i = 0; i < async_insert_info_with_partition.size(); ++i) + { + result[i].offsets = std::move(async_insert_info_with_partition[i]->offsets); + result[i].tokens = std::move(async_insert_info_with_partition[i]->tokens); + } return result; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 795453b2afa..2fb6b1f22d4 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -23,14 +23,15 @@ struct BlockWithPartition Block block; Row partition; std::vector offsets; + std::vector tokens; BlockWithPartition(Block && block_, Row && partition_) : block(block_), partition(std::move(partition_)) { } - BlockWithPartition(Block && block_, Row && partition_, std::vector && offsets_) - : block(block_), partition(std::move(partition_)), offsets(std::move(offsets_)) + BlockWithPartition(Block && block_, Row && partition_, std::vector && offsets_, std::vector && tokens_) + : block(block_), partition(std::move(partition_)), offsets(std::move(offsets_)), tokens(std::move(tokens_)) { } }; @@ -51,7 +52,7 @@ public: * (split rows by partition) * Works deterministically: if same block was passed, function will return same result in same order. 
*/ - static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, ChunkOffsetsPtr chunk_offsets = nullptr); + static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, AsyncInsertInfoPtr async_insert_info = nullptr); /// This structure contains not completely written temporary part. /// Some writes may happen asynchronously, e.g. for blob storages. diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 09a04f13fc7..548b61ce422 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -10,9 +10,16 @@ #include #include +#include + namespace DB { +namespace ErrorCodes +{ + extern const int ABORTED; +} + namespace { @@ -231,6 +238,11 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: return std::make_pair(info, false); } + if (stopped) + { + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we add this part."); + } + chassert(current_writer != nullptr); /// Create new record @@ -261,6 +273,11 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf if (deduplication_window == 0) return; + if (stopped) + { + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we drop this part."); + } + chassert(current_writer != nullptr); for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); /* no increment here, we erasing from map */) diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index f77cfe4fed0..1c92645dbfa 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -25,6 +25,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static constexpr auto DISTANCE_FUNCTION_L2 = "L2Distance"; +static constexpr auto DISTANCE_FUNCTION_COSINE = "cosineDistance"; + +static constexpr auto DEFAULT_TREES = 100uz; +static constexpr auto DEFAULT_DISTANCE_FUNCTION = DISTANCE_FUNCTION_L2; template AnnoyIndexWithSerialization::AnnoyIndexWithSerialization(size_t dimensions) @@ -224,9 +229,9 @@ bool MergeTreeIndexConditionAnnoy::alwaysUnknownOrTrue() const std::vector MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const { - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return getUsefulRangesImpl(idx_granule); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return getUsefulRangesImpl(idx_granule); std::unreachable(); } @@ -289,9 +294,9 @@ MergeTreeIndexAnnoy::MergeTreeIndexAnnoy(const IndexDescription & index_, UInt64 MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const { - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return std::make_shared>(index.name, index.sample_block); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return std::make_shared>(index.name, index.sample_block); std::unreachable(); } @@ -299,9 +304,9 @@ MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const MergeTreeIndexAggregatorPtr MergeTreeIndexAnnoy::createIndexAggregator() const { /// TODO: Support more metrics. 
Available metrics: https://github.com/spotify/annoy/blob/master/src/annoymodule.cc#L151-L171 - if (distance_function == "L2Distance") + if (distance_function == DISTANCE_FUNCTION_L2) return std::make_shared>(index.name, index.sample_block, trees); - else if (distance_function == "cosineDistance") + else if (distance_function == DISTANCE_FUNCTION_COSINE) return std::make_shared>(index.name, index.sample_block, trees); std::unreachable(); } @@ -313,14 +318,11 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const Selec MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) { - static constexpr auto default_trees = 100uz; - static constexpr auto default_distance_function = "L2Distance"; - - String distance_function = default_distance_function; + String distance_function = DEFAULT_DISTANCE_FUNCTION; if (!index.arguments.empty()) distance_function = index.arguments[0].get(); - UInt64 trees = default_trees; + UInt64 trees = DEFAULT_TREES; if (index.arguments.size() > 1) trees = index.arguments[1].get(); @@ -350,8 +352,8 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) if (!index.arguments.empty()) { String distance_name = index.arguments[0].get(); - if (distance_name != "L2Distance" && distance_name != "cosineDistance") - throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions 'L2Distance' and 'cosineDistance'"); + if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE) + throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE); } /// Check data type of indexed column: diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 9c34a149128..eaa695c7f7a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -256,10 +256,6 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( if (!key_columns.contains(name)) key_columns.insert(name); - ASTPtr ast_filter_node = buildFilterNode(query_info.query); - if (!ast_filter_node) - return; - if (context->getSettingsRef().allow_experimental_analyzer) { if (!query_info.filter_actions_dag) @@ -280,6 +276,10 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( } else { + ASTPtr ast_filter_node = buildFilterNode(query_info.query); + if (!ast_filter_node) + return; + if (checkASTUseless(ast_filter_node)) return; @@ -457,8 +457,10 @@ const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const Actio if (arguments_size != 1) return nullptr; + const ActionsDAG::Node * argument = &traverseDAG(*arguments[0], result_dag, context, node_to_result_node); + auto bit_swap_last_two_function = FunctionFactory::instance().get("__bitSwapLastTwo", context); - return &result_dag->addFunction(bit_swap_last_two_function, {arguments[0]}, {}); + return &result_dag->addFunction(bit_swap_last_two_function, {argument}, {}); } else if (function_name == "and" || function_name == "indexHint" || function_name == "or") { diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 5c722eec380..c9b22c8a03e 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -160,7 +160,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() size_t granularity; reader->readStrict( reinterpret_cast(plain_marks.data() + i * 
columns_in_mark), columns_in_mark * sizeof(MarkInCompressedFile)); - readIntBinary(granularity, *reader); + readBinaryLittleEndian(granularity, *reader); } if (!reader->eof()) @@ -170,6 +170,16 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() mrk_path, marks_count, expected_uncompressed_size); } +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + std::ranges::for_each( + plain_marks, + [](auto & plain_mark) + { + plain_mark.offset_in_compressed_file = std::byteswap(plain_mark.offset_in_compressed_file); + plain_mark.offset_in_decompressed_block = std::byteswap(plain_mark.offset_in_decompressed_block); + }); +#endif + auto res = std::make_shared(plain_marks); ProfileEvents::increment(ProfileEvents::LoadedMarksCount, marks_count * columns_in_mark); diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index cac26c5ac23..4dbccb91620 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -61,7 +61,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP { auto out = disk->writeFile(std::filesystem::path(path_prefix) / file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings); *out << "format version: 1\n" - << "create time: " << LocalDateTime(create_time) << "\n"; + << "create time: " << LocalDateTime(create_time, DateLUT::serverTimezoneInstance()) << "\n"; *out << "commands: "; commands.writeText(*out, /* with_pure_metadata_commands = */ false); *out << "\n"; diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index bce33438229..63efd137b0b 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -265,12 +265,12 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const for (const Field & field : value) applyVisitor(hashing_visitor, field); - char hash_data[16]; - hash.get128(hash_data); - result.resize(32); - for (size_t i = 0; i < 16; ++i) + const auto hash_data = getSipHash128AsArray(hash); + const auto hash_size = hash_data.size(); + result.resize(hash_size * 2); + for (size_t i = 0; i < hash_size; ++i) #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - writeHexByteLowercase(hash_data[16 - 1 - i], &result[2 * i]); + writeHexByteLowercase(hash_data[hash_size - 1 - i], &result[2 * i]); #else writeHexByteLowercase(hash_data[i], &result[2 * i]); #endif diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index fbad7d2f7be..901801d8187 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -97,7 +97,7 @@ std::future MergeTreePrefetchedReadPool::createPrefetchedRea Priority priority) const { auto reader = data_part.getReader( - columns, storage_snapshot->metadata, required_ranges, + columns, storage_snapshot, required_ranges, uncompressed_cache, mark_cache, alter_conversions, reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); @@ -328,7 +328,10 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf for (const auto & range : part.ranges) part_info->sum_marks += range.end - range.begin; - part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names); + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? 
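// --- Illustrative sketch, not part of the diff above ---
// The MergeTreePartition::getID() hunk above renders a 128-bit SipHash as a 32-character
// lowercase hex ID and, on big-endian hosts, walks the bytes in reverse so the textual ID
// matches what little-endian machines produce. hashToHexId is a hypothetical helper; the
// real code uses getSipHash128AsArray() and writeHexByteLowercase().
#include <array>
#include <bit>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>

std::string hashToHexId(const std::array<uint8_t, 16> & hash)
{
    static constexpr char digits[] = "0123456789abcdef";
    std::string result(hash.size() * 2, '0');
    for (size_t i = 0; i < hash.size(); ++i)
    {
        // On big-endian hosts the in-memory byte order of the hash is reversed,
        // so emit bytes from the end to get the same textual ID everywhere.
        const uint8_t byte = (std::endian::native == std::endian::big) ? hash[hash.size() - 1 - i] : hash[i];
        result[2 * i] = digits[byte >> 4];
        result[2 * i + 1] = digits[byte & 0x0F];
    }
    return result;
}

int main()
{
    std::array<uint8_t, 16> hash{};
    hash[0] = 0xAB;
    hash[15] = 0x01;
    std::cout << hashToHexId(hash) << std::endl; // "ab00...01" on little-endian hosts
}
// --- end of sketch ---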
prewhere_info->prewhere_actions->getRequiredColumnsNames() + : column_names; + part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, columns); const auto task_columns = getReadTaskColumns( part_reader_info, @@ -369,9 +372,9 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf } if (prewhere_info) { - for (const auto & columns : task_columns.pre_columns) + for (const auto & cols : task_columns.pre_columns) { - for (const auto & col : columns) + for (const auto & col : cols) { const size_t col_size = part.data_part->getColumnSize(col.name).data_compressed; part_info->estimated_memory_usage_for_single_prefetch += std::min(col_size, settings.prefetch_buffer_size); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 2ab90189f9d..896769d9355 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -73,8 +73,10 @@ MergeTreeReadPool::MergeTreeReadPool( size_t total_marks = 0; for (const auto & part : parts_ranges) { - total_compressed_bytes += getApproxSizeOfPart( - *part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_); + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? prewhere_info->prewhere_actions->getRequiredColumnsNames() + : column_names_; + total_compressed_bytes += getApproxSizeOfPart(*part.data_part, columns); total_marks += part.getMarksCount(); } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index f65e66ff52d..feefca68e7b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes MergeTreeReaderCompact::MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -29,7 +29,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( : IMergeTreeReader( data_part_info_for_read_, columns_, - metadata_snapshot_, + storage_snapshot_, uncompressed_cache_, mark_cache_, mark_ranges_, @@ -130,7 +130,7 @@ void MergeTreeReaderCompact::fillColumnPositions() size_t columns_num = columns_to_read.size(); column_positions.resize(columns_num); - read_only_offsets.resize(columns_num); + columns_for_offsets.resize(columns_num); for (size_t i = 0; i < columns_num; ++i) { @@ -149,20 +149,48 @@ void MergeTreeReaderCompact::fillColumnPositions() position.reset(); } + /// If array of Nested column is missing in part, + /// we have to read its offsets if they exist. if (!position && is_array) { - /// If array of Nested column is missing in part, - /// we have to read its offsets if they exist. - auto position_level = findColumnForOffsets(column_to_read); - if (position_level.has_value()) + NameAndTypePair column_to_read_with_subcolumns = column_to_read; + auto [name_in_storage, subcolumn_name] = Nested::splitName(column_to_read.name); + + /// If it is a part of Nested, we need to get the column from + /// storage metatadata which is converted to Nested type with subcolumns. + /// It is required for proper counting of shared streams. 
+ if (!subcolumn_name.empty()) { - column_positions[i].emplace(position_level->first); - read_only_offsets[i].emplace(position_level->second); + /// If column is renamed get the new name from storage metadata. + if (alter_conversions->columnHasNewName(name_in_storage)) + name_in_storage = alter_conversions->getColumnNewName(name_in_storage); + + if (!storage_columns_with_collected_nested) + { + auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects(); + auto storage_columns_list = Nested::collect(storage_snapshot->getColumns(options)); + storage_columns_with_collected_nested = ColumnsDescription(std::move(storage_columns_list)); + } + + column_to_read_with_subcolumns = storage_columns_with_collected_nested + ->getColumnOrSubcolumn( + GetColumnsOptions::All, + Nested::concatenateName(name_in_storage, subcolumn_name)); + } + + auto name_level_for_offsets = findColumnForOffsets(column_to_read_with_subcolumns); + + if (name_level_for_offsets.has_value()) + { + column_positions[i] = data_part_info_for_read->getColumnPosition(name_level_for_offsets->first); + columns_for_offsets[i] = name_level_for_offsets; partially_read_columns.insert(column_to_read.name); } } else + { column_positions[i] = std::move(position); + } } } @@ -203,7 +231,7 @@ size_t MergeTreeReaderCompact::readRows( auto & column = res_columns[pos]; size_t column_size_before_reading = column->size(); - readData(columns_to_read[pos], column, from_mark, current_task_last_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]); + readData(columns_to_read[pos], column, from_mark, current_task_last_mark, *column_positions[pos], rows_to_read, columns_for_offsets[pos]); size_t read_rows_in_column = column->size() - column_size_before_reading; if (read_rows_in_column != rows_to_read) @@ -239,23 +267,37 @@ size_t MergeTreeReaderCompact::readRows( void MergeTreeReaderCompact::readData( const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark, size_t current_task_last_mark, size_t column_position, size_t rows_to_read, - std::optional only_offsets_level) + ColumnNameLevel name_level_for_offsets) { const auto & [name, type] = name_and_type; + std::optional column_for_offsets; + + if (name_level_for_offsets.has_value()) + { + const auto & part_columns = data_part_info_for_read->getColumnsDescription(); + column_for_offsets = part_columns.getPhysical(name_level_for_offsets->first); + } adjustUpperBound(current_task_last_mark); /// Must go before seek. if (!isContinuousReading(from_mark, column_position)) seekToMark(from_mark, column_position); + /// If we read only offsets we have to read prefix anyway + /// to preserve correctness of serialization. 
+ auto buffer_getter_for_prefix = [&](const auto &) -> ReadBuffer * + { + return data_buffer; + }; + auto buffer_getter = [&](const ISerialization::SubstreamPath & substream_path) -> ReadBuffer * { /// Offset stream from another column could be read, in case of current /// column does not exists (see findColumnForOffsets() in /// MergeTreeReaderCompact::fillColumnPositions()) - bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - if (only_offsets_level.has_value()) + if (name_level_for_offsets.has_value()) { + bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (!is_offsets) return nullptr; @@ -275,7 +317,7 @@ void MergeTreeReaderCompact::readData( /// /// Here only_offsets_level is the level of the alternative stream, /// and substream_path.size() is the level of the current stream. - if (only_offsets_level.value() < ISerialization::getArrayLevel(substream_path)) + if (name_level_for_offsets->second < ISerialization::getArrayLevel(substream_path)) return nullptr; } @@ -283,22 +325,32 @@ void MergeTreeReaderCompact::readData( }; ISerialization::DeserializeBinaryBulkStatePtr state; + ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix; + ISerialization::DeserializeBinaryBulkSettings deserialize_settings; - deserialize_settings.getter = buffer_getter; deserialize_settings.avg_value_size_hint = avg_value_size_hints[name]; if (name_and_type.isSubcolumn()) { - const auto & type_in_storage = name_and_type.getTypeInStorage(); - const auto & name_in_storage = name_and_type.getNameInStorage(); + NameAndTypePair name_type_in_storage{name_and_type.getNameInStorage(), name_and_type.getTypeInStorage()}; - auto serialization = getSerializationInPart({name_in_storage, type_in_storage}); - ColumnPtr temp_column = type_in_storage->createColumn(*serialization); + /// In case of reading onlys offset use the correct serialization for reading of the prefix + auto serialization = getSerializationInPart(name_type_in_storage); + ColumnPtr temp_column = name_type_in_storage.type->createColumn(*serialization); + if (column_for_offsets) + { + auto serialization_for_prefix = getSerializationInPart(*column_for_offsets); + + deserialize_settings.getter = buffer_getter_for_prefix; + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + } + + deserialize_settings.getter = buffer_getter; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr); - auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), temp_column); + auto subcolumn = name_type_in_storage.type->getSubcolumn(name_and_type.getSubcolumnName(), temp_column); /// TODO: Avoid extra copying. 
if (column->empty()) @@ -308,13 +360,24 @@ void MergeTreeReaderCompact::readData( } else { + /// In case of reading only offsets use the correct serialization for reading the prefix auto serialization = getSerializationInPart(name_and_type); + + if (column_for_offsets) + { + auto serialization_for_prefix = getSerializationInPart(*column_for_offsets); + + deserialize_settings.getter = buffer_getter_for_prefix; + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + } + + deserialize_settings.getter = buffer_getter; serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state, nullptr); } /// The buffer is left in inconsistent state after reading single offsets - if (only_offsets_level.has_value()) + if (name_level_for_offsets.has_value()) last_read_granule.reset(); else last_read_granule.emplace(from_mark, column_position); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index f180d7508f7..cf706526363 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -21,7 +21,7 @@ public: MergeTreeReaderCompact( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -52,12 +52,19 @@ private: MergeTreeMarksLoader marks_loader; + /// Storage columns with collected separate arrays of Nested to columns of Nested type. + /// They maybe be needed for finding offsets of missed Nested columns in parts. + /// They are rarely used and are heavy to initialized, so we create them + /// only on demand and cache in this field. + std::optional storage_columns_with_collected_nested; + /// Positions of columns in part structure. using ColumnPositions = std::vector>; ColumnPositions column_positions; + /// Should we read full column or only it's offsets. /// Element of the vector is the level of the alternative stream. - std::vector> read_only_offsets; + std::vector columns_for_offsets; /// For asynchronous reading from remote fs. Same meaning as in MergeTreeReaderStream. std::optional last_right_offset; @@ -68,8 +75,8 @@ private: void seekToMark(size_t row_index, size_t column_index); void readData(const NameAndTypePair & name_and_type, ColumnPtr & column, size_t from_mark, - size_t current_task_last_mark, size_t column_position, size_t rows_to_read, - std::optional only_offsets_level); + size_t current_task_last_mark, size_t column_position, + size_t rows_to_read, ColumnNameLevel name_level_for_offsets); /// Returns maximal value of granule size in compressed file from @mark_ranges. /// This value is used as size of read buffer. 
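// --- Illustrative sketch, not part of the diff above ---
// The MergeTreeReaderCompact::readData() changes above keep the existing trick of reading
// only the shared array-offsets stream when a Nested column is missing from the part: the
// substream getter returns nullptr for everything except a matching ArraySizes stream.
// SubstreamKind, pickStream and FakeBuffer are simplified stand-ins for
// ISerialization::Substream, the buffer_getter lambda and the real read buffers.
#include <cstddef>
#include <iostream>
#include <optional>

enum class SubstreamKind { Regular, ArraySizes };

struct Substream
{
    SubstreamKind kind;
    size_t array_level;
};

struct FakeBuffer { const char * name; };

// When offsets_level is set, we read on behalf of another column that only shares offsets:
// every substream except ArraySizes (of a level the shared column actually has) is skipped.
FakeBuffer * pickStream(const Substream & substream, std::optional<size_t> offsets_level, FakeBuffer & data)
{
    if (!offsets_level)
        return &data;                           // normal read: every substream uses the data buffer
    if (substream.kind != SubstreamKind::ArraySizes)
        return nullptr;                         // skip non-offset substreams entirely
    if (*offsets_level < substream.array_level)
        return nullptr;                         // deeper nesting than the shared offsets column has
    return &data;
}

int main()
{
    FakeBuffer data{"data.bin"};
    std::cout << (pickStream({SubstreamKind::Regular, 0}, std::nullopt, data) != nullptr) << '\n'; // 1
    std::cout << (pickStream({SubstreamKind::Regular, 0}, 0, data) != nullptr) << '\n';            // 0
    std::cout << (pickStream({SubstreamKind::ArraySizes, 0}, 0, data) != nullptr) << '\n';         // 1
    std::cout << (pickStream({SubstreamKind::ArraySizes, 1}, 0, data) != nullptr) << '\n';         // 0
}
// --- end of sketch ---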
@@ -84,7 +91,6 @@ private: ReadBufferFromFileBase::ProfileCallback profile_callback; clockid_t clock_type; - bool initialized = false; }; diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index fed8032fb17..bacd86511f5 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -19,13 +19,13 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_) : IMergeTreeReader( data_part_info_for_read_, columns_, - metadata_snapshot_, + storage_snapshot_, nullptr, nullptr, mark_ranges_, @@ -42,7 +42,7 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( { if (auto offsets_position = findColumnForOffsets(column_to_read)) { - positions_for_offsets[column_to_read.name] = offsets_position->first; + positions_for_offsets[column_to_read.name] = *data_part_info_for_read->getColumnPosition(offsets_position->first); partially_read_columns.insert(column_to_read.name); } } diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h index cb67bc46eae..e26a98f0916 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.h +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.h @@ -18,7 +18,7 @@ public: MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, DataPartInMemoryPtr data_part_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_); diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 140fb6da5df..a0fe2dc63b4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -24,7 +24,7 @@ namespace MergeTreeReaderWide::MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -35,7 +35,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( : IMergeTreeReader( data_part_info_, columns_, - metadata_snapshot_, + storage_snapshot_, uncompressed_cache_, mark_cache_, mark_ranges_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index c31b1baf32e..2a850cc2814 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -17,7 +17,7 @@ public: MergeTreeReaderWide( MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_, NamesAndTypesList columns_, - const StorageMetadataPtr & metadata_snapshot_, + const StorageSnapshotPtr & storage_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index ce4ba69c08c..e5a45ad5554 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -65,7 +65,7 @@ void MergeTreeSelectAlgorithm::initializeReaders() 
initializeMergeTreeReadersForPart( data_part, alter_conversions, task_columns, - storage_snapshot->getMetadataForQuery(), all_mark_ranges, {}, {}); + all_mark_ranges, /*value_size_map=*/ {}, /*profile_callback=*/ {}); } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 5a6d59bf0be..8a9faa5cee4 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -138,6 +138,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( ReadSettings read_settings; if (read_with_direct_io) read_settings.direct_io_threshold = 1; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; MergeTreeReaderSettings reader_settings = { @@ -150,7 +151,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); reader = data_part->getReader( - columns_for_reader, storage_snapshot->metadata, + columns_for_reader, storage_snapshot, *mark_ranges, /* uncompressed_cache = */ nullptr, mark_cache.get(), alter_conversions, reader_settings, {}, {}); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index dc24327712c..bf67b6a0f52 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -33,7 +33,7 @@ struct Settings; /** Data storing format settings. */ \ M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ - M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ + M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ @@ -119,6 +119,7 @@ struct Settings; M(Bool, detach_not_byte_identical_parts, false, "Do not remove non byte-idential parts for ReplicatedMergeTree, instead detach them (maybe useful for further analysis).", 0) \ M(UInt64, max_replicated_fetches_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ M(UInt64, max_replicated_sends_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ + M(Milliseconds, wait_for_unique_parts_send_before_shutdown_ms, 0, "Before shutdown table will wait for required amount time for unique parts (exist only on current replica) to be fetched by other replicas (0 means disabled).", 0) \ \ /** Check delay of replicas settings. 
*/ \ M(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less that this value.", 0) \ @@ -160,7 +161,7 @@ struct Settings; M(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \ M(UInt64, min_bytes_to_rebalance_partition_over_jbod, 0, "Minimal amount of bytes to enable part rebalance over JBOD array (0 - disabled).", 0) \ M(Bool, check_sample_column_is_correct, true, "Check columns or columns by hash for sampling are unsigned integer.", 0) \ - M(Bool, allow_vertical_merges_from_compact_to_wide_parts, false, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \ + M(Bool, allow_vertical_merges_from_compact_to_wide_parts, true, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \ M(Bool, enable_the_endpoint_id_with_zookeeper_name_prefix, false, "Enable the endpoint id with zookeeper name prefix for the replicated merge tree table", 0) \ M(UInt64, zero_copy_merge_mutation_min_parts_size_sleep_before_lock, 1ULL * 1024 * 1024 * 1024, "If zero copy replication is enabled sleep random amount of time before trying to lock depending on parts size for merge or mutation", 0) \ \ @@ -169,8 +170,9 @@ struct Settings; M(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \ M(Bool, use_metadata_cache, false, "Experimental feature to speed up parts loading process by using MergeTree metadata cache", 0) \ M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \ - M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for Zero-copy table-independet info.", 0) \ + M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \ M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ + \ /** Compress marks and primary key. */ \ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ M(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 01094d65ac5..892ae9ead87 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -45,8 +45,6 @@ void MergeTreeThreadSelectAlgorithm::finalizeNewTask() /// Allows pool to reduce number of threads in case of too slow reads. auto profile_callback = [this](ReadBufferFromFileBase::ProfileInfo info_) { pool->profileFeedback(info_); }; - const auto & metadata_snapshot = storage_snapshot->metadata; - IMergeTreeReader::ValueSizeMap value_size_map; if (reader && part_name != last_read_part_name) @@ -57,7 +55,7 @@ void MergeTreeThreadSelectAlgorithm::finalizeNewTask() /// task->reader.valid() means there is a prefetched reader in this test, use it. 
const bool init_new_readers = !reader || task->reader.valid() || part_name != last_read_part_name; if (init_new_readers) - initializeMergeTreeReadersForCurrentTask(metadata_snapshot, value_size_map, profile_callback); + initializeMergeTreeReadersForCurrentTask(value_size_map, profile_callback); last_read_part_name = part_name; } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 1620ba98d58..466dbb45a4d 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -22,6 +22,33 @@ namespace DB /// This is used to assume that condition is likely to have good selectivity. static constexpr auto threshold = 2; +static NameToIndexMap fillNamesPositions(const Names & names) +{ + NameToIndexMap names_positions; + + for (size_t position = 0; position < names.size(); ++position) + { + const auto & name = names[position]; + names_positions[name] = position; + } + + return names_positions; +} + +/// Find minimal position of any of the column in primary key. +static Int64 findMinPosition(const NameSet & condition_table_columns, const NameToIndexMap & primary_key_positions) +{ + Int64 min_position = std::numeric_limits::max() - 1; + + for (const auto & column : condition_table_columns) + { + auto it = primary_key_positions.find(column); + if (it != primary_key_positions.end()) + min_position = std::min(min_position, static_cast(it->second)); + } + + return min_position; +} MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( std::unordered_map column_sizes_, @@ -35,6 +62,7 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( , supported_columns{supported_columns_} , sorting_key_names{NameSet( metadata_snapshot->getSortingKey().column_names.begin(), metadata_snapshot->getSortingKey().column_names.end())} + , primary_key_names_positions(fillNamesPositions(metadata_snapshot->getPrimaryKey().column_names)) , log{log_} , column_sizes{std::move(column_sizes_)} { @@ -60,6 +88,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons where_optimizer_context.context = context; where_optimizer_context.array_joined_names = determineArrayJoinedNames(select); where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; + where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = select.final(); RPNBuilderTreeContext tree_context(context, std::move(block_with_constants), {} /*prepared_sets*/); @@ -89,6 +118,7 @@ std::optional MergeTreeWhe where_optimizer_context.context = context; where_optimizer_context.array_joined_names = {}; where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef().move_all_conditions_to_prewhere; + where_optimizer_context.move_primary_key_columns_to_end_of_prewhere = context->getSettingsRef().move_primary_key_columns_to_end_of_prewhere; where_optimizer_context.is_final = is_final; RPNBuilderTreeContext tree_context(context); @@ -196,7 +226,7 @@ static bool isConditionGood(const RPNBuilderTreeNode & condition, const NameSet return false; } -void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const +void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context, 
std::set & pk_positions) const { auto function_node_optional = node.toFunctionNodeOrNull(); @@ -207,7 +237,7 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree for (size_t i = 0; i < arguments_size; ++i) { auto argument = function_node_optional->getArgumentAt(i); - analyzeImpl(res, argument, where_optimizer_context); + analyzeImpl(res, argument, where_optimizer_context, pk_positions); } } else @@ -234,6 +264,15 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const RPNBuilderTree if (cond.viable) cond.good = isConditionGood(node, table_columns); + if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) + { + /// Consider all conditions good with this setting enabled. + cond.good = cond.viable; + /// Find min position in PK of any column that is used in this condition. + cond.min_position_in_primary_key = findMinPosition(cond.table_columns, primary_key_names_positions); + pk_positions.emplace(cond.min_position_in_primary_key); + } + res.emplace_back(std::move(cond)); } } @@ -243,7 +282,29 @@ MergeTreeWhereOptimizer::Conditions MergeTreeWhereOptimizer::analyze(const RPNBu const WhereOptimizerContext & where_optimizer_context) const { Conditions res; - analyzeImpl(res, node, where_optimizer_context); + std::set pk_positions; + analyzeImpl(res, node, where_optimizer_context, pk_positions); + + /// E.g., if the primary key is (a, b, c) but the condition is a = 1 and c = 1, + /// we should only put (a = 1) to the tail of PREWHERE, + /// and treat (c = 1) as a normal column. + if (where_optimizer_context.move_primary_key_columns_to_end_of_prewhere) + { + Int64 min_valid_pk_pos = -1; + for (auto pk_pos : pk_positions) + { + if (pk_pos != min_valid_pk_pos + 1) + break; + min_valid_pk_pos = pk_pos; + } + for (auto & cond : res) + { + if (cond.min_position_in_primary_key > min_valid_pk_pos) + cond.min_position_in_primary_key = std::numeric_limits::max() - 1; + } + LOG_TRACE(log, "The min valid primary key position for moving to the tail of PREWHERE is {}", min_valid_pk_pos); + } + return res; } diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 18555a72db1..dd9dc803f35 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -72,9 +72,14 @@ private: /// Does the condition presumably have good selectivity? bool good = false; + /// Does the condition contain primary key column? + /// If so, it is better to move it further to the end of PREWHERE chain depending on minimal position in PK of any + /// column in this condition because this condition have bigger chances to be already satisfied by PK analysis. + Int64 min_position_in_primary_key = std::numeric_limits::max() - 1; + auto tuple() const { - return std::make_tuple(!viable, !good, columns_size, table_columns.size()); + return std::make_tuple(!viable, !good, -min_position_in_primary_key, columns_size, table_columns.size()); } /// Is condition a better candidate for moving to PREWHERE? 
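To make the primary-key bookkeeping above concrete: each condition records the smallest primary-key position among its columns, and after the whole WHERE conjunction has been analyzed only positions that form a contiguous prefix of the key (0, 1, 2, ...) keep their value; the rest fall back to the "no primary key" sentinel and are treated as ordinary columns. The standalone sketch below reproduces that prefix rule with simplified types (plain std::string sets instead of the real Conditions), using the example from the comment in analyze(): primary key (a, b, c) with conditions on a and on c.

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <limits>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using NameToIndexMap = std::map<std::string, size_t>;
    static constexpr int64_t NO_PK_POSITION = std::numeric_limits<int64_t>::max() - 1;

    // Smallest primary-key position of any column used by a condition (sentinel if none).
    int64_t findMinPosition(const std::set<std::string> & condition_columns, const NameToIndexMap & pk_positions)
    {
        int64_t min_position = NO_PK_POSITION;
        for (const auto & column : condition_columns)
            if (auto it = pk_positions.find(column); it != pk_positions.end())
                min_position = std::min(min_position, static_cast<int64_t>(it->second));
        return min_position;
    }

    int main()
    {
        NameToIndexMap pk{{"a", 0}, {"b", 1}, {"c", 2}};              // primary key (a, b, c)
        std::vector<std::set<std::string>> conditions{{"a"}, {"c"}};  // WHERE a = 1 AND c = 1

        std::set<int64_t> pk_positions;
        std::vector<int64_t> min_positions;
        for (const auto & cond : conditions)
        {
            min_positions.push_back(findMinPosition(cond, pk));
            pk_positions.insert(min_positions.back());
        }

        // Only a contiguous prefix of key positions is usable: 0 is accepted, 2 breaks the chain.
        int64_t min_valid_pk_pos = -1;
        for (auto pos : pk_positions)
        {
            if (pos != min_valid_pk_pos + 1)
                break;
            min_valid_pk_pos = pos;
        }

        for (auto & pos : min_positions)
            if (pos > min_valid_pk_pos)
                pos = NO_PK_POSITION;  // (c = 1) is demoted to a normal column

        std::cout << "min_valid_pk_pos = " << min_valid_pk_pos << '\n';  // 0, so only (a = 1) keeps its PK position
    }

In the real optimizer the surviving position is negated inside Condition::tuple(), so conditions touching earlier key columns sort towards the end of the PREWHERE chain, which is what the move_primary_key_columns_to_end_of_prewhere setting asks for.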
@@ -91,6 +96,7 @@ private: ContextPtr context; NameSet array_joined_names; bool move_all_conditions_to_prewhere = false; + bool move_primary_key_columns_to_end_of_prewhere = false; bool is_final = false; }; @@ -102,7 +108,7 @@ private: std::optional optimizeImpl(const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; - void analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; + void analyzeImpl(Conditions & res, const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context, std::set & pk_positions) const; /// Transform conjunctions chain in WHERE expression to Conditions list. Conditions analyze(const RPNBuilderTreeNode & node, const WhereOptimizerContext & where_optimizer_context) const; @@ -141,6 +147,7 @@ private: const Names queried_columns; const std::optional supported_columns; const NameSet sorting_key_names; + const NameToIndexMap primary_key_names_positions; Poco::Logger * log; std::unordered_map column_sizes; UInt64 total_size_of_queried_columns = 0; diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 6cb9d50436e..164b541d2b8 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -154,8 +154,12 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() if (!zero_copy_lock || !zero_copy_lock->isLocked()) { + LOG_DEBUG( + log, + "Mutation of part {} started by some other replica, will wait for it and mutated merged part. Number of tries {}", + entry.new_part_name, + entry.num_tries); storage.watchZeroCopyLock(entry.new_part_name, disk); - LOG_DEBUG(log, "Mutation of part {} started by some other replica, will wait it and mutated merged part", entry.new_part_name); return PrepareResult{ .prepared_successfully = false, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 491c36433ca..865c2a87240 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -453,6 +453,7 @@ static ExecuteTTLType shouldExecuteTTL(const StorageMetadataPtr & metadata_snaps /// Return set of indices which should be recalculated during mutation also /// wraps input stream into additional expression stream static std::set getIndicesToRecalculate( + const MergeTreeDataPartPtr & source_part, QueryPipelineBuilder & builder, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, @@ -463,10 +464,15 @@ static std::set getIndicesToRecalculate( std::set indices_to_recalc; ASTPtr indices_recalc_expr_list = std::make_shared(); const auto & indices = metadata_snapshot->getSecondaryIndices(); + bool is_full_part_storage = isFullPartStorage(source_part->getDataPartStorage()); for (const auto & index : indices) { - if (materialized_indices.contains(index.name)) + bool need_recalculate = + materialized_indices.contains(index.name) + || (!is_full_part_storage && source_part->hasSecondaryIndex(index.name)); + + if (need_recalculate) { if (indices_to_recalc.insert(index_factory.get(index)).second) { @@ -496,15 +502,23 @@ static std::set getIndicesToRecalculate( } static std::set getProjectionsToRecalculate( + const MergeTreeDataPartPtr & source_part, const StorageMetadataPtr & metadata_snapshot, const NameSet & materialized_projections) { std::set projections_to_recalc; + bool is_full_part_storage = 
isFullPartStorage(source_part->getDataPartStorage()); + for (const auto & projection : metadata_snapshot->getProjections()) { - if (materialized_projections.contains(projection.name)) + bool need_recalculate = + materialized_projections.contains(projection.name) + || (!is_full_part_storage && source_part->hasProjection(projection.name)); + + if (need_recalculate) projections_to_recalc.insert(&projection); } + return projections_to_recalc; } @@ -1279,14 +1293,20 @@ private: removed_indices.insert(command.column_name); } + bool is_full_part_storage = isFullPartStorage(ctx->new_data_part->getDataPartStorage()); const auto & indices = ctx->metadata_snapshot->getSecondaryIndices(); + MergeTreeIndices skip_indices; for (const auto & idx : indices) { if (removed_indices.contains(idx.name)) continue; - if (ctx->materialized_indices.contains(idx.name)) + bool need_recalculate = + ctx->materialized_indices.contains(idx.name) + || (!is_full_part_storage && ctx->source_part->hasSecondaryIndex(idx.name)); + + if (need_recalculate) { skip_indices.push_back(MergeTreeIndexFactory::instance().get(idx)); } @@ -1319,7 +1339,11 @@ private: if (removed_projections.contains(projection.name)) continue; - if (ctx->materialized_projections.contains(projection.name)) + bool need_recalculate = + ctx->materialized_projections.contains(projection.name) + || (!is_full_part_storage && ctx->source_part->hasProjection(projection.name)); + + if (need_recalculate) { ctx->projections_to_build.push_back(&projection); } @@ -1841,6 +1865,7 @@ bool MutateTask::prepare() context_for_reading->setSetting("max_threads", 1); context_for_reading->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_for_reading->setSetting("max_streams_for_merge_tree_reading", Field(0)); + context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1); MutationHelpers::splitAndModifyMutationCommands( ctx->source_part, ctx->metadata_snapshot, @@ -1920,9 +1945,16 @@ bool MutateTask::prepare() else /// TODO: check that we modify only non-key columns in this case. 
{ ctx->indices_to_recalc = MutationHelpers::getIndicesToRecalculate( - ctx->mutating_pipeline_builder, ctx->metadata_snapshot, ctx->context, ctx->materialized_indices); + ctx->source_part, + ctx->mutating_pipeline_builder, + ctx->metadata_snapshot, + ctx->context, + ctx->materialized_indices); - ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate(ctx->metadata_snapshot, ctx->materialized_projections); + ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( + ctx->source_part, + ctx->metadata_snapshot, + ctx->materialized_projections); ctx->files_to_skip = MutationHelpers::collectFilesToSkip( ctx->source_part, diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 3b382b7b32d..76b8080f64c 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -684,7 +684,7 @@ CancellationCode PartMovesBetweenShardsOrchestrator::killPartMoveToShard(const U continue; } else - throw Coordination::Exception(code, entry.znode_path); + throw Coordination::Exception::fromPath(code, entry.znode_path); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index b4748ee77ea..6ad77119016 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -174,7 +174,7 @@ bool ReplicatedMergeMutateTaskBase::executeImpl() part_log_writer = prepare_result.part_log_writer; - /// Avoid resheduling, execute fetch here, in the same thread. + /// Avoid rescheduling, execute fetch here, in the same thread. if (!prepare_result.prepared_successfully) return execute_fetch(prepare_result.need_to_check_missing_part_in_fetch); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index bcc4dc749fb..b72c148a4e8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -149,8 +149,7 @@ Float32 ReplicatedMergeTreeCleanupThread::iterate() /// do it under share lock cleaned_other += storage.clearOldWriteAheadLogs(); cleaned_part_like += storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); - if (storage.getSettings()->merge_tree_enable_clear_old_broken_detached) - cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); + cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); } /// This is loose condition: no problem if we actually had lost leadership at this moment @@ -492,7 +491,7 @@ size_t ReplicatedMergeTreeCleanupThread::clearOldBlocks(const String & blocks_di } else { - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, Coordination::errorMessage(rc)); + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, rc); } first_outdated_block++; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index ac956433eab..9eb8b6ce24c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -48,7 +48,7 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const format_version = std::max(format_version, 
FORMAT_WITH_LOG_ENTRY_ID); out << "format version: " << format_version << "\n" - << "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n" + << "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr), DateLUT::serverTimezoneInstance()) << "\n" << "source replica: " << source_replica << '\n' << "block_id: " << escape << block_id << '\n'; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp index 1bbb246338c..e2c23ecfe85 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMutationEntry.cpp @@ -12,7 +12,7 @@ namespace DB void ReplicatedMergeTreeMutationEntry::writeText(WriteBuffer & out) const { out << "format version: 1\n" - << "create time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n" + << "create time: " << LocalDateTime(create_time ? create_time : time(nullptr), DateLUT::serverTimezoneInstance()) << "\n" << "source replica: " << source_replica << "\n" << "block numbers count: " << block_numbers.size() << "\n"; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp index 88f4a3ec66f..24d907dbad6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartHeader.cpp @@ -12,9 +12,7 @@ static std::array getSipHash(const String & str) { SipHash hash; hash.update(str.data(), str.size()); - std::array result; - hash.get128(result.data()); - return result; + return getSipHash128AsArray(hash); } ReplicatedMergeTreePartHeader ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 07f46c07466..84307a3ca7a 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -497,7 +497,7 @@ void ReplicatedMergeTreeQueue::updateTimesInZooKeeper( if (code != Coordination::Error::ZOK) LOG_ERROR(log, "Couldn't set value of nodes for insert times " "({}/min_unprocessed_insert_time, max_processed_insert_time): {}. " - "This shouldn't happen often.", replica_path, Coordination::errorMessage(code)); + "This shouldn't happen often.", replica_path, code); } } @@ -554,7 +554,7 @@ void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeep auto code = zookeeper->tryRemove(fs::path(replica_path) / "queue" / entry->znode_name); if (code != Coordination::Error::ZOK) - LOG_ERROR(log, "Couldn't remove {}/queue/{}: {}. This shouldn't happen often.", replica_path, entry->znode_name, Coordination::errorMessage(code)); + LOG_ERROR(log, "Couldn't remove {}/queue/{}: {}. This shouldn't happen often.", replica_path, entry->znode_name, code); updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, max_processed_insert_time_changed); } @@ -576,7 +576,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper /// It's ok if replica became readonly due to connection loss after we got current zookeeper (in this case zookeeper must be expired). /// And it's ok if replica became readonly after shutdown. /// In other cases it's likely that someone called pullLogsToQueue(...) when queue is not initialized yet by RestartingThread. 
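Several log calls in this patch stop wrapping the error code in Coordination::errorMessage(...) and pass the Coordination::Error value straight into the LOG_* macros, which use fmt-style format strings. That only works if the enum is directly formattable; the snippet below illustrates the general technique with a made-up enum and helper, and is not the actual ClickHouse formatter.

    #include <fmt/format.h>
    #include <string_view>

    enum class KeeperError { ZOK = 0, ZNONODE = -101 };

    // Hypothetical helper translating the enum to text.
    constexpr std::string_view errorMessage(KeeperError e)
    {
        switch (e)
        {
            case KeeperError::ZOK:     return "Ok";
            case KeeperError::ZNONODE: return "No node";
        }
        return "Unknown";
    }

    // Specializing fmt::formatter lets callers pass the enum directly to "{}".
    template <>
    struct fmt::formatter<KeeperError> : fmt::formatter<std::string_view>
    {
        auto format(KeeperError e, format_context & ctx) const
        {
            return fmt::formatter<std::string_view>::format(errorMessage(e), ctx);
        }
    };

    int main()
    {
        fmt::print("Couldn't remove node: {}. This shouldn't happen often.\n", KeeperError::ZNONODE);
    }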
- bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_called; + bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_prepared_called; if (not_completely_initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Tried to pull logs to queue (reason: {}) on readonly replica {}, it's a bug", reason, storage.getStorageID().getNameForLogs()); @@ -1147,7 +1147,7 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( auto code = zookeeper->tryRemove(fs::path(replica_path) / "queue" / znode_name); if (code != Coordination::Error::ZOK) - LOG_INFO(log, "Couldn't remove {}: {}", (fs::path(replica_path) / "queue" / znode_name).string(), Coordination::errorMessage(code)); + LOG_INFO(log, "Couldn't remove {}: {}", (fs::path(replica_path) / "queue" / znode_name).string(), code); updateStateOnQueueEntryRemoval( *it, /* is_successful = */ false, @@ -1370,13 +1370,27 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( if (data_settings->allow_remote_fs_zero_copy_replication) { auto disks = storage.getDisks(); - bool only_s3_storage = true; + DiskPtr disk_with_zero_copy = nullptr; for (const auto & disk : disks) - if (!disk->supportZeroCopyReplication()) - only_s3_storage = false; + { + if (disk->supportZeroCopyReplication()) + { + disk_with_zero_copy = disk; + break; + } + } + /// Technically speaking if there are more than one disk that could store the part (a local hot + cloud cold) + /// It would be possible for the merge to happen concurrently with other replica if the other replica is doing + /// a merge using zero-copy and the cloud storage, and the local replica uses the local storage instead + /// The question is, is it worth keep retrying to do the merge over and over for the opportunity to do + /// double the work? Probably not + /// So what we do is that, even if hot merge could happen, check the zero copy lock anyway. + /// Keep in mind that for the zero copy lock check to happen (via existing_zero_copy_locks) we need to + /// have failed first because of it and added it via watchZeroCopyLock. Considering we've already tried to + /// use cloud storage and zero-copy replication, the most likely scenario is that we'll try again String replica_to_execute_merge; - if (!disks.empty() && only_s3_storage && storage.checkZeroCopyLockExists(entry.new_part_name, disks[0], replica_to_execute_merge)) + if (disk_with_zero_copy && storage.checkZeroCopyLockExists(entry.new_part_name, disk_with_zero_copy, replica_to_execute_merge)) { constexpr auto fmt_string = "Not executing merge/mutation for the part {}, waiting for {} to execute it and will fetch after."; out_postpone_reason = fmt::format(fmt_string, entry.new_part_name, replica_to_execute_merge); @@ -1789,7 +1803,7 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo Int64 part_data_version = part->info.getDataVersion(); Int64 part_metadata_version = part->getMetadataVersion(); - LOG_DEBUG(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); + LOG_TEST(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); std::map result; /// Here we return mutation commands for part which has bigger alter version than part metadata version. 
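The rewritten check above no longer requires every disk of the table to support zero-copy replication before consulting the zero-copy merge lock; it now picks the first disk that does support it and checks the lock against that disk. A reduced sketch of the new selection logic, with a stand-in Disk type instead of the real IDisk interface, is shown below.

    #include <iostream>
    #include <memory>
    #include <vector>

    struct Disk
    {
        bool zero_copy = false;
        bool supportZeroCopyReplication() const { return zero_copy; }
    };
    using DiskPtr = std::shared_ptr<Disk>;

    // New behaviour (simplified): any zero-copy capable disk triggers the lock check,
    // instead of requiring that all disks support zero-copy replication.
    DiskPtr findZeroCopyDisk(const std::vector<DiskPtr> & disks)
    {
        for (const auto & disk : disks)
            if (disk->supportZeroCopyReplication())
                return disk;
        return nullptr;
    }

    int main()
    {
        auto hot_local = std::make_shared<Disk>();
        auto cold_cloud = std::make_shared<Disk>();
        cold_cloud->zero_copy = true;

        if (auto disk = findZeroCopyDisk({hot_local, cold_cloud}))
            std::cout << "check the zero-copy merge lock on the cloud disk before executing the merge\n";
        else
            std::cout << "no zero-copy capable disk, execute the merge locally\n";
    }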
@@ -2155,7 +2169,7 @@ CommittingBlocks BaseMergePredicate::getCommitti { auto & response = locks_children[i]; if (response.error != Coordination::Error::ZOK && !partition_ids_hint) - throw Coordination::Exception(response.error, paths[i]); + throw Coordination::Exception::fromPath(response.error, paths[i]); if (response.error != Coordination::Error::ZOK) { @@ -2231,7 +2245,7 @@ bool BaseMergePredicate::operator()( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction *, - String * out_reason) const + String & out_reason) const { if (left) return canMergeTwoParts(left, right, out_reason); @@ -2243,7 +2257,7 @@ template bool BaseMergePredicate::canMergeTwoParts( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, - String * out_reason) const + String & out_reason) const { /// A sketch of a proof of why this method actually works: /// @@ -2287,22 +2301,19 @@ bool BaseMergePredicate::canMergeTwoParts( { if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { - if (out_reason) - *out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned"; + out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned"; return false; } if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { - if (out_reason) - *out_reason = "Quorum insert for part " + part->name + " is currently in progress"; + out_reason = "Quorum insert for part " + part->name + " is currently in progress"; return false; } if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { - if (out_reason) - *out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet"; + out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet"; return false; } } @@ -2316,8 +2327,7 @@ bool BaseMergePredicate::canMergeTwoParts( { if (partition_ids_hint && !partition_ids_hint->contains(left->info.partition_id)) { - if (out_reason) - *out_reason = fmt::format("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id); + out_reason = fmt::format("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id); return false; } @@ -2329,10 +2339,8 @@ bool BaseMergePredicate::canMergeTwoParts( auto block_it = block_numbers.upper_bound(left_max_block); if (block_it != block_numbers.end() && *block_it < right_min_block) { - if (out_reason) - *out_reason = "Block number " + toString(*block_it) + " is still being inserted between parts " - + left->name + " and " + right->name; - + out_reason = "Block number " + toString(*block_it) + " is still being inserted between parts " + + left->name + " and " + right->name; return false; } } @@ -2351,8 +2359,7 @@ bool BaseMergePredicate::canMergeTwoParts( String containing_part = virtual_parts_->getContainingPart(part->info); if (containing_part != part->name) { - if (out_reason) - *out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part; + out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part; return false; } } @@ -2369,10 +2376,9 @@ bool BaseMergePredicate::canMergeTwoParts( Strings covered = virtual_parts_->getPartsCoveredBy(gap_part_info); if (!covered.empty()) { - if (out_reason) - *out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front() - + " to " + 
covered.back() + ") that are still not present or being processed by " - + " other background process on this replica between " + left->name + " and " + right->name; + out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front() + + " to " + covered.back() + ") that are still not present or being processed by " + + " other background process on this replica between " + left->name + " and " + right->name; return false; } } @@ -2388,9 +2394,8 @@ bool BaseMergePredicate::canMergeTwoParts( if (left_mutation_ver != right_mutation_ver) { - if (out_reason) - *out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: " - + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively"; + out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: " + + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively"; return false; } } @@ -2401,26 +2406,23 @@ bool BaseMergePredicate::canMergeTwoParts( template bool BaseMergePredicate::canMergeSinglePart( const MergeTreeData::DataPartPtr & part, - String * out_reason) const + String & out_reason) const { if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { - if (out_reason) - *out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid); + out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { - if (out_reason) - *out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name); + out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name); return false; } if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { - if (out_reason) - *out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name); + out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } @@ -2435,8 +2437,7 @@ bool BaseMergePredicate::canMergeSinglePart( String containing_part = virtual_parts_->getContainingPart(part->info); if (containing_part != part->name) { - if (out_reason) - *out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part); + out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part); return false; } } @@ -2445,7 +2446,7 @@ bool BaseMergePredicate::canMergeSinglePart( } -bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String * out_reason) const +bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String & out_reason) const { std::lock_guard lock(queue.state_mutex); for (const auto & entry : queue.queue) @@ -2458,9 +2459,7 @@ bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const Mer if (part->info.isDisjoint(MergeTreePartInfo::fromPartName(part_name, queue.format_version))) continue; - if (out_reason) - *out_reason = fmt::format("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name); - + out_reason = fmt::format("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name); return true; } } diff --git 
a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index f205526a660..611866877d8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -505,19 +505,19 @@ public: bool operator()(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction * txn, - String * out_reason = nullptr) const; + String & out_reason) const; /// Can we assign a merge with these two parts? /// (assuming that no merge was assigned after the predicate was constructed) /// If we can't and out_reason is not nullptr, set it to the reason why we can't merge. bool canMergeTwoParts(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, - String * out_reason = nullptr) const; + String & out_reason) const; /// Can we assign a merge this part and some other part? /// For example a merge of a part and itself is needed for TTL. /// This predicate is checked for the first part of each range. - bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const; + bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const; CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, Poco::Logger * log_); @@ -561,7 +561,7 @@ public: /// Returns true if part is needed for some REPLACE_RANGE entry. /// We should not drop part in this case, because replication queue may stuck without that part. - bool partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String * out_reason) const; + bool partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String & out_reason) const; /// Return nonempty optional of desired mutation version and alter version. /// If we have no alter (modify/drop) mutations in mutations queue, than we return biggest possible diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index d7166b4a3b9..79054ef46da 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -329,7 +329,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() void ReplicatedMergeTreeRestartingThread::partialShutdown(bool part_of_full_shutdown) { - setReadonly(part_of_full_shutdown); + setReadonly(/* on_shutdown = */ part_of_full_shutdown); storage.partialShutdown(); } @@ -339,10 +339,15 @@ void ReplicatedMergeTreeRestartingThread::shutdown(bool part_of_full_shutdown) /// Stop restarting_thread before stopping other tasks - so that it won't restart them again. need_stop = true; task->deactivate(); + + /// Explicitly set the event, because the restarting thread will not set it again + if (part_of_full_shutdown) + storage.startup_event.set(); + LOG_TRACE(log, "Restarting thread finished"); - /// Stop other tasks. 
- partialShutdown(part_of_full_shutdown); + setReadonly(part_of_full_shutdown); + } void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 9e99baab4c3..02103272a1f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -25,6 +26,7 @@ public: void start(bool schedule = true) { + LOG_TRACE(log, "Starting restating thread, schedule: {}", schedule); if (schedule) task->activateAndSchedule(); else @@ -36,6 +38,7 @@ public: void shutdown(bool part_of_full_shutdown); void run(); + private: StorageReplicatedMergeTree & storage; String log_name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 22e2ab945eb..d5b23b14524 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -78,7 +78,7 @@ struct ReplicatedMergeTreeSinkImpl::DelayedChunk unmerged_block_with_partition(std::move(unmerged_block_with_partition_)), part_counters(std::move(part_counters_)) { - initBlockIDMap(); + initBlockIDMap(); } void initBlockIDMap() @@ -209,8 +209,8 @@ std::vector testSelfDeduplicate(std::vector data, std::vectorinsert(datum); } Block block({ColumnWithTypeAndName(std::move(column), DataTypePtr(new DataTypeInt64()), "a")}); - - BlockWithPartition block1(std::move(block), Row(), std::move(offsets)); + std::vector tokens(offsets.size()); + BlockWithPartition block1(std::move(block), Row(), std::move(offsets), std::move(tokens)); ProfileEvents::Counters profile_counters; ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition part( &Poco::Logger::get("testSelfDeduplicate"), MergeTreeDataWriter::TemporaryPart(), 0, std::move(hashes), std::move(block1), std::nullopt, std::move(profile_counters)); @@ -242,22 +242,24 @@ namespace size_t start = 0; auto cols = block.block.getColumns(); std::vector block_id_vec; - for (auto offset : block.offsets) + for (size_t i = 0; i < block.offsets.size(); ++i) { - SipHash hash; - for (size_t i = start; i < offset; ++i) + size_t offset = block.offsets[i]; + std::string_view token = block.tokens[i]; + if (token.empty()) { - for (const auto & col : cols) - col->updateHashWithValue(i, hash); - } - union - { - char bytes[16]; - UInt64 words[2]; - } hash_value; - hash.get128(hash_value.bytes); + SipHash hash; + for (size_t j = start; j < offset; ++j) + { + for (const auto & col : cols) + col->updateHashWithValue(j, hash); + } - block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.words[0]) + "_" + DB::toString(hash_value.words[1])); + const auto hash_value = hash.get128(); + block_id_vec.push_back(partition_id + "_" + DB::toString(hash_value.items[0]) + "_" + DB::toString(hash_value.items[1])); + } + else + block_id_vec.push_back(partition_id + "_" + std::string(token)); start = offset; } @@ -344,7 +346,7 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const if (active_replicas < quorum_size) { if (Coordination::isHardwareError(keeper_error)) - throw Coordination::Exception("Failed to check number of alive replicas", keeper_error); + throw Coordination::Exception::fromMessage(keeper_error, "Failed to check number of alive replicas"); throw Exception(ErrorCodes::TOO_FEW_LIVE_REPLICAS, "Number of alive replicas ({}) is 
less than requested quorum ({}/{}).", active_replicas, quorum_size, replicas_number); @@ -418,18 +420,18 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) convertDynamicColumnsToTuples(block, storage_snapshot); - ChunkOffsetsPtr chunk_offsets; + AsyncInsertInfoPtr async_insert_info; if constexpr (async_insert) { const auto & chunk_info = chunk.getChunkInfo(); - if (const auto * chunk_offsets_ptr = typeid_cast(chunk_info.get())) - chunk_offsets = std::make_shared(chunk_offsets_ptr->offsets); + if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) + async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, chunk_offsets); + auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; using DelayedPartitions = std::vector; @@ -453,7 +455,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { /// we copy everything but offsets which we move because they are only used by async insert if (settings.optimize_on_insert && storage.writer.getMergingMode() != MergeTreeData::MergingParams::Mode::Ordinary) - unmerged_block.emplace(Block(current_block.block), Row(current_block.partition), std::move(current_block.offsets)); + unmerged_block.emplace(Block(current_block.block), Row(current_block.partition), std::move(current_block.offsets), std::move(current_block.tokens)); } /// Write part to the filesystem under temporary name. Calculate a checksum. @@ -468,7 +470,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - /// TODO consider insert_deduplication_token block_id = getHashesForBlocks(unmerged_block.has_value() ? *unmerged_block : current_block, temp_part.part->info.partition_id); LOG_TRACE(log, "async insert part, part id {}, block id {}, offsets {}, size {}", temp_part.part->info.partition_id, toString(block_id), toString(current_block.offsets), current_block.offsets.size()); } @@ -627,8 +628,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa delayed_chunk.reset(); } -template -void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part) +template<> +bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part) { /// NOTE: No delay in this case. That's Ok. auto origin_zookeeper = storage.getZooKeeper(); @@ -643,8 +644,13 @@ void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData: try { part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - commitPart(zookeeper, part, BlockIDsType(), replicas_num, true); - PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot())); + String block_id = deduplicate ? fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : ""; + bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second; + + /// Set a special error code if the block is duplicate + int error = (deduplicate && deduplicated) ? 
ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; + PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()), ExecutionStatus(error)); + return deduplicated; } catch (...) { @@ -723,7 +729,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: retries_ctl.setUserError( ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Insert failed due to zookeeper error. Please retry. Reason: {}", - Coordination::errorMessage(write_part_info_keeper_error)); + write_part_info_keeper_error); } retries_ctl.stopRetries(); @@ -1033,7 +1039,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: retries_ctl.setUserError( ErrorCodes::UNKNOWN_STATUS_OF_INSERT, "Unknown status, client must retry. Reason: {}", - Coordination::errorMessage(multi_code)); + multi_code); return; } else if (Coordination::isUserError(multi_code)) @@ -1109,7 +1115,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: "Unexpected logical error while adding block {} with ID '{}': {}, path {}", block_number, toString(block_id), - Coordination::errorMessage(multi_code), + multi_code, failed_op_path); } } @@ -1122,7 +1128,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: "Unexpected ZooKeeper error while adding block {} with ID '{}': {}", block_number, toString(block_id), - Coordination::errorMessage(multi_code)); + multi_code); } }, [&zookeeper]() { zookeeper->cleanupEphemeralNodes(); }); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 868590efa25..4a192a822f5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -56,7 +56,7 @@ public: String getName() const override { return "ReplicatedMergeTreeSink"; } /// For ATTACHing existing data on filesystem. - void writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); /// For proper deduplication in MaterializedViews bool lastBlockIsDuplicate() const override diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Storages/MergeTree/ZooKeeperRetries.h index e55b04c27b3..e46c3f974c7 100644 --- a/src/Storages/MergeTree/ZooKeeperRetries.h +++ b/src/Storages/MergeTree/ZooKeeperRetries.h @@ -72,7 +72,7 @@ public: if (!Coordination::isHardwareError(e.code)) throw; - setKeeperError(e.code, e.message()); + setKeeperError(std::current_exception(), e.code, e.message()); } catch (...) { @@ -91,16 +91,16 @@ public: } catch (const zkutil::KeeperException & e) { - setKeeperError(e.code, e.message()); + setKeeperError(std::current_exception(), e.code, e.message()); } catch (const Exception & e) { - setUserError(e.code(), e.what()); + setUserError(std::current_exception(), e.code(), e.what()); } return false; } - void setUserError(int code, std::string message) + void setUserError(std::exception_ptr exception, int code, std::string message) { if (retries_info.logger) LOG_TRACE( @@ -113,16 +113,28 @@ public: iteration_succeeded = false; user_error.code = code; user_error.message = std::move(message); + user_error.exception = exception; keeper_error = KeeperError{}; } + template + void setUserError(std::exception_ptr exception, int code, fmt::format_string fmt, Args &&... 
args) + { + setUserError(exception, code, fmt::format(fmt, std::forward(args)...)); + } + + void setUserError(int code, std::string message) + { + setUserError(std::make_exception_ptr(Exception::createDeprecated(message, code)), code, message); + } + template void setUserError(int code, fmt::format_string fmt, Args &&... args) { setUserError(code, fmt::format(fmt, std::forward(args)...)); } - void setKeeperError(Coordination::Error code, std::string message) + void setKeeperError(std::exception_ptr exception, Coordination::Error code, std::string message) { if (retries_info.logger) LOG_TRACE( @@ -135,9 +147,21 @@ public: iteration_succeeded = false; keeper_error.code = code; keeper_error.message = std::move(message); + keeper_error.exception = exception; user_error = UserError{}; } + template + void setKeeperError(std::exception_ptr exception, Coordination::Error code, fmt::format_string fmt, Args &&... args) + { + setKeeperError(exception, code, fmt::format(fmt, std::forward(args)...)); + } + + void setKeeperError(Coordination::Error code, std::string message) + { + setKeeperError(std::make_exception_ptr(zkutil::KeeperException::createDeprecated(message, code)), code, message); + } + template void setKeeperError(Coordination::Error code, fmt::format_string fmt, Args &&... args) { @@ -163,12 +187,14 @@ private: using Code = Coordination::Error; Code code = Code::ZOK; std::string message; + std::exception_ptr exception; }; struct UserError { int code = ErrorCodes::OK; std::string message; + std::exception_ptr exception; }; bool canTry() @@ -232,11 +258,11 @@ private: void throwIfError() const { - if (user_error.code != ErrorCodes::OK) - throw Exception::createDeprecated(user_error.message, user_error.code); + if (user_error.exception) + std::rethrow_exception(user_error.exception); - if (keeper_error.code != KeeperError::Code::ZOK) - throw zkutil::KeeperException(keeper_error.message, keeper_error.code); + if (keeper_error.exception) + std::rethrow_exception(keeper_error.exception); } void logLastError(std::string_view header) diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 1967357a840..71229f6656e 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -201,20 +201,20 @@ static IMergeTreeDataPart::Checksums checkDataPart( continue; auto checksum_it = checksums_data.files.find(file_name); - /// Skip files that we already calculated. Also skip metadata files that are not checksummed. if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name)) { auto txt_checksum_it = checksums_txt_files.find(file_name); - if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0) + if ((txt_checksum_it != checksums_txt_files.end() && txt_checksum_it->second.is_compressed)) + { + /// If we have both compressed and uncompressed in txt or its .cmrk(2/3) or .cidx, then calculate them + checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); + } + else { /// The file is not compressed. 
checksum_file(file_name); } - else /// If we have both compressed and uncompressed in txt, then calculate them - { - checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - } } } diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/localBackup.cpp index 6faacf3c066..4adde07da97 100644 --- a/src/Storages/MergeTree/localBackup.cpp +++ b/src/Storages/MergeTree/localBackup.cpp @@ -125,9 +125,6 @@ void localBackup( size_t try_no = 0; const size_t max_tries = 10; - CleanupOnFail cleanup(disk_transaction ? std::function([]{}) : - [disk, destination_path]() { disk->removeRecursive(destination_path); }); - /** Files in the directory can be permanently added and deleted. * If some file is deleted during an attempt to make a backup, then try again, * because it's important to take into account any new files that might appear. @@ -136,10 +133,30 @@ void localBackup( { try { - if (copy_instead_of_hardlinks && !disk_transaction) - disk->copyDirectoryContent(source_path, disk, destination_path); - else + if (disk_transaction) + { localBackupImpl(disk, disk_transaction.get(), source_path, destination_path, make_source_readonly, 0, max_level, copy_instead_of_hardlinks, files_to_copy_intead_of_hardlinks); + } + else if (copy_instead_of_hardlinks) + { + CleanupOnFail cleanup([disk, destination_path]() { disk->removeRecursive(destination_path); }); + disk->copyDirectoryContent(source_path, disk, destination_path); + cleanup.success(); + } + else + { + std::function cleaner; + if (disk->supportZeroCopyReplication()) + /// Note: this code will create garbage on s3. We should always remove `copy_instead_of_hardlinks` files. + /// The third argument should be a list of exceptions, but (looks like) it is ignored for keep_all_shared_data = true. 
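localBackup now scopes a separate CleanupOnFail guard to each branch and only calls success() once the copy has finished, so a failed attempt removes whatever it managed to write (and, on zero-copy capable disks, does so without touching shared data). The guard's implementation is not part of this diff; the sketch below is a generic rollback-on-failure guard written for illustration and is not the actual CleanupOnFail class.

    #include <functional>
    #include <iostream>
    #include <stdexcept>
    #include <utility>

    // Minimal rollback guard: runs the cleanup callback unless success() was called first.
    class CleanupOnFailExample
    {
    public:
        explicit CleanupOnFailExample(std::function<void()> cleaner_) : cleaner(std::move(cleaner_)) {}

        ~CleanupOnFailExample()
        {
            if (!succeeded)
            {
                try { cleaner(); }
                catch (...) { /* never throw from a destructor */ }
            }
        }

        void success() { succeeded = true; }

    private:
        std::function<void()> cleaner;
        bool succeeded = false;
    };

    void copyWithRollback(bool fail)
    {
        CleanupOnFailExample cleanup([] { std::cout << "removing partially copied destination\n"; });
        if (fail)
            throw std::runtime_error("copy failed");
        cleanup.success();  // keep the destination only when the copy completed
    }

    int main()
    {
        try { copyWithRollback(true); } catch (const std::exception & e) { std::cout << e.what() << '\n'; }
        copyWithRollback(false);
    }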
+ cleaner = [disk, destination_path]() { disk->removeSharedRecursive(destination_path, /*keep_all_shared_data*/ true, {}); }; + else + cleaner = [disk, destination_path]() { disk->removeRecursive(destination_path); }; + + CleanupOnFail cleanup(std::move(cleaner)); + localBackupImpl(disk, disk_transaction.get(), source_path, destination_path, make_source_readonly, 0, max_level, false, files_to_copy_intead_of_hardlinks); + cleanup.success(); + } } catch (const DB::ErrnoException & e) { @@ -166,8 +183,6 @@ void localBackup( break; } - - cleanup.success(); } } diff --git a/src/Storages/MergeTree/tests/gtest_async_inserts.cpp b/src/Storages/MergeTree/tests/gtest_async_inserts.cpp index f67c2f7fb0f..2d8cd0acc3e 100644 --- a/src/Storages/MergeTree/tests/gtest_async_inserts.cpp +++ b/src/Storages/MergeTree/tests/gtest_async_inserts.cpp @@ -8,7 +8,7 @@ namespace DB { -std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num); +std::vector scatterAsyncInsertInfoBySelector(AsyncInsertInfoPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num); class AsyncInsertsTest : public ::testing::TestPartResult {}; @@ -16,31 +16,36 @@ class AsyncInsertsTest : public ::testing::TestPartResult TEST(AsyncInsertsTest, testScatterOffsetsBySelector) { - auto test_impl = [](std::vector offsets, std::vector selector_data, size_t part_num, std::vector> expected) + auto test_impl = [](std::vector offsets, std::vector selector_data, std::vector tokens, size_t part_num, std::vector>> expected) { - auto offset_ptr = std::make_shared(offsets); + auto offset_ptr = std::make_shared(offsets, tokens); IColumn::Selector selector(selector_data.size()); size_t num_rows = selector_data.size(); for (size_t i = 0; i < num_rows; i++) selector[i] = selector_data[i]; - auto results = scatterOffsetsBySelector(offset_ptr, selector, part_num); + auto results = scatterAsyncInsertInfoBySelector(offset_ptr, selector, part_num); ASSERT_EQ(results.size(), expected.size()); for (size_t i = 0; i < results.size(); i++) { - auto result = results[i]->offsets; + auto result = results[i]; auto expect = expected[i]; - ASSERT_EQ(result.size(), expect.size()); - for (size_t j = 0; j < result.size(); j++) - ASSERT_EQ(result[j], expect[j]); + ASSERT_EQ(result->offsets.size(), expect.size()); + ASSERT_EQ(result->tokens.size(), expect.size()); + for (size_t j = 0; j < expect.size(); j++) + { + ASSERT_EQ(result->offsets[j], std::get<0>(expect[j])); + ASSERT_EQ(result->tokens[j], std::get<1>(expect[j])); + } } }; - test_impl({5}, {0,1,0,1,0}, 2, {{3},{2}}); - test_impl({5,10}, {0,1,0,1,0,1,0,1,0,1}, 2, {{3,5},{2,5}}); - test_impl({4,8,12}, {0,1,0,1,0,2,0,2,1,2,1,2}, 3, {{2,4},{2,4},{2,4}}); - test_impl({1,2,3,4,5}, {0,1,2,3,4}, 5, {{1},{1},{1},{1},{1}}); - test_impl({3,6,10}, {1,1,1,2,2,2,0,0,0,0}, 3, {{4},{3},{3}}); + test_impl({1}, {0}, {"a"}, 1, {{{1,"a"}}}); + test_impl({5}, {0,1,0,1,0}, {"a"}, 2, {{{3,"a"}},{{2,"a"}}}); + test_impl({5,10}, {0,1,0,1,0,1,0,1,0,1}, {"a", "b"}, 2, {{{3,"a"},{5,"b"}},{{2,"a"},{5,"b"}}}); + test_impl({4,8,12}, {0,1,0,1,0,2,0,2,1,2,1,2}, {"a", "b", "c"}, 3, {{{2, "a"},{4, "b"}},{{2,"a"},{4,"c"}},{{2,"b"},{4,"c"}}}); + test_impl({1,2,3,4,5}, {0,1,2,3,4}, {"a", "b", "c", "d", "e"}, 5, {{{1,"a"}},{{1,"b"}},{{1, "c"}},{{1, "d"}},{{1, "e"}}}); + test_impl({3,6,10}, {1,1,1,2,2,2,0,0,0,0}, {"a", "b", "c"}, 3, {{{4, "c"}},{{3, "a"}},{{3, "b"}}}); } std::vector testSelfDeduplicate(std::vector data, std::vector offsets, std::vector hashes); diff --git 
a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index d048c94ac75..d01746ddf1b 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -556,8 +556,9 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl void MaterializedPostgreSQLConsumer::syncTables() { - for (const auto & table_name : tables_to_sync) + while (!tables_to_sync.empty()) { + auto table_name = *tables_to_sync.begin(); auto & storage_data = storages.find(table_name)->second; Block result_rows = storage_data.buffer.description.sample_block.cloneWithColumns(std::move(storage_data.buffer.columns)); storage_data.buffer.columns = storage_data.buffer.description.sample_block.cloneEmptyColumns(); @@ -589,8 +590,12 @@ void MaterializedPostgreSQLConsumer::syncTables() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + /// Retry this buffer later. + storage_data.buffer.columns = result_rows.mutateColumns(); + throw; } + + tables_to_sync.erase(tables_to_sync.begin()); } LOG_DEBUG(log, "Table sync end for {} tables, last lsn: {} = {}, (attempted lsn {})", tables_to_sync.size(), current_lsn, getLSNValue(current_lsn), getLSNValue(final_lsn)); @@ -742,8 +747,12 @@ void MaterializedPostgreSQLConsumer::setSetting(const SettingChange & setting) /// Read binary changes from replication slot via COPY command (starting from current lsn in a slot). bool MaterializedPostgreSQLConsumer::consume() { - bool slot_empty = true; + if (!tables_to_sync.empty()) + { + syncTables(); + } + bool slot_empty = true; try { auto tx = std::make_shared(connection->getRef()); diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h index e8d42ef3668..d3d2faba497 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLSettings.h @@ -21,6 +21,9 @@ namespace DB M(Bool, materialized_postgresql_tables_list_with_schema, false, \ "Consider by default that if there is a dot in tables list 'name.name', " \ "then the first name is postgres schema and second is postgres table. 
This setting is needed to allow table names with dots", 0) \ + M(UInt64, materialized_postgresql_backoff_min_ms, 200, "Poll backoff start point", 0) \ + M(UInt64, materialized_postgresql_backoff_max_ms, 10000, "Poll backoff max point", 0) \ + M(UInt64, materialized_postgresql_backoff_factor, 2, "Poll backoff factor", 0) \ DECLARE_SETTINGS_TRAITS(MaterializedPostgreSQLSettingsTraits, LIST_OF_MATERIALIZED_POSTGRESQL_SETTINGS) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 998db4ea79e..f57a6a26a62 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -22,8 +22,6 @@ namespace DB { -static const auto RESCHEDULE_MS = 1000; -static const auto BACKOFF_TRESHOLD_MS = 10000; static const auto CLEANUP_RESCHEDULE_MS = 600000 * 3; /// 30 min namespace ErrorCodes @@ -80,7 +78,10 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler( , schema_list(replication_settings.materialized_postgresql_schema_list) , schema_as_a_part_of_table_name(!schema_list.empty() || replication_settings.materialized_postgresql_tables_list_with_schema) , user_provided_snapshot(replication_settings.materialized_postgresql_snapshot) - , milliseconds_to_wait(RESCHEDULE_MS) + , reschedule_backoff_min_ms(replication_settings.materialized_postgresql_backoff_min_ms) + , reschedule_backoff_max_ms(replication_settings.materialized_postgresql_backoff_max_ms) + , reschedule_backoff_factor(replication_settings.materialized_postgresql_backoff_factor) + , milliseconds_to_wait(reschedule_backoff_min_ms) { if (!schema_list.empty() && !tables_list.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot have schema list and tables list at the same time"); @@ -166,7 +167,7 @@ void PostgreSQLReplicationHandler::checkConnectionAndStart() throw; LOG_ERROR(log, "Unable to set up connection. Reconnection attempt will continue. Error message: {}", pqxx_error.what()); - startup_task->scheduleAfter(RESCHEDULE_MS); + startup_task->scheduleAfter(milliseconds_to_wait); } catch (...) { @@ -435,18 +436,18 @@ void PostgreSQLReplicationHandler::consumerFunc() if (schedule_now) { - milliseconds_to_wait = RESCHEDULE_MS; + milliseconds_to_wait = reschedule_backoff_min_ms; consumer_task->schedule(); LOG_DEBUG(log, "Scheduling replication thread: now"); } else { - consumer_task->scheduleAfter(milliseconds_to_wait); - if (milliseconds_to_wait < BACKOFF_TRESHOLD_MS) - milliseconds_to_wait *= 2; + if (milliseconds_to_wait < reschedule_backoff_max_ms) + milliseconds_to_wait = std::min(milliseconds_to_wait * reschedule_backoff_factor, reschedule_backoff_max_ms); LOG_DEBUG(log, "Scheduling replication thread: after {} ms", milliseconds_to_wait); + consumer_task->scheduleAfter(milliseconds_to_wait); } } @@ -892,7 +893,7 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost catch (...) { consumer_task->activate(); - consumer_task->scheduleAfter(RESCHEDULE_MS); + consumer_task->scheduleAfter(milliseconds_to_wait); auto error_message = getCurrentExceptionMessage(false); throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, @@ -922,7 +923,7 @@ void PostgreSQLReplicationHandler::removeTableFromReplication(const String & pos catch (...) 
{ consumer_task->activate(); - consumer_task->scheduleAfter(RESCHEDULE_MS); + consumer_task->scheduleAfter(milliseconds_to_wait); auto error_message = getCurrentExceptionMessage(false); throw Exception(ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR, diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 10a196cf31b..4c16ff95692 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -140,13 +140,16 @@ private: BackgroundSchedulePool::TaskHolder consumer_task; BackgroundSchedulePool::TaskHolder cleanup_task; + const UInt64 reschedule_backoff_min_ms; + const UInt64 reschedule_backoff_max_ms; + const UInt64 reschedule_backoff_factor; + UInt64 milliseconds_to_wait; + std::atomic stop_synchronization = false; /// MaterializedPostgreSQL tables. Used for managing all operations with its internal nested tables. MaterializedStorages materialized_storages; - UInt64 milliseconds_to_wait; - bool replication_handler_initialized = false; }; diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index e568fba0495..cddf252a7e1 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -7,18 +7,18 @@ #include #include #include +#include #include #include -#include #include #include -#include -#include -#include -#include #include #include +#include +#include +#include +#include #include @@ -109,9 +109,16 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const auto external_storage_holder = std::make_shared(query_context, columns, ConstraintsDescription{}); StoragePtr storage = external_storage_holder->getTable(); InterpreterSelectQuery select( - result.query_ast, query_context, storage, {}, + result.query_ast, + query_context, + storage, + {}, /// Here we ignore ast optimizations because otherwise aggregation keys may be removed from result header as constants. - SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias().ignoreASTOptimizations()); + SelectQueryOptions{QueryProcessingStage::WithMergeableState} + .modify() + .ignoreAlias() + .ignoreASTOptimizations() + .ignoreSettingConstraints()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); @@ -220,9 +227,16 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( auto external_storage_holder = std::make_shared(query_context, columns, ConstraintsDescription{}); StoragePtr storage = external_storage_holder->getTable(); InterpreterSelectQuery select( - result.query_ast, query_context, storage, {}, + result.query_ast, + query_context, + storage, + {}, /// Here we ignore ast optimizations because otherwise aggregation keys may be removed from result header as constants. 
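With the hard-coded RESCHEDULE_MS and BACKOFF_TRESHOLD_MS constants gone, the consumer reschedule interval is driven by the three new materialized_postgresql_backoff_* settings: it starts at the minimum, is multiplied by the factor after every poll that finds nothing, and is clamped at the maximum; a poll that found data reschedules immediately and resets the wait to the minimum. The standalone sketch below prints the resulting schedule with the defaults from this patch (200 ms start, factor 2, 10000 ms cap).

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    int main()
    {
        const uint64_t backoff_min_ms = 200;    // materialized_postgresql_backoff_min_ms default
        const uint64_t backoff_max_ms = 10000;  // materialized_postgresql_backoff_max_ms default
        const uint64_t backoff_factor = 2;      // materialized_postgresql_backoff_factor default

        uint64_t milliseconds_to_wait = backoff_min_ms;
        for (int attempt = 1; attempt <= 8; ++attempt)
        {
            std::cout << "attempt " << attempt << ": wait " << milliseconds_to_wait << " ms\n";
            if (milliseconds_to_wait < backoff_max_ms)
                milliseconds_to_wait = std::min(milliseconds_to_wait * backoff_factor, backoff_max_ms);
        }
        // Prints 200, 400, 800, 1600, 3200, 6400, 10000, 10000 ms.
    }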
- SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias().ignoreASTOptimizations()); + SelectQueryOptions{QueryProcessingStage::WithMergeableState} + .modify() + .ignoreAlias() + .ignoreASTOptimizations() + .ignoreSettingConstraints()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); @@ -241,7 +255,8 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( result.sample_block_for_keys.insert({nullptr, key.type, key.name}); auto it = partition_column_name_to_value_index.find(key.name); if (it == partition_column_name_to_value_index.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "minmax_count projection can only have keys about partition columns. It's a bug"); + throw Exception( + ErrorCodes::LOGICAL_ERROR, "minmax_count projection can only have keys about partition columns. It's a bug"); result.partition_value_indices.push_back(it->second); } } @@ -282,7 +297,9 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) Pipe(std::make_shared(block)), SelectQueryOptions{ type == ProjectionDescription::Type::Normal ? QueryProcessingStage::FetchColumns - : QueryProcessingStage::WithMergeableState}) + : QueryProcessingStage::WithMergeableState} + .ignoreASTOptimizations() + .ignoreSettingConstraints()) .buildQueryPipeline(); builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. @@ -308,7 +325,7 @@ String ProjectionsDescription::toString() const for (const auto & projection : projections) list.children.push_back(projection.definition_ast); - return serializeAST(list, true); + return serializeAST(list); } ProjectionsDescription ProjectionsDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr query_context) @@ -339,9 +356,8 @@ const ProjectionDescription & ProjectionsDescription::get(const String & project auto it = map.find(projection_name); if (it == map.end()) { - String exception_message = fmt::format("There is no projection {} in table", projection_name); - appendHintsMessage(exception_message, projection_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE, "There is no projection {} in table{}", + projection_name, getHintsMessage(projection_name)); } return *(it->second); @@ -353,8 +369,8 @@ void ProjectionsDescription::add(ProjectionDescription && projection, const Stri { if (if_not_exists) return; - throw Exception(ErrorCodes::ILLEGAL_PROJECTION, "Cannot add projection {}: projection with this name already exists", - projection.name); + throw Exception( + ErrorCodes::ILLEGAL_PROJECTION, "Cannot add projection {}: projection with this name already exists", projection.name); } auto insert_it = projections.cend(); @@ -363,10 +379,10 @@ void ProjectionsDescription::add(ProjectionDescription && projection, const Stri insert_it = projections.cbegin(); else if (!after_projection.empty()) { - auto it = std::find_if(projections.cbegin(), projections.cend(), [&after_projection](const auto & projection_) - { - return projection_.name == after_projection; - }); + auto it = std::find_if( + projections.cbegin(), + projections.cend(), + [&after_projection](const auto & projection_) { return projection_.name == after_projection; }); if (it != projections.cend()) ++it; insert_it = it; @@ -384,9 +400,8 @@ void ProjectionsDescription::remove(const String & projection_name, bool if_exis if (if_exists) 
return; - String exception_message = fmt::format("There is no projection {} in table", projection_name); - appendHintsMessage(exception_message, projection_name); - throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE, "There is no projection {} in table{}", + projection_name, getHintsMessage(projection_name)); } projections.erase(it->second); diff --git a/src/Storages/ReadFromStorageProgress.cpp b/src/Storages/ReadFromStorageProgress.cpp deleted file mode 100644 index 8ad1cf92209..00000000000 --- a/src/Storages/ReadFromStorageProgress.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -void updateRowsProgressApprox( - ISource & source, - size_t num_rows, - UInt64 chunk_bytes_size, - UInt64 total_result_size, - UInt64 & total_rows_approx_accumulated, - size_t & total_rows_count_times, - UInt64 & total_rows_approx_max) -{ - if (!total_result_size) - return; - - if (!num_rows) - return; - - const auto progress = source.getReadProgress(); - if (progress && !progress->limits.empty()) - { - for (const auto & limit : progress->limits) - { - if (limit.leaf_limits.max_rows || limit.leaf_limits.max_bytes - || limit.local_limits.size_limits.max_rows || limit.local_limits.size_limits.max_bytes) - return; - } - } - - const auto bytes_per_row = std::ceil(static_cast(chunk_bytes_size) / num_rows); - size_t total_rows_approx = static_cast(std::ceil(static_cast(total_result_size) / bytes_per_row)); - total_rows_approx_accumulated += total_rows_approx; - ++total_rows_count_times; - total_rows_approx = total_rows_approx_accumulated / total_rows_count_times; - - /// We need to add diff, because total_rows_approx is incremental value. - /// It would be more correct to send total_rows_approx as is (not a diff), - /// but incrementation of total_rows_to_read does not allow that. - /// A new counter can be introduced for that to be sent to client, but it does not worth it. 
- if (total_rows_approx > total_rows_approx_max) - { - size_t diff = total_rows_approx - total_rows_approx_max; - source.addTotalRowsApprox(diff); - total_rows_approx_max = total_rows_approx; - } -} - -} diff --git a/src/Storages/ReadFromStorageProgress.h b/src/Storages/ReadFromStorageProgress.h deleted file mode 100644 index 2be37d26fee..00000000000 --- a/src/Storages/ReadFromStorageProgress.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class ISource; - -void updateRowsProgressApprox( - ISource & source, - size_t num_rows, - UInt64 chunk_bytes_size, - UInt64 total_result_size, - UInt64 & total_rows_approx_accumulated, - size_t & total_rows_count_times, - UInt64 & total_rows_approx_max); - -} diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.cpp b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp new file mode 100644 index 00000000000..4624566a517 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.cpp @@ -0,0 +1,353 @@ +#include "IO/VarInt.h" +#include "config.h" + +#if USE_AWS_S3 +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TIMEOUT_EXCEEDED; +} + +namespace +{ + UInt64 getCurrentTime() + { + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + } +} + +void S3QueueFilesMetadata::S3QueueCollection::read(ReadBuffer & in) +{ + files = {}; + if (in.eof()) + return; + + size_t files_num; + in >> files_num >> "\n"; + while (files_num--) + { + TrackedCollectionItem item; + in >> item.file_path >> "\n"; + in >> item.timestamp >> "\n"; + in >> item.retries_count >> "\n"; + in >> item.last_exception >> "\n"; + files.push_back(item); + } +} + +void S3QueueFilesMetadata::S3QueueCollection::write(WriteBuffer & out) const +{ + out << files.size() << "\n"; + for (const auto & processed_file : files) + { + out << processed_file.file_path << "\n"; + out << processed_file.timestamp << "\n"; + out << processed_file.retries_count << "\n"; + out << processed_file.last_exception << "\n"; + } +} + +String S3QueueFilesMetadata::S3QueueCollection::toString() const +{ + WriteBufferFromOwnString out; + write(out); + return out.str(); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::S3QueueCollection::getFileNames() +{ + S3FilesCollection keys = {}; + for (const auto & pair : files) + keys.insert(pair.file_path); + return keys; +} + + +S3QueueFilesMetadata::S3QueueProcessedCollection::S3QueueProcessedCollection(const UInt64 & max_size_, const UInt64 & max_age_) + : max_size(max_size_), max_age(max_age_) +{ +} + +void S3QueueFilesMetadata::S3QueueProcessedCollection::parse(const String & collection_str) +{ + ReadBufferFromString buf(collection_str); + read(buf); + if (max_age > 0) // Remove old items + { + std::erase_if( + files, + [timestamp = getCurrentTime(), this](const TrackedCollectionItem & processed_file) + { return (timestamp - processed_file.timestamp) > max_age; }); + } +} + + +void S3QueueFilesMetadata::S3QueueProcessedCollection::add(const String & file_name) +{ + TrackedCollectionItem processed_file; + processed_file.file_path = file_name; + processed_file.timestamp = getCurrentTime(); + files.push_back(processed_file); + + /// TODO: it is strange that in parse() we take into account only max_age, but here only max_size. 
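+ /// Enforce the size cap by evicting the oldest entries first (files are appended in arrival order).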
+ while (files.size() > max_size) + { + files.pop_front(); + } +} + + +S3QueueFilesMetadata::S3QueueFailedCollection::S3QueueFailedCollection(const UInt64 & max_retries_count_) + : max_retries_count(max_retries_count_) +{ +} + +void S3QueueFilesMetadata::S3QueueFailedCollection::parse(const String & collection_str) +{ + ReadBufferFromString buf(collection_str); + read(buf); +} + + +bool S3QueueFilesMetadata::S3QueueFailedCollection::add(const String & file_name, const String & exception_message) +{ + auto failed_it = std::find_if( + files.begin(), files.end(), + [&file_name](const TrackedCollectionItem & s) { return s.file_path == file_name; }); + + if (failed_it == files.end()) + { + files.emplace_back(file_name, 0, max_retries_count, exception_message); + } + else if (failed_it->retries_count == 0 || --failed_it->retries_count == 0) + { + return false; + } + return true; +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::S3QueueFailedCollection::getFileNames() +{ + S3FilesCollection failed_keys; + for (const auto & pair : files) + { + if (pair.retries_count == 0) + failed_keys.insert(pair.file_path); + } + return failed_keys; +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::parse(const String & collection_str) +{ + ReadBufferFromString rb(collection_str); + Strings result; + readQuoted(result, rb); + files = S3FilesCollection(result.begin(), result.end()); +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::add(const Strings & file_names) +{ + files.insert(file_names.begin(), file_names.end()); +} + +void S3QueueFilesMetadata::S3QueueProcessingCollection::remove(const String & file_name) +{ + files.erase(file_name); +} + +String S3QueueFilesMetadata::S3QueueProcessingCollection::toString() const +{ + return DB::toString(Strings(files.begin(), files.end())); +} + + +S3QueueFilesMetadata::S3QueueFilesMetadata( + const StorageS3Queue * storage_, + const S3QueueSettings & settings_) + : storage(storage_) + , mode(settings_.mode) + , max_set_size(settings_.s3queue_tracked_files_limit.value) + , max_set_age_sec(settings_.s3queue_tracked_file_ttl_sec.value) + , max_loading_retries(settings_.s3queue_loading_retries.value) + , zookeeper_processing_path(fs::path(storage->getZooKeeperPath()) / "processing") + , zookeeper_processed_path(fs::path(storage->getZooKeeperPath()) / "processed") + , zookeeper_failed_path(fs::path(storage->getZooKeeperPath()) / "failed") + , zookeeper_lock_path(fs::path(storage->getZooKeeperPath()) / "lock") + , log(&Poco::Logger::get("S3QueueFilesMetadata")) +{ +} + +void S3QueueFilesMetadata::setFileProcessed(const String & file_path) +{ + auto zookeeper = storage->getZooKeeper(); + auto lock = acquireLock(zookeeper); + + switch (mode) + { + case S3QueueMode::UNORDERED: + { + S3QueueProcessedCollection processed_files(max_set_size, max_set_age_sec); + processed_files.parse(zookeeper->get(zookeeper_processed_path)); + processed_files.add(file_path); + zookeeper->set(zookeeper_processed_path, processed_files.toString()); + break; + } + case S3QueueMode::ORDERED: + { + // Check that we set in ZooKeeper node only maximum processed file path. + // This check can be useful, when multiple table engines consume in ordered mode. 
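+ // In ordered mode the "processed" node keeps a single value: the greatest (lexicographically last) file path processed so far.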
+ String max_file = getMaxProcessedFile(); + if (max_file.compare(file_path) <= 0) + zookeeper->set(zookeeper_processed_path, file_path); + break; + } + } + removeProcessingFile(file_path); +} + + +bool S3QueueFilesMetadata::setFileFailed(const String & file_path, const String & exception_message) +{ + auto zookeeper = storage->getZooKeeper(); + auto lock = acquireLock(zookeeper); + + S3QueueFailedCollection failed_collection(max_loading_retries); + failed_collection.parse(zookeeper->get(zookeeper_failed_path)); + const bool can_be_retried = failed_collection.add(file_path, exception_message); + zookeeper->set(zookeeper_failed_path, failed_collection.toString()); + removeProcessingFile(file_path); + return can_be_retried; +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getFailedFiles() +{ + auto zookeeper = storage->getZooKeeper(); + String failed_files = zookeeper->get(zookeeper_failed_path); + + S3QueueFailedCollection failed_collection(max_loading_retries); + failed_collection.parse(failed_files); + return failed_collection.getFileNames(); +} + +String S3QueueFilesMetadata::getMaxProcessedFile() +{ + auto zookeeper = storage->getZooKeeper(); + return zookeeper->get(zookeeper_processed_path); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getProcessingFiles() +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + if (!zookeeper->tryGet(zookeeper_processing_path, processing_files)) + return {}; + + S3QueueProcessingCollection processing_collection; + if (!processing_files.empty()) + processing_collection.parse(processing_files); + return processing_collection.getFileNames(); +} + +void S3QueueFilesMetadata::setFilesProcessing(const Strings & file_paths) +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + zookeeper->tryGet(zookeeper_processing_path, processing_files); + + S3QueueProcessingCollection processing_collection; + if (!processing_files.empty()) + processing_collection.parse(processing_files); + processing_collection.add(file_paths); + + if (zookeeper->exists(zookeeper_processing_path)) + zookeeper->set(zookeeper_processing_path, processing_collection.toString()); + else + zookeeper->create(zookeeper_processing_path, processing_collection.toString(), zkutil::CreateMode::Ephemeral); +} + +void S3QueueFilesMetadata::removeProcessingFile(const String & file_path) +{ + auto zookeeper = storage->getZooKeeper(); + String processing_files; + zookeeper->tryGet(zookeeper_processing_path, processing_files); + + S3QueueProcessingCollection processing_collection; + processing_collection.parse(processing_files); + processing_collection.remove(file_path); + zookeeper->set(zookeeper_processing_path, processing_collection.toString()); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getUnorderedProcessedFiles() +{ + auto zookeeper = storage->getZooKeeper(); + S3QueueProcessedCollection processed_collection(max_set_size, max_set_age_sec); + processed_collection.parse(zookeeper->get(zookeeper_processed_path)); + return processed_collection.getFileNames(); +} + +S3QueueFilesMetadata::S3FilesCollection S3QueueFilesMetadata::getProcessedFailedAndProcessingFiles() +{ + S3FilesCollection processed_and_failed_files = getFailedFiles(); + switch (mode) + { + case S3QueueMode::UNORDERED: + { + processed_and_failed_files.merge(getUnorderedProcessedFiles()); + break; + } + case S3QueueMode::ORDERED: + { + processed_and_failed_files.insert(getMaxProcessedFile()); + break; + } + } + 
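+ /// Files that are currently being processed (possibly by another replica) are excluded as well.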
processed_and_failed_files.merge(getProcessingFiles()); + return processed_and_failed_files; +} + +std::shared_ptr S3QueueFilesMetadata::acquireLock(zkutil::ZooKeeperPtr zookeeper) +{ + UInt32 retry_count = 200; + UInt32 sleep_ms = 100; + UInt32 retries = 0; + + while (true) + { + Coordination::Error code = zookeeper->tryCreate(zookeeper_lock_path, "", zkutil::CreateMode::Ephemeral); + if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) + { + retries++; + if (retries > retry_count) + { + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Can't acquire zookeeper lock"); + } + sleepForMilliseconds(sleep_ms); + } + else if (code != Coordination::Error::ZOK) + { + throw Coordination::Exception::fromPath(code, zookeeper_lock_path); + } + else + { + return zkutil::EphemeralNodeHolder::existing(zookeeper_lock_path, *zookeeper); + } + } +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueFilesMetadata.h b/src/Storages/S3Queue/S3QueueFilesMetadata.h new file mode 100644 index 00000000000..577c71b2227 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueFilesMetadata.h @@ -0,0 +1,127 @@ +#pragma once + +#if USE_AWS_S3 + +# include +# include +# include +# include + +namespace DB +{ +class StorageS3Queue; +struct S3QueueSettings; + +class S3QueueFilesMetadata +{ +public: + struct TrackedCollectionItem + { + TrackedCollectionItem() = default; + TrackedCollectionItem(const String & file_path_, UInt64 timestamp_, UInt64 retries_count_, const String & last_exception_) + : file_path(file_path_), timestamp(timestamp_), retries_count(retries_count_), last_exception(last_exception_) {} + String file_path; + UInt64 timestamp = 0; + UInt64 retries_count = 0; + String last_exception; + }; + + using S3FilesCollection = std::unordered_set; + using TrackedFiles = std::deque; + + S3QueueFilesMetadata(const StorageS3Queue * storage_, const S3QueueSettings & settings_); + + void setFilesProcessing(const Strings & file_paths); + void setFileProcessed(const String & file_path); + bool setFileFailed(const String & file_path, const String & exception_message); + + S3FilesCollection getProcessedFailedAndProcessingFiles(); + String getMaxProcessedFile(); + std::shared_ptr acquireLock(zkutil::ZooKeeperPtr zookeeper); + + struct S3QueueCollection + { + public: + virtual ~S3QueueCollection() = default; + virtual String toString() const; + S3FilesCollection getFileNames(); + + virtual void parse(const String & collection_str) = 0; + + protected: + TrackedFiles files; + + void read(ReadBuffer & in); + void write(WriteBuffer & out) const; + }; + + struct S3QueueProcessedCollection : public S3QueueCollection + { + public: + S3QueueProcessedCollection(const UInt64 & max_size_, const UInt64 & max_age_); + + void parse(const String & collection_str) override; + void add(const String & file_name); + + private: + const UInt64 max_size; + const UInt64 max_age; + }; + + struct S3QueueFailedCollection : S3QueueCollection + { + public: + S3QueueFailedCollection(const UInt64 & max_retries_count_); + + void parse(const String & collection_str) override; + bool add(const String & file_name, const String & exception_message); + + S3FilesCollection getFileNames(); + + private: + UInt64 max_retries_count; + }; + + struct S3QueueProcessingCollection + { + public: + S3QueueProcessingCollection() = default; + + void parse(const String & collection_str); + void add(const Strings & file_names); + void remove(const String & file_name); + + String toString() const; + const S3FilesCollection & getFileNames() const { 
return files; } + + private: + S3FilesCollection files; + }; + +private: + const StorageS3Queue * storage; + const S3QueueMode mode; + const UInt64 max_set_size; + const UInt64 max_set_age_sec; + const UInt64 max_loading_retries; + + const String zookeeper_processing_path; + const String zookeeper_processed_path; + const String zookeeper_failed_path; + const String zookeeper_lock_path; + + mutable std::mutex mutex; + Poco::Logger * log; + + S3FilesCollection getFailedFiles(); + S3FilesCollection getProcessingFiles(); + S3FilesCollection getUnorderedProcessedFiles(); + + void removeProcessingFile(const String & file_path); +}; + + +} + + +#endif diff --git a/src/Storages/S3Queue/S3QueueSettings.cpp b/src/Storages/S3Queue/S3QueueSettings.cpp new file mode 100644 index 00000000000..b74cf8d39bb --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSettings.cpp @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) + +void S3QueueSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + try + { + applyChanges(storage_def.settings->changes); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + e.addMessage("for storage " + storage_def.engine->name); + throw; + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} diff --git a/src/Storages/S3Queue/S3QueueSettings.h b/src/Storages/S3Queue/S3QueueSettings.h new file mode 100644 index 00000000000..75defc4a57f --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSettings.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + + +namespace DB
{ +class ASTStorage; + + +#define S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ + M(S3QueueMode, \ + mode, \ + S3QueueMode::ORDERED, \ + "With unordered mode, the set of all already processed files is tracked with persistent nodes in ZooKeeper."
\ + "With ordered mode, only the max name of the successfully consumed file is stored.", \ + 0) \ + M(S3QueueAction, after_processing, S3QueueAction::KEEP, "Delete or keep file in S3 after successful processing", 0) \ + M(String, keeper_path, "", "ZooKeeper node path", 0) \ + M(UInt64, s3queue_loading_retries, 0, "Retry loading up to the specified number of times", 0) \ + M(UInt64, s3queue_polling_min_timeout_ms, 1000, "Minimal timeout before next polling", 0) \ + M(UInt64, s3queue_polling_max_timeout_ms, 10000, "Maximum timeout before next polling", 0) \ + M(UInt64, s3queue_polling_backoff_ms, 0, "Polling backoff", 0) \ + M(UInt64, s3queue_tracked_files_limit, 1000, "Max set size for tracking processed files in unordered mode in ZooKeeper", 0) \ + M(UInt64, \ + s3queue_tracked_file_ttl_sec, \ + 0, \ + "Maximum number of seconds to store processed files in ZooKeeper node (store forever by default)", \ + 0) \ + M(UInt64, s3queue_polling_size, 50, "Maximum files to fetch from S3 with SELECT", 0) + +#define LIST_OF_S3QUEUE_SETTINGS(M, ALIAS) \ + S3QUEUE_RELATED_SETTINGS(M, ALIAS) \ + FORMAT_FACTORY_SETTINGS(M, ALIAS) + +DECLARE_SETTINGS_TRAITS(S3QueueSettingsTraits, LIST_OF_S3QUEUE_SETTINGS) + + +struct S3QueueSettings : public BaseSettings +{ + void loadFromQuery(ASTStorage & storage_def); +}; + +} diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp new file mode 100644 index 00000000000..8d1d8e7b143 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -0,0 +1,310 @@ +#include +#include +#include +#include "IO/ParallelReadBuffer.h" +#include "Parsers/ASTCreateQuery.h" +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include + +# include +# include + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include +# include + +# include + +# include +# include +# include + +# include + +# include + +# include +# include +# include + +# include +# include + + +namespace CurrentMetrics +{ +extern const Metric StorageS3Threads; +extern const Metric StorageS3ThreadsActive; +} + +namespace ProfileEvents +{ +extern const Event S3DeleteObjects; +extern const Event S3ListObjects; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int S3_ERROR; +} + + +StorageS3QueueSource::QueueGlobIterator::QueueGlobIterator( + const S3::Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context, + UInt64 & max_poll_size_, + const S3Settings::RequestSettings & request_settings_) + : max_poll_size(max_poll_size_) + , glob_iterator(std::make_unique( + client_, globbed_uri_, query, virtual_header, context, nullptr, request_settings_)) +{ + /// todo(kssenii): remove this loop, it should not be here + while (true) + { + KeyWithInfo val = glob_iterator->next(); + if (val.key.empty()) + break; + keys_buf.push_back(val); + } +} + +Strings StorageS3QueueSource::QueueGlobIterator::filterProcessingFiles( + const S3QueueMode & engine_mode, std::unordered_set & exclude_keys, const String & max_file) +{ + for (const KeyWithInfo & val : keys_buf) + { + auto full_path = val.key; + if (exclude_keys.find(full_path) != exclude_keys.end()) + { + LOG_TEST(log, "File {} will be skipped, because it was found in exclude files list " + "(either already processed or failed to be processed)", val.key); + continue; + } + + if ((engine_mode == S3QueueMode::ORDERED) && (full_path.compare(max_file) <= 0)) + continue; + + if ((processing_keys.size() < max_poll_size) || (engine_mode ==
S3QueueMode::ORDERED)) + { + processing_keys.push_back(val); + } + else + { + break; + } + } + + if (engine_mode == S3QueueMode::ORDERED) + { + std::sort( + processing_keys.begin(), + processing_keys.end(), + [](const KeyWithInfo & lhs, const KeyWithInfo & rhs) { return lhs.key.compare(rhs.key) < 0; }); + + if (processing_keys.size() > max_poll_size) + { + processing_keys.erase(processing_keys.begin() + max_poll_size, processing_keys.end()); + } + } + + Strings keys; + for (const auto & key_info : processing_keys) + keys.push_back(key_info.key); + + processing_keys.push_back(KeyWithInfo()); + processing_iterator = processing_keys.begin(); + return keys; +} + + +StorageS3QueueSource::KeyWithInfo StorageS3QueueSource::QueueGlobIterator::next() +{ + std::lock_guard lock(mutex); + if (processing_iterator != processing_keys.end()) + { + return *processing_iterator++; + } + + return KeyWithInfo(); +} + +StorageS3QueueSource::StorageS3QueueSource( + const ReadFromFormatInfo & info, + const String & format_, + String name_, + ContextPtr context_, + std::optional format_settings_, + UInt64 max_block_size_, + const S3Settings::RequestSettings & request_settings_, + String compression_hint_, + const std::shared_ptr & client_, + const String & bucket_, + const String & version_id_, + std::shared_ptr file_iterator_, + std::shared_ptr files_metadata_, + const S3QueueAction & action_, + const size_t download_thread_num_) + : ISource(info.source_header) + , WithContext(context_) + , name(std::move(name_)) + , bucket(bucket_) + , version_id(version_id_) + , format(format_) + , columns_desc(info.columns_description) + , request_settings(request_settings_) + , client(client_) + , files_metadata(files_metadata_) + , requested_virtual_columns(info.requested_virtual_columns) + , requested_columns(info.requested_columns) + , file_iterator(file_iterator_) + , action(action_) +{ + internal_source = std::make_shared( + info, + format_, + name_, + context_, + format_settings_, + max_block_size_, + request_settings_, + compression_hint_, + client_, + bucket_, + version_id_, + file_iterator, + download_thread_num_); + reader = std::move(internal_source->reader); + if (reader) + reader_future = std::move(internal_source->reader_future); +} + +StorageS3QueueSource::~StorageS3QueueSource() +{ + internal_source->create_reader_pool.wait(); +} + +String StorageS3QueueSource::getName() const +{ + return name; +} + +Chunk StorageS3QueueSource::generate() +{ + auto file_progress = getContext()->getFileProgressCallback(); + while (true) + { + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); + break; + } + + Chunk chunk; + bool success_in_pulling = false; + try + { + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + auto file_path = reader.getPath(); + + for (const auto & virtual_column : requested_virtual_columns) + { + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = file_path.find_last_of('/'); + auto column = virtual_column.type->createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)); + chunk.addColumn(column->convertToFullColumnIfConst()); + } + } + success_in_pulling = true; + } + } + catch (const Exception & e) + { + LOG_ERROR(log, "Exception in chunk pulling: {} ", e.displayText()); + files_metadata->setFileFailed(reader.getFile(), e.message()); + success_in_pulling = false; + } + if 
(success_in_pulling) + { + applyActionAfterProcessing(reader.getFile()); + files_metadata->setFileProcessed(reader.getFile()); + return chunk; + } + + + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + internal_source->create_reader_pool.wait(); + reader_future = internal_source->createReaderAsync(); + } + + return {}; +} + + +void StorageS3QueueSource::applyActionAfterProcessing(const String & file_path) +{ + switch (action) + { + case S3QueueAction::DELETE: + deleteProcessedObject(file_path); + break; + case S3QueueAction::KEEP: + break; + } +} + +void StorageS3QueueSource::deleteProcessedObject(const String & file_path) +{ + LOG_INFO(log, "Delete processed file {} from bucket {}", file_path, bucket); + + S3::DeleteObjectRequest request; + request.WithKey(file_path).WithBucket(bucket); + auto outcome = client->DeleteObject(request); + if (!outcome.IsSuccess()) + { + const auto & err = outcome.GetError(); + LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); + } + else + { + LOG_TRACE(log, "Object with path {} was removed from S3", file_path); + } +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h new file mode 100644 index 00000000000..523b8c0e81f --- /dev/null +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -0,0 +1,124 @@ +#pragma once +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include + +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + + +class StorageS3QueueSource : public ISource, WithContext +{ +public: + using IIterator = StorageS3Source::IIterator; + using DisclosedGlobIterator = StorageS3Source::DisclosedGlobIterator; + using KeysWithInfo = StorageS3Source::KeysWithInfo; + using KeyWithInfo = StorageS3Source::KeyWithInfo; + class QueueGlobIterator : public IIterator + { + public: + QueueGlobIterator( + const S3::Client & client_, + const S3::URI & globbed_uri_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context, + UInt64 & max_poll_size_, + const S3Settings::RequestSettings & request_settings_ = {}); + + KeyWithInfo next() override; + + Strings + filterProcessingFiles(const S3QueueMode & engine_mode, std::unordered_set & exclude_keys, const String & max_file = ""); + + private: + UInt64 max_poll_size; + KeysWithInfo keys_buf; + KeysWithInfo processing_keys; + mutable std::mutex mutex; + std::unique_ptr glob_iterator; + std::vector::iterator processing_iterator; + + Poco::Logger * log = &Poco::Logger::get("StorageS3QueueSourceIterator"); + }; + + static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); + + StorageS3QueueSource( + const ReadFromFormatInfo & info, + const String & format, + String name_, + ContextPtr context_, + std::optional format_settings_, + UInt64 max_block_size_, + const S3Settings::RequestSettings & request_settings_, + String compression_hint_, + const std::shared_ptr & client_, + const String & bucket, + const String & version_id, + std::shared_ptr file_iterator_, + std::shared_ptr files_metadata_, + const S3QueueAction & action_, + size_t download_thread_num); + + ~StorageS3QueueSource() override; + + String getName() const override; + + Chunk generate() override; + + +private: + 
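+ /// The actual S3 reading is delegated to internal_source (declared below); this class only adds queue bookkeeping (tracking processed/failed files and applying the after-processing action).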
String name; + String bucket; + String version_id; + String format; + ColumnsDescription columns_desc; + S3Settings::RequestSettings request_settings; + std::shared_ptr client; + + std::shared_ptr files_metadata; + using ReaderHolder = StorageS3Source::ReaderHolder; + ReaderHolder reader; + + NamesAndTypesList requested_virtual_columns; + NamesAndTypesList requested_columns; + std::shared_ptr file_iterator; + const S3QueueAction action; + + Poco::Logger * log = &Poco::Logger::get("StorageS3QueueSource"); + + std::future reader_future; + + mutable std::mutex mutex; + + std::shared_ptr internal_source; + void deleteProcessedObject(const String & file_path); + void applyActionAfterProcessing(const String & file_path); +}; + +} +#endif diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp new file mode 100644 index 00000000000..23eebb6ded9 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -0,0 +1,115 @@ +#include + +#if USE_AWS_S3 + +# include +# include +# include +# include +# include +# include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int METADATA_MISMATCH; +} + +S3QueueTableMetadata::S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings) +{ + format_name = configuration.format; + after_processing = engine_settings.after_processing.toString(); + mode = engine_settings.mode.toString(); + s3queue_tracked_files_limit = engine_settings.s3queue_tracked_files_limit; + s3queue_tracked_file_ttl_sec = engine_settings.s3queue_tracked_file_ttl_sec; +} + + +String S3QueueTableMetadata::toString() const +{ + Poco::JSON::Object json; + json.set("after_processing", after_processing); + json.set("mode", mode); + json.set("s3queue_tracked_files_limit", s3queue_tracked_files_limit); + json.set("s3queue_tracked_file_ttl_sec", s3queue_tracked_file_ttl_sec); + json.set("format_name", format_name); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(json, oss); + return oss.str(); +} + +void S3QueueTableMetadata::read(const String & metadata_str) +{ + Poco::JSON::Parser parser; + auto json = parser.parse(metadata_str).extract(); + after_processing = json->getValue("after_processing"); + mode = json->getValue("mode"); + s3queue_tracked_files_limit = json->getValue("s3queue_tracked_files_limit"); + s3queue_tracked_file_ttl_sec = json->getValue("s3queue_tracked_file_ttl_sec"); + format_name = json->getValue("format_name"); +} + +S3QueueTableMetadata S3QueueTableMetadata::parse(const String & metadata_str) +{ + S3QueueTableMetadata metadata; + metadata.read(metadata_str); + return metadata; +} + + +void S3QueueTableMetadata::checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const +{ + if (after_processing != from_zk.after_processing) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs " + "in action after processing. Stored in ZooKeeper: {}, local: {}", + DB::toString(from_zk.after_processing), + DB::toString(after_processing)); + + if (mode != from_zk.mode) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in engine mode.
" + "Stored in ZooKeeper: {}, local: {}", + from_zk.mode, + mode); + + if (s3queue_tracked_files_limit != from_zk.s3queue_tracked_files_limit) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in max set size. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.s3queue_tracked_files_limit, + s3queue_tracked_files_limit); + + if (s3queue_tracked_file_ttl_sec != from_zk.s3queue_tracked_file_ttl_sec) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in max set age. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.s3queue_tracked_file_ttl_sec, + s3queue_tracked_file_ttl_sec); + + if (format_name != from_zk.format_name) + throw Exception( + ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in format name. " + "Stored in ZooKeeper: {}, local: {}", + from_zk.format_name, + format_name); +} + +void S3QueueTableMetadata::checkEquals(const S3QueueTableMetadata & from_zk) const +{ + checkImmutableFieldsEquals(from_zk); +} + +} + +#endif diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h new file mode 100644 index 00000000000..4b6fbc54825 --- /dev/null +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -0,0 +1,43 @@ +#pragma once + +#if USE_AWS_S3 + +# include +# include +# include + +namespace DB +{ + +class WriteBuffer; +class ReadBuffer; + +/** The basic parameters of S3Queue table engine for saving in ZooKeeper. + * Lets you verify that they match local ones. + */ +struct S3QueueTableMetadata +{ + String format_name; + String after_processing; + String mode; + UInt64 s3queue_tracked_files_limit; + UInt64 s3queue_tracked_file_ttl_sec; + + S3QueueTableMetadata() = default; + S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings); + + void read(const String & metadata_str); + static S3QueueTableMetadata parse(const String & metadata_str); + + String toString() const; + + void checkEquals(const S3QueueTableMetadata & from_zk) const; + +private: + void checkImmutableFieldsEquals(const S3QueueTableMetadata & from_zk) const; +}; + + +} + +#endif diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp new file mode 100644 index 00000000000..673c0dde1d2 --- /dev/null +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -0,0 +1,640 @@ +#include "config.h" + + +#if USE_AWS_S3 + +# include +# include +# include +# include +# include +# include +# include +# include +# include "IO/ParallelReadBuffer.h" + +# include + +# include + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + + +# include + +# include +# include +# include + +# include + +# include + +# include + +# include +# include +# include +# include + +namespace fs = std::filesystem; + +namespace ProfileEvents +{ +extern const Event S3DeleteObjects; +extern const Event S3ListObjects; +} + +namespace DB
{ + +static const String PARTITION_ID_WILDCARD = "{_partition_id}"; +static const auto MAX_THREAD_WORK_DURATION_MS = 60000; + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; + extern const int S3_ERROR; + extern const int NOT_IMPLEMENTED; + extern const int QUERY_NOT_ALLOWED; + extern const int NO_ZOOKEEPER; + extern const int REPLICA_ALREADY_EXISTS; +
extern const int INCOMPATIBLE_COLUMNS; +} + + +StorageS3Queue::StorageS3Queue( + std::unique_ptr s3queue_settings_, + const StorageS3::Configuration & configuration_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + ContextPtr context_, + std::optional format_settings_, + ASTPtr partition_by_) + : IStorage(table_id_) + , WithContext(context_) + , s3queue_settings(std::move(s3queue_settings_)) + , after_processing(s3queue_settings->after_processing) + , configuration{configuration_} + , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) + , format_settings(format_settings_) + , partition_by(partition_by_) + , log(&Poco::Logger::get("StorageS3Queue (" + table_id_.table_name + ")")) +{ + if (configuration.url.key.ends_with('/')) + configuration.url.key += '*'; + + if (!withGlobs()) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); + + String setting_zk_path = s3queue_settings->keeper_path; + if (setting_zk_path.empty()) + { + auto database = DatabaseCatalog::instance().getDatabase(table_id_.database_name); + bool is_in_replicated_database = database->getEngineName() == "Replicated"; + + auto default_path = getContext()->getSettingsRef().s3queue_default_zookeeper_path.value; + String zk_path_prefix; + + if (!default_path.empty()) + { + zk_path_prefix = default_path; + } + else if (is_in_replicated_database) + { + LOG_INFO(log, "S3Queue engine zookeeper path is not specified. " + "Using replicated database zookeeper path"); + + zk_path_prefix = fs::path(assert_cast(database.get())->getZooKeeperPath()) / "s3queue"; + } + else + { + throw Exception(ErrorCodes::NO_ZOOKEEPER, + "S3Queue keeper_path engine setting not specified, " + "s3queue_default_zookeeper_path_prefix not specified"); + } + + zk_path = zkutil::extractZooKeeperPath( + fs::path(zk_path_prefix) / toString(table_id_.uuid), /* check_starts_with_slash */ true, log); + } + else + { + /// We do not add table uuid here on purpose. 
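+ /// (Presumably so that an explicitly specified keeper_path is used verbatim and can be shared across tables or replicas.)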
+ zk_path = zkutil::extractZooKeeperPath(s3queue_settings->keeper_path.value, /* check_starts_with_slash */ true, log); + } + + LOG_INFO(log, "Using zookeeper path: {}", zk_path); + + FormatFactory::instance().checkFormatName(configuration.format); + context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); + StorageInMemoryMetadata storage_metadata; + configuration.update(context_); + + if (columns_.empty()) + { + auto columns = StorageS3::getTableStructureFromDataImpl(configuration, format_settings, context_); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + storage_metadata.setComment(comment); + setInMemoryMetadata(storage_metadata); + + auto metadata_snapshot = getInMemoryMetadataPtr(); + const bool is_first_replica = createTableIfNotExists(metadata_snapshot); + + if (!is_first_replica) + { + checkTableStructure(zk_path, metadata_snapshot); + } + + files_metadata = std::make_shared(this, *s3queue_settings); + + auto default_virtuals = NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; + + auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); + virtual_columns = getVirtualsForStorage(columns, default_virtuals); + for (const auto & column : virtual_columns) + virtual_block.insert({column.type->createColumn(), column.type, column.name}); + + auto poll_thread = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); + task = std::make_shared(std::move(poll_thread)); +} + + +bool StorageS3Queue::supportsSubcolumns() const +{ + return true; +} + +bool StorageS3Queue::supportsSubsetOfColumns() const +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); +} + +Pipe StorageS3Queue::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t /* num_streams */) +{ + if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) + throw Exception( + ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. 
To enable use setting `stream_like_engine_allow_direct_select`"); + + if (mv_attached) + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageS3Queue with attached materialized views"); + + auto query_configuration = updateConfigurationAndGetCopy(local_context); + + std::shared_ptr iterator_wrapper = createFileIterator(local_context, query_info.query); + + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + + const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; + + return Pipe(std::make_shared( + read_from_format_info, + configuration.format, + getName(), + local_context, + format_settings, + max_block_size, + query_configuration.request_settings, + configuration.compression_method, + query_configuration.client, + query_configuration.url.bucket, + query_configuration.url.version_id, + iterator_wrapper, + files_metadata, + after_processing, + max_download_threads)); +} + +SinkToStoragePtr StorageS3Queue::write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, bool) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Write is not supported by storage {}", getName()); +} + +void StorageS3Queue::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported by storage {}", getName()); +} + +NamesAndTypesList StorageS3Queue::getVirtuals() const +{ + return virtual_columns; +} + +bool StorageS3Queue::supportsPartitionBy() const +{ + return true; +} + +void StorageS3Queue::startup() +{ + if (task) + task->holder->activateAndSchedule(); +} + +void StorageS3Queue::shutdown() +{ + shutdown_called = true; + if (task) + { + task->stream_cancelled = true; + task->holder->deactivate(); + } +} + +size_t StorageS3Queue::getTableDependentCount() const +{ + auto table_id = getStorageID(); + // Check if at least one direct dependency is attached + return DatabaseCatalog::instance().getDependentViews(table_id).size(); +} + +bool StorageS3Queue::hasDependencies(const StorageID & table_id) +{ + // Check if all dependencies are attached + auto view_ids = DatabaseCatalog::instance().getDependentViews(table_id); + LOG_TEST(log, "Number of attached views {} for {}", view_ids.size(), table_id.getNameForLogs()); + + if (view_ids.empty()) + return false; + + // Check the dependencies are ready? 
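+ // A dependency counts as ready only if the view can be resolved and, for a materialized view, its target table is attached.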
+ for (const auto & view_id : view_ids) + { + auto view = DatabaseCatalog::instance().tryGetTable(view_id, getContext()); + if (!view) + return false; + + // If it materialized view, check it's target table + auto * materialized_view = dynamic_cast(view.get()); + if (materialized_view && !materialized_view->tryGetTargetTable()) + return false; + } + + return true; +} + +void StorageS3Queue::threadFunc() +{ + bool reschedule = true; + try + { + auto table_id = getStorageID(); + + auto dependencies_count = getTableDependentCount(); + if (dependencies_count) + { + auto start_time = std::chrono::steady_clock::now(); + + mv_attached.store(true); + // Keep streaming as long as there are attached views and streaming is not cancelled + while (!task->stream_cancelled) + { + if (!hasDependencies(table_id)) + { + /// For this case, we can not wait for watch thread to wake up + reschedule = true; + break; + } + + LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); + streamToViews(); + + auto ts = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(ts - start_time); + if (duration.count() > MAX_THREAD_WORK_DURATION_MS) + { + LOG_TRACE(log, "Thread work duration limit exceeded. Reschedule."); + reschedule = true; + break; + } + + reschedule_processing_interval_ms = s3queue_settings->s3queue_polling_min_timeout_ms; + } + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + mv_attached.store(false); + + if (reschedule && !shutdown_called) + { + LOG_TRACE(log, "Reschedule S3 Queue thread func."); + /// Reschedule with backoff. + if (reschedule_processing_interval_ms < s3queue_settings->s3queue_polling_max_timeout_ms) + reschedule_processing_interval_ms += s3queue_settings->s3queue_polling_backoff_ms; + task->holder->scheduleAfter(reschedule_processing_interval_ms); + } +} + + +void StorageS3Queue::streamToViews() +{ + auto table_id = getStorageID(); + auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); + if (!table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); + + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); + + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + + size_t block_size = 100; + + auto s3queue_context = Context::createCopy(getContext()); + s3queue_context->makeQueryContext(); + auto query_configuration = updateConfigurationAndGetCopy(s3queue_context); + + // Create a stream for each consumer and join them in a union stream + // Only insert into dependent views and expect that input blocks contain virtual columns + InterpreterInsertQuery interpreter(insert, s3queue_context, false, true, true); + auto block_io = interpreter.execute(); + auto column_names = block_io.pipeline.getHeader().getNames(); + + // Create a stream for each consumer and join them in a union stream + + std::shared_ptr iterator_wrapper = createFileIterator(s3queue_context, nullptr); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + const size_t max_download_threads = s3queue_context->getSettingsRef().max_download_threads; + + auto pipe = Pipe(std::make_shared( + read_from_format_info, + configuration.format, + getName(), + s3queue_context, + format_settings, + block_size, + query_configuration.request_settings, + configuration.compression_method, + query_configuration.client, + 
query_configuration.url.bucket, + query_configuration.url.version_id, + iterator_wrapper, + files_metadata, + after_processing, + max_download_threads)); + + + std::atomic_size_t rows = 0; + { + block_io.pipeline.complete(std::move(pipe)); + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + CompletedPipelineExecutor executor(block_io.pipeline); + executor.execute(); + } +} + +StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(ContextPtr local_context) +{ + configuration.update(local_context); + return configuration; +} + +zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const +{ + std::lock_guard lock{zk_mutex}; + if (!zk_client || zk_client->expired()) + { + zk_client = getContext()->getZooKeeper(); + zk_client->sync(zk_path); + } + return zk_client; +} + + +bool StorageS3Queue::createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot) +{ + auto zookeeper = getZooKeeper(); + zookeeper->createAncestors(zk_path); + + for (size_t i = 0; i < zk_create_table_retries; ++i) + { + Coordination::Requests ops; + bool is_first_replica = true; + if (zookeeper->exists(zk_path + "/metadata")) + { + if (!zookeeper->exists(zk_path + "/processing")) + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processing", "", zkutil::CreateMode::Ephemeral)); + LOG_DEBUG(log, "This table {} is already created, will use existing metadata for checking engine settings", zk_path); + is_first_replica = false; + } + else + { + String metadata_str = S3QueueTableMetadata(configuration, *s3queue_settings).toString(); + ops.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processed", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/failed", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/processing", "", zkutil::CreateMode::Ephemeral)); + ops.emplace_back(zkutil::makeCreateRequest( + zk_path + "/columns", metadata_snapshot->getColumns().toString(), zkutil::CreateMode::Persistent)); + + ops.emplace_back(zkutil::makeCreateRequest(zk_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent)); + } + + Coordination::Responses responses; + auto code = zookeeper->tryMulti(ops, responses); + if (code == Coordination::Error::ZNODEEXISTS) + { + LOG_INFO(log, "It looks like the table {} was created by another server at the same moment, will retry", zk_path); + continue; + } + else if (code != Coordination::Error::ZOK) + { + zkutil::KeeperMultiException::check(code, ops, responses); + } + + return is_first_replica; + } + + throw Exception( + ErrorCodes::REPLICA_ALREADY_EXISTS, + "Cannot create table, because it is created concurrently every time or because " + "of wrong zk_path or because of logical error"); +} + + +/** Verify that list of columns and table settings match those specified in ZK (/metadata). + * If not, throw an exception. 
+ */ +void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot) +{ + auto zookeeper = getZooKeeper(); + + S3QueueTableMetadata old_metadata(configuration, *s3queue_settings); + + Coordination::Stat metadata_stat; + String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata", &metadata_stat); + auto metadata_from_zk = S3QueueTableMetadata::parse(metadata_str); + old_metadata.checkEquals(metadata_from_zk); + + Coordination::Stat columns_stat; + auto columns_from_zk = ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_prefix) / "columns", &columns_stat)); + + const ColumnsDescription & old_columns = metadata_snapshot->getColumns(); + if (columns_from_zk != old_columns) + { + throw Exception( + ErrorCodes::INCOMPATIBLE_COLUMNS, + "Table columns structure in ZooKeeper is different from local table structure. Local columns:\n" + "{}\nZookeeper columns:\n{}", + old_columns.toString(), + columns_from_zk.toString()); + } +} + + +std::shared_ptr +StorageS3Queue::createFileIterator(ContextPtr local_context, ASTPtr query) +{ + auto it = std::make_shared( + *configuration.client, + configuration.url, + query, + virtual_block, + local_context, + s3queue_settings->s3queue_polling_size.value, + configuration.request_settings); + + auto zookeeper = getZooKeeper(); + auto lock = files_metadata->acquireLock(zookeeper); + S3QueueFilesMetadata::S3FilesCollection files_to_skip = files_metadata->getProcessedFailedAndProcessingFiles(); + + Strings files_to_process; + if (s3queue_settings->mode == S3QueueMode::UNORDERED) + { + files_to_process = it->filterProcessingFiles(s3queue_settings->mode, files_to_skip); + } + else + { + String max_processed_file = files_metadata->getMaxProcessedFile(); + files_to_process = it->filterProcessingFiles(s3queue_settings->mode, files_to_skip, max_processed_file); + } + + LOG_TEST(log, "Found files to process: {}", fmt::join(files_to_process, ", ")); + + files_metadata->setFilesProcessing(files_to_process); + return it; +} + +void StorageS3Queue::drop() +{ + auto zookeeper = getZooKeeper(); + if (zookeeper->exists(zk_path)) + zookeeper->removeRecursive(zk_path); +} + +void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) +{ + factory.registerStorage( + name, + [](const StorageFactory::Arguments & args) + { + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + + auto s3queue_settings = std::make_unique(); + if (args.storage_def->settings) + { + s3queue_settings->loadFromQuery(*args.storage_def); + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. 
+ const auto & changes = args.getContext()->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + else + LOG_TRACE(&Poco::Logger::get("StorageS3"), "Remove: {}", change.name); + args.storage_def->settings->changes.removeSetting(change.name); + } + + for (const auto & change : args.storage_def->settings->changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.applyChange(change); + } + format_settings = getFormatSettings(args.getContext(), user_format_settings); + } + else + { + format_settings = getFormatSettings(args.getContext()); + } + + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + + return std::make_shared( + std::move(s3queue_settings), + std::move(configuration), + args.table_id, + args.columns, + args.constraints, + args.comment, + args.getContext(), + format_settings, + partition_by); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +void registerStorageS3Queue(StorageFactory & factory) +{ + return registerStorageS3QueueImpl("S3Queue", factory); +} + +} + + +#endif diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h new file mode 100644 index 00000000000..9737d5fcefa --- /dev/null +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -0,0 +1,146 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 + +# include + +# include +# include + +# include +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +namespace Aws::S3 +{ +class Client; +} + +namespace DB +{ + + +class StorageS3Queue : public IStorage, WithContext +{ +public: + using Configuration = typename StorageS3::Configuration; + + StorageS3Queue( + std::unique_ptr s3queue_settings_, + const Configuration & configuration_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + ContextPtr context_, + std::optional format_settings_, + ASTPtr partition_by_ = nullptr); + + String getName() const override { return "S3Queue"; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + SinkToStoragePtr write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + bool async_insert) override; + + void truncate( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + ContextPtr /*local_context*/, + TableExclusiveLockHolder &) override; + + NamesAndTypesList getVirtuals() const override; + + bool supportsPartitionBy() const override; + + const auto & getFormatName() const { return configuration.format; } + + const String & getZooKeeperPath() const { return zk_path; } + + zkutil::ZooKeeperPtr getZooKeeper() const; + +private: + const std::unique_ptr s3queue_settings; + const S3QueueAction after_processing; + + std::shared_ptr files_metadata; + Configuration configuration; + NamesAndTypesList virtual_columns; + Block virtual_block; + UInt64 reschedule_processing_interval_ms; + + std::optional 
format_settings; + ASTPtr partition_by; + + String zk_path; + mutable zkutil::ZooKeeperPtr zk_client; + mutable std::mutex zk_mutex; + + std::atomic mv_attached = false; + std::atomic shutdown_called{false}; + Poco::Logger * log; + + bool supportsSubcolumns() const override; + bool withGlobs() const { return configuration.url.key.find_first_of("*?{") != std::string::npos; } + + void threadFunc(); + size_t getTableDependentCount() const; + bool hasDependencies(const StorageID & table_id); + + void startup() override; + void shutdown() override; + void drop() override; + + struct TaskContext + { + BackgroundSchedulePool::TaskHolder holder; + std::atomic stream_cancelled{false}; + explicit TaskContext(BackgroundSchedulePool::TaskHolder && task_) : holder(std::move(task_)) { } + }; + std::shared_ptr task; + + bool supportsSubsetOfColumns() const override; + + const UInt32 zk_create_table_retries = 1000; + bool createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot); + void checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot); + + using KeysWithInfo = StorageS3QueueSource::KeysWithInfo; + + std::shared_ptr + createFileIterator(ContextPtr local_context, ASTPtr query); + + void streamToViews(); + Configuration updateConfigurationAndGetCopy(ContextPtr local_context); +}; + +} + +#endif diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 13d6909fd52..8a3451e0662 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -258,11 +258,13 @@ struct SelectQueryInfo bool parallel_replicas_disabled = false; bool is_parameterized_view = false; - NameToNameMap parameterized_view_values; // If limit is not 0, that means it's a trivial limit query. UInt64 limit = 0; + /// For IStorageSystemOneBlock + std::vector columns_mask; + InputOrderInfoPtr getInputOrderInfo() const { return input_order_info ? input_order_info : (projection ? projection->input_order_info : nullptr); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 0880d058cb5..8bea9e04df2 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -31,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -87,7 +87,7 @@ const std::unordered_set optional_configuration_keys = { bool isConnectionString(const std::string & candidate) { - return candidate.starts_with("DefaultEndpointsProtocol"); + return !candidate.starts_with("http"); } } @@ -258,7 +258,7 @@ void registerStorageAzureBlob(StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. 
@@ -300,6 +300,7 @@ void registerStorageAzureBlob(StorageFactory & factory) args.constraints, args.comment, format_settings, + /* distributed_processing */ false, partition_by); }, { @@ -310,58 +311,113 @@ void registerStorageAzureBlob(StorageFactory & factory) }); } -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration) +static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) +{ + Azure::Storage::Blobs::ListBlobContainersOptions options; + options.Prefix = container_name; + options.PageSizeHint = 1; + + auto containers_list_response = blob_service_client->ListBlobContainers(options); + auto containers_list = containers_list_response.BlobContainers; + + for (const auto & container : containers_list) + { + if (container_name == container.Name) + return true; + } + return false; +} + +AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only) { AzureClientPtr result; if (configuration.is_connection_string) { + std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - result->CreateIfNotExists(); - } - else - { - if (configuration.account_name.has_value() && configuration.account_key.has_value()) + bool container_exists = containerExists(blob_service_client,configuration.container); + + if (!container_exists) { - auto storage_shared_key_credential = std::make_shared(*configuration.account_name, *configuration.account_key); - auto blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); + if (is_read_only) + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "AzureBlobStorage container does not exist '{}'", + configuration.container); + try { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) + result->CreateIfNotExists(); + } catch (const Azure::Storage::StorageException & e) { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) - { - auto final_url = configuration.connection_url - + (configuration.connection_url.back() == '/' ? 
"" : "/") - + configuration.container; - - result = std::make_unique(final_url, storage_shared_key_credential); - } - else + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) { throw; } } } + } + else + { + std::shared_ptr storage_shared_key_credential; + if (configuration.account_name.has_value() && configuration.account_key.has_value()) + { + storage_shared_key_credential + = std::make_shared(*configuration.account_name, *configuration.account_key); + } + + std::unique_ptr blob_service_client; + if (storage_shared_key_credential) + { + blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); + } else { - auto managed_identity_credential = std::make_shared(); - auto blob_service_client = std::make_unique(configuration.connection_url, managed_identity_credential); + blob_service_client = std::make_unique(configuration.connection_url); + } + + bool container_exists = containerExists(blob_service_client,configuration.container); + + std::string final_url; + size_t pos = configuration.connection_url.find('?'); + if (pos != std::string::npos) + { + auto url_without_sas = configuration.connection_url.substr(0, pos); + final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container + + configuration.connection_url.substr(pos); + } + else + final_url + = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; + + if (container_exists) + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); + } + else + { + if (is_read_only) + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "AzureBlobStorage container does not exist '{}'", + configuration.container); try { result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } - catch (const Azure::Storage::StorageException & e) + } catch (const Azure::Storage::StorageException & e) { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.") { - auto final_url = configuration.connection_url - + (configuration.connection_url.back() == '/' ? 
"" : "/") - + configuration.container; - - result = std::make_unique(final_url, managed_identity_credential); + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); } else { @@ -393,12 +449,13 @@ StorageAzureBlob::StorageAzureBlob( const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_, + bool distributed_processing_, ASTPtr partition_by_) : IStorage(table_id_) , name("AzureBlobStorage") , configuration(configuration_) , object_storage(std::move(object_storage_)) - , distributed_processing(false) + , distributed_processing(distributed_processing_) , format_settings(format_settings_) , partition_by(partition_by_) { @@ -408,7 +465,7 @@ StorageAzureBlob::StorageAzureBlob( StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context); + auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing); storage_metadata.setColumns(columns); } else @@ -439,7 +496,7 @@ void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont { throw Exception( ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", + "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); } @@ -604,7 +661,7 @@ private: Pipe StorageAzureBlob::read( const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, + const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, @@ -616,63 +673,35 @@ Pipe StorageAzureBlob::read( Pipes pipes; - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper; - if (configuration.withGlobs()) + if (distributed_processing) + { + iterator_wrapper = std::make_shared(local_context, + local_context->getReadTaskCallback()); + } + else if (configuration.withGlobs()) { /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( object_storage.get(), configuration.container, configuration.blob_path, - query_info.query, virtual_block, local_context, nullptr); + query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( object_storage.get(), configuration.container, configuration.blobs_paths, - query_info.query, virtual_block, local_context, nullptr); - } - - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format 
= storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); + query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback()); } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, configuration.compression_method, object_storage.get(), @@ -763,11 +792,6 @@ bool StorageAzureBlob::supportsPartitionBy() const return true; } -bool StorageAzureBlob::supportsSubcolumns() const -{ - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); -} - bool StorageAzureBlob::supportsSubsetOfColumns() const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); @@ -807,7 +831,8 @@ StorageAzureBlobSource::GlobIterator::GlobIterator( ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_) + RelativePathsWithMetadata * outer_blobs_, + std::function file_progress_callback_) : IIterator(context_) , object_storage(object_storage_) , container(container_) @@ -815,6 +840,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator( , query(query_) , virtual_header(virtual_header_) , outer_blobs(outer_blobs_) + , file_progress_callback(file_progress_callback_) { const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{")); @@ -826,6 +852,7 @@ StorageAzureBlobSource::GlobIterator::GlobIterator( blobs_with_metadata.emplace_back(blob_path_with_globs, object_metadata); if (outer_blobs) outer_blobs->emplace_back(blobs_with_metadata.back()); + is_finished = true; return; } @@ -844,8 +871,10 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() { std::lock_guard lock(next_mutex); - if (is_finished) + if (is_finished && index >= blobs_with_metadata.size()) + { return {}; + } bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); @@ -893,7 +922,8 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() blobs_with_metadata.clear(); for (UInt64 idx : idxs.getData()) { - total_size.fetch_add(new_batch[idx].metadata.size_bytes, std::memory_order_relaxed); + if (file_progress_callback) + file_progress_callback(FileProgress(0, new_batch[idx].metadata.size_bytes)); blobs_with_metadata.emplace_back(std::move(new_batch[idx])); if (outer_blobs) outer_blobs->emplace_back(blobs_with_metadata.back()); @@ -905,8 +935,11 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); blobs_with_metadata = std::move(new_batch); - for (const auto & [_, info] : blobs_with_metadata) - total_size.fetch_add(info.size_bytes, std::memory_order_relaxed); + if (file_progress_callback) + { + for (const auto & [_, info] : blobs_with_metadata) + file_progress_callback(FileProgress(0, info.size_bytes)); + } } } @@ -916,11 +949,6 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() return blobs_with_metadata[current_index]; } -size_t StorageAzureBlobSource::GlobIterator::getTotalSize() 
const -{ - return total_size.load(std::memory_order_relaxed); -} - void StorageAzureBlobSource::GlobIterator::createFilterAST(const String & any_key) { @@ -940,17 +968,17 @@ void StorageAzureBlobSource::GlobIterator::createFilterAST(const String & any_ke StorageAzureBlobSource::KeysIterator::KeysIterator( AzureObjectStorage * object_storage_, const std::string & container_, - Strings keys_, + const Strings & keys_, ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_) + RelativePathsWithMetadata * outer_blobs, + std::function file_progress_callback) : IIterator(context_) , object_storage(object_storage_) , container(container_) , query(query_) , virtual_header(virtual_header_) - , outer_blobs(outer_blobs_) { Strings all_keys = keys_; @@ -986,7 +1014,8 @@ StorageAzureBlobSource::KeysIterator::KeysIterator( for (auto && key : all_keys) { ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); - total_size += object_metadata.size_bytes; + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); keys.emplace_back(RelativePathWithMetadata{key, object_metadata}); } @@ -1003,12 +1032,6 @@ RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() return keys[current_index]; } -size_t StorageAzureBlobSource::KeysIterator::getTotalSize() const -{ - return total_size.load(std::memory_order_relaxed); -} - - Chunk StorageAzureBlobSource::generate() { while (true) @@ -1024,17 +1047,10 @@ Chunk StorageAzureBlobSource::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + size_t chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); const auto & file_path = reader.getPath(); - if (num_rows && total_objects_size) - { - size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -1059,13 +1075,6 @@ Chunk StorageAzureBlobSource::generate() if (!reader) break; - size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_objects_size) - total_rows_approx_accumulated = static_cast( - std::ceil(static_cast(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated)); - total_objects_size += object_size; - /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
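// The generate() loop above now reports progress per pulled chunk instead of estimating a row
// total from file sizes up front: it prefers the input format's estimate of how many input
// bytes produced the chunk and falls back to the chunk's in-memory size when the format cannot
// tell. A simplified sketch of that fallback, with plain integers standing in for the Chunk
// and IInputFormat interfaces:
#include <cassert>
#include <cstddef>

/// approx_bytes_read: what IInputFormat::getApproxBytesReadForChunk() returned, may be 0.
/// in_memory_bytes:   Chunk::bytes(), always available once the chunk is materialized.
inline size_t bytesForProgress(size_t approx_bytes_read, size_t in_memory_bytes)
{
    return approx_bytes_read ? approx_bytes_read : in_memory_bytes;
}

int main()
{
    assert(bytesForProgress(1024, 4096) == 1024); /// the format knows how much input it consumed
    assert(bytesForProgress(0, 4096) == 4096);    /// fall back to the decompressed chunk size
}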
create_reader_pool.wait(); @@ -1075,35 +1084,26 @@ Chunk StorageAzureBlobSource::generate() return {}; } -Block StorageAzureBlobSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageAzureBlobSource::StorageAzureBlobSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, String compression_hint_, AzureObjectStorage * object_storage_, const String & container_, std::shared_ptr file_iterator_) - :ISource(getHeader(sample_block_, requested_virtual_columns_)) + :ISource(info.source_header, false) , WithContext(context_) - , requested_virtual_columns(requested_virtual_columns_) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) , format(format_) , name(std::move(name_)) - , sample_block(sample_block_) + , sample_block(info.format_header) , format_settings(format_settings_) - , columns_desc(columns_) + , columns_desc(info.columns_description) , max_block_size(max_block_size_) , compression_hint(compression_hint_) , object_storage(std::move(object_storage_)) @@ -1114,13 +1114,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( { reader = createReader(); if (reader) - { - const auto & read_buf = reader.getReadBuffer(); - if (read_buf) - total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - reader_future = createReaderAsync(); - } } @@ -1159,10 +1153,17 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. 
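// prepareReadingFromFormat(), used earlier in read(), together with the ExtractColumnsTransform
// added right below, replaces the removed hand-rolled bookkeeping: requested names are split
// into physical columns, which the input format must produce, and virtual columns such as
// _path/_file, which the source appends per row; if only virtuals were requested, one small
// physical column is still read so the row count stays observable. A simplified sketch of that
// split, with plain strings standing in for NamesAndTypesList (helper names are illustrative):
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

struct SplitColumns
{
    std::vector<std::string> physical; /// handed to the input format
    std::vector<std::string> virtuals; /// filled by the source (_path, _file, ...)
};

inline SplitColumns splitRequestedColumns(
    const std::vector<std::string> & requested,
    const std::vector<std::string> & virtual_names,
    const std::string & smallest_physical_column)
{
    SplitColumns result;
    for (const auto & name : requested)
    {
        bool is_virtual = std::find(virtual_names.begin(), virtual_names.end(), name) != virtual_names.end();
        (is_virtual ? result.virtuals : result.physical).push_back(name);
    }
    if (result.physical.empty())
        result.physical.push_back(smallest_physical_column); /// still read something to keep row counts
    return result;
}

int main()
{
    auto split = splitRequestedColumns({"_path", "id"}, {"_path", "_file"}, "id");
    assert(split.physical == std::vector<std::string>{"id"});
    assert(split.virtuals == std::vector<std::string>{"_path"});
}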
+ builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)}; + return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), std::move(input_format), std::move(pipeline), std::move(current_reader)}; } std::future StorageAzureBlobSource::createReaderAsync() @@ -1193,11 +1194,17 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData( AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx) + ContextPtr ctx, + bool distributed_processing) { RelativePathsWithMetadata read_keys; std::shared_ptr file_iterator; - if (configuration.withGlobs()) + if (distributed_processing) + { + file_iterator = std::make_shared(ctx, + ctx->getReadTaskCallback()); + } + else if (configuration.withGlobs()) { file_iterator = std::make_shared( object_storage, configuration.container, configuration.blob_path, nullptr, Block{}, ctx, &read_keys); @@ -1228,7 +1235,7 @@ ColumnsDescription StorageAzureBlob::getTableStructureFromData( return nullptr; } - /// S3 file iterator could get new keys after new iteration, check them in schema cache. + ///AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache. if (ctx->getSettingsRef().schema_inference_use_cache_for_azure && read_keys.size() > prev_read_keys_size) { columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, ctx); diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 3d6b0c64998..680c49bd8b3 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB { @@ -62,10 +63,11 @@ public: const ConstraintsDescription & constraints_, const String & comment, std::optional format_settings_, + bool distributed_processing_, ASTPtr partition_by_); static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration); + static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); static AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); @@ -93,7 +95,7 @@ public: bool supportsPartitionBy() const override; - bool supportsSubcolumns() const override; + bool supportsSubcolumns() const override { return true; } bool supportsSubsetOfColumns() const override; @@ -107,7 +109,8 @@ public: AzureObjectStorage * object_storage, const Configuration & configuration, const std::optional & format_settings, - ContextPtr ctx); + ContextPtr ctx, + bool distributed_processing = false); private: std::string name; @@ -136,7 +139,6 @@ private: const String & format_name, const ContextPtr & ctx); - }; class StorageAzureBlobSource : public ISource, WithContext @@ -148,7 +150,6 @@ public: IIterator(ContextPtr context_):WithContext(context_) {} virtual ~IIterator() = default; virtual RelativePathWithMetadata next() = 0; - virtual size_t getTotalSize() const = 0; RelativePathWithMetadata operator ()() { return next(); } }; @@ -163,13 +164,13 @@ public: ASTPtr 
query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_); + RelativePathsWithMetadata * outer_blobs_, + std::function file_progress_callback_ = {}); RelativePathWithMetadata next() override; - size_t getTotalSize() const override; ~GlobIterator() override = default; - private: + private: AzureObjectStorage * object_storage; std::string container; String blob_path_with_globs; @@ -178,7 +179,6 @@ public: Block virtual_header; size_t index = 0; - std::atomic total_size = 0; RelativePathsWithMetadata blobs_with_metadata; RelativePathsWithMetadata * outer_blobs; @@ -191,6 +191,23 @@ public: bool is_finished = false; bool is_initialized = false; std::mutex next_mutex; + + std::function file_progress_callback; + }; + + class ReadIterator : public IIterator + { + public: + explicit ReadIterator(ContextPtr context_, + const ReadTaskCallback & callback_) + : IIterator(context_), callback(callback_) { } + RelativePathWithMetadata next() override + { + return {callback(), {}}; + } + + private: + ReadTaskCallback callback; }; class KeysIterator : public IIterator @@ -199,14 +216,14 @@ public: KeysIterator( AzureObjectStorage * object_storage_, const std::string & container_, - Strings keys_, + const Strings & keys_, ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_); + RelativePathsWithMetadata * outer_blobs, + std::function file_progress_callback = {}); RelativePathWithMetadata next() override; - size_t getTotalSize() const override; ~KeysIterator() override = default; private: @@ -219,19 +236,14 @@ public: Block virtual_header; std::atomic index = 0; - std::atomic total_size = 0; - - RelativePathsWithMetadata * outer_blobs; }; StorageAzureBlobSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, String compression_hint_, AzureObjectStorage * object_storage_, @@ -244,10 +256,9 @@ public: String getName() const override; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - private: - std::vector requested_virtual_columns; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; String format; String name; Block sample_block; @@ -270,7 +281,7 @@ private: std::unique_ptr reader_) : path(std::move(path_)) , read_buf(std::move(read_buf_)) - , input_format(input_format_) + , input_format(std::move(input_format_)) , pipeline(std::move(pipeline_)) , reader(std::move(reader_)) { @@ -301,10 +312,7 @@ private: PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } const String & getPath() const { return path; } - - const std::unique_ptr & getReadBuffer() const { return read_buf; } - - const std::shared_ptr & getFormat() const { return input_format; } + const IInputFormat * getInputFormat() const { return input_format.get(); } private: String path; @@ -322,11 +330,6 @@ private: ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_objects_size = 0; - /// Recreate ReadBuffer and Pipeline for each file. 
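// The new ReadIterator declared above is the worker-side half of distributed processing for the
// cluster table function: instead of listing the container itself, each call to next() asks the
// query initiator for one more blob through the read-task callback shipped with the query.
// A minimal sketch of the same pull model, with std::function standing in for DB::ReadTaskCallback
// (an empty string means "no more work"):
#include <functional>
#include <iostream>
#include <string>
#include <vector>

using ReadTaskCallback = std::function<std::string()>;

class CallbackIterator
{
public:
    explicit CallbackIterator(ReadTaskCallback callback_) : callback(std::move(callback_)) {}

    /// Each call fetches one more relative path from the initiator.
    std::string next() { return callback(); }

private:
    ReadTaskCallback callback;
};

int main()
{
    std::vector<std::string> tasks{"data/part-0.csv", "data/part-1.csv"};
    size_t pos = 0;
    CallbackIterator iterator([&]() { return pos < tasks.size() ? tasks[pos++] : std::string{}; });

    for (auto path = iterator.next(); !path.empty(); path = iterator.next())
        std::cout << path << '\n';
}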
ReaderHolder createReader(); std::future createReaderAsync(); diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp new file mode 100644 index 00000000000..1a3f48731a7 --- /dev/null +++ b/src/Storages/StorageAzureBlobCluster.cpp @@ -0,0 +1,99 @@ +#include "Storages/StorageAzureBlobCluster.h" + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +StorageAzureBlobCluster::StorageAzureBlobCluster( + const String & cluster_name_, + const StorageAzureBlob::Configuration & configuration_, + std::unique_ptr && object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + , configuration{configuration_} + , object_storage(std::move(object_storage_)) +{ + context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); + StorageInMemoryMetadata storage_metadata; + + if (columns_.empty()) + { + /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function + auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); + + auto default_virtuals = NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; + + auto columns = storage_metadata.getSampleBlock().getNamesAndTypesList(); + virtual_columns = getVirtualsForStorage(columns, default_virtuals); + for (const auto & column : virtual_columns) + virtual_block.insert({column.type->createColumn(), column.type, column.name}); +} + +void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); + + TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context); +} + +RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const +{ + auto iterator = std::make_shared( + object_storage.get(), configuration.container, configuration.blob_path, + query, virtual_block, context, nullptr); + auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); + return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; +} + +NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const +{ + return virtual_columns; +} + + +} + +#endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h new file mode 100644 index 00000000000..6ef994fb873 
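// getTaskIteratorExtension() above is the initiator-side counterpart of ReadIterator: the glob
// iterator that lists the container is wrapped into one shared callback, and every remote
// replica that asks for work receives the next matching blob path until the listing runs out.
// A sketch of that wrapping, with a toy iterator in place of StorageAzureBlobSource::GlobIterator:
#include <cassert>
#include <functional>
#include <memory>
#include <string>
#include <vector>

struct ToyGlobIterator
{
    std::vector<std::string> matched; /// paths that matched the glob
    size_t index = 0;

    /// Returns the next matched path, or an empty string when the listing is exhausted.
    std::string next() { return index < matched.size() ? matched[index++] : std::string{}; }
};

using TaskCallback = std::function<std::string()>;

/// All workers pull through the same shared iterator, so each path is handed out once.
inline TaskCallback makeTaskCallback(std::shared_ptr<ToyGlobIterator> iterator)
{
    return [iterator]() mutable -> std::string { return iterator->next(); };
}

int main()
{
    auto iterator = std::make_shared<ToyGlobIterator>(ToyGlobIterator{{"a.csv", "b.csv"}, 0});
    auto callback = makeTaskCallback(iterator);

    size_t handed_out = 0;
    for (auto path = callback(); !path.empty(); path = callback())
        ++handed_out;
    assert(handed_out == 2);
}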
--- /dev/null +++ b/src/Storages/StorageAzureBlobCluster.h @@ -0,0 +1,53 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include + +#include "Client/Connection.h" +#include +#include +#include + +namespace DB +{ + +class Context; + +class StorageAzureBlobCluster : public IStorageCluster +{ +public: + StorageAzureBlobCluster( + const String & cluster_name_, + const StorageAzureBlob::Configuration & configuration_, + std::unique_ptr && object_storage_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_); + + std::string getName() const override { return "AzureBlobStorageCluster"; } + + NamesAndTypesList getVirtuals() const override; + + RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + +private: + void updateBeforeRead(const ContextPtr & /*context*/) override {} + + void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + + StorageAzureBlob::Configuration configuration; + NamesAndTypesList virtual_columns; + Block virtual_block; + std::unique_ptr object_storage; +}; + + +} + +#endif diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d021667f771..eb154d0d943 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -682,7 +682,7 @@ void StorageBuffer::startup() } -void StorageBuffer::flush() +void StorageBuffer::flushAndPrepareForShutdown() { if (!flush_handle) return; @@ -996,8 +996,11 @@ void StorageBuffer::reschedule() std::unique_lock lock(buffer.tryLock()); if (lock.owns_lock()) { - min_first_write_time = buffer.first_write_time; - rows += buffer.data.rows(); + if (buffer.data) + { + min_first_write_time = std::min(min_first_write_time, buffer.first_write_time); + rows += buffer.data.rows(); + } } } diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 8f089a4d580..db3cde93be5 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -92,7 +92,7 @@ public: void startup() override; /// Flush all buffers into the subordinate table and stop background thread. 
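// The StorageBuffer::reschedule() fix above only looks at buffers that actually hold data and
// takes the minimum first-write time across them, so an empty buffer no longer distorts when the
// next flush is scheduled. A simplified sketch of that selection, with time_t standing in for
// the original clock type and 0 meaning "never written":
#include <algorithm>
#include <cassert>
#include <ctime>
#include <vector>

struct ToyBuffer
{
    time_t first_write_time = 0;
    size_t rows = 0;
};

/// Earliest first-write time among non-empty buffers, or 0 when every buffer is empty.
inline time_t earliestPendingWrite(const std::vector<ToyBuffer> & buffers)
{
    time_t min_time = 0;
    for (const auto & buffer : buffers)
    {
        if (buffer.rows == 0)
            continue; /// skip empty buffers, as the patched reschedule() does
        min_time = min_time ? std::min(min_time, buffer.first_write_time) : buffer.first_write_time;
    }
    return min_time;
}

int main()
{
    std::vector<ToyBuffer> buffers{{0, 0}, {100, 5}, {50, 3}};
    assert(earliestPendingWrite(buffers) == 50);
}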
- void flush() override; + void flushAndPrepareForShutdown() override; bool optimize( const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c46192ab43b..6f0072c4560 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -60,7 +60,6 @@ #include #include #include -#include #include #include #include @@ -75,6 +74,7 @@ #include #include #include +#include #include #include @@ -434,7 +434,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( { /// Always calculate optimized cluster here, to avoid conditions during read() /// (Anyway it will be calculated in the read()) - ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query); + ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info); if (optimized_cluster) { LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", @@ -691,7 +691,11 @@ QueryTreeNodePtr buildQueryTreeDistributed(SelectQueryInfo & query_info, if (remote_storage_id.hasDatabase()) resolved_remote_storage_id = query_context->resolveStorageID(remote_storage_id); - auto storage = std::make_shared(resolved_remote_storage_id, distributed_storage_snapshot->metadata->getColumns(), distributed_storage_snapshot->object_columns); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + + auto column_names_and_types = distributed_storage_snapshot->getColumns(get_column_options); + + auto storage = std::make_shared(resolved_remote_storage_id, ColumnsDescription{column_names_and_types}); auto table_node = std::make_shared(std::move(storage), query_context); if (table_expression_modifiers) @@ -906,15 +910,14 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu String new_query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers_=*/ true); new_query->IAST::format(ast_format_settings); new_query_str = buf.str(); } QueryPipeline pipeline; ContextMutablePtr query_context = Context::createCopy(local_context); - ++query_context->getClientInfo().distributed_depth; + query_context->increaseDistributedDepth(); for (size_t shard_index : collections::range(0, shards_info.size())) { @@ -968,15 +971,14 @@ std::optional StorageDistributed::distributedWriteFromClusterStor String new_query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); new_query->IAST::format(ast_format_settings); new_query_str = buf.str(); } QueryPipeline pipeline; ContextMutablePtr query_context = Context::createCopy(local_context); - ++query_context->getClientInfo().distributed_depth; + query_context->increaseDistributedDepth(); /// Here we take addresses from destination cluster and assume source table exists on these nodes for (const auto & replicas : getCluster()->getShardsAddresses()) @@ -1299,7 +1301,7 @@ ClusterPtr StorageDistributed::getCluster() const } ClusterPtr 
StorageDistributed::getOptimizedCluster( - ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const + ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const { ClusterPtr cluster = getCluster(); const Settings & settings = local_context->getSettingsRef(); @@ -1308,7 +1310,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster( if (has_sharding_key && sharding_key_is_usable) { - ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, storage_snapshot, local_context); + ClusterPtr optimized = skipUnusedShards(cluster, query_info, storage_snapshot, local_context); if (optimized) return optimized; } @@ -1357,25 +1359,34 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c /// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr` ClusterPtr StorageDistributed::skipUnusedShards( ClusterPtr cluster, - const ASTPtr & query_ptr, + const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr local_context) const { - const auto & select = query_ptr->as(); - + const auto & select = query_info.query->as(); if (!select.prewhere() && !select.where()) - { return nullptr; - } + + /// FIXME: support analyzer + if (!query_info.syntax_analyzer_result) + return nullptr; ASTPtr condition_ast; - if (select.prewhere() && select.where()) + /// Remove JOIN from the query since it may contain a condition for other tables. + /// But only the conditions for the left table should be analyzed for shard skipping. { - condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone()); - } - else - { - condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone(); + ASTPtr select_without_join_ptr = select.clone(); + ASTSelectQuery select_without_join = select_without_join_ptr->as(); + TreeRewriterResult analyzer_result_without_join = *query_info.syntax_analyzer_result; + + removeJoin(select_without_join, analyzer_result_without_join, local_context); + if (!select_without_join.prewhere() && !select_without_join.where()) + return nullptr; + + if (select_without_join.prewhere() && select_without_join.where()) + condition_ast = makeASTFunction("and", select_without_join.prewhere()->clone(), select_without_join.where()->clone()); + else + condition_ast = select_without_join.prewhere() ? 
select_without_join.prewhere()->clone() : select_without_join.where()->clone(); } replaceConstantExpressions(condition_ast, local_context, storage_snapshot->metadata->getColumns().getAll(), shared_from_this(), storage_snapshot); @@ -1398,11 +1409,9 @@ ClusterPtr StorageDistributed::skipUnusedShards( return nullptr; } - // Can't get definite answer if we can skip any shards + // Can't get a definite answer if we can skip any shards if (!blocks) - { return nullptr; - } std::set shards; @@ -1427,8 +1436,14 @@ ActionLock StorageDistributed::getActionLock(StorageActionBlockType type) return {}; } -void StorageDistributed::flush() +void StorageDistributed::flushAndPrepareForShutdown() { + if (!getDistributedSettingsRef().flush_on_detach) + { + LOG_INFO(log, "Skip flushing data (due to flush_on_detach=0)"); + return; + } + try { flushClusterNodesAllData(getContext()); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index f45286341cf..064254f65d4 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -135,7 +135,7 @@ public: void initializeFromDisk(); void shutdown() override; - void flush() override; + void flushAndPrepareForShutdown() override; void drop() override; bool storesDataOnDisk() const override { return data_volume != nullptr; } @@ -182,10 +182,10 @@ private: /// Apply the following settings: /// - optimize_skip_unused_shards /// - force_optimize_skip_unused_shards - ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const; + ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const; ClusterPtr skipUnusedShards( - ClusterPtr cluster, const ASTPtr & query_ptr, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const; + ClusterPtr cluster, const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const; /// This method returns optimal query processing stage. /// diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5cb7575c3ae..5c03540de9a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -29,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +59,6 @@ #include #include - namespace ProfileEvents { extern const Event CreatedReadBufferOrdinary; @@ -93,6 +95,65 @@ namespace ErrorCodes namespace { +/// Forward-declare to use in listFilesWithFoldedRegexpMatchingImpl() +void listFilesWithRegexpMatchingImpl( + const std::string & path_for_ls, + const std::string & for_match, + size_t & total_bytes_to_read, + std::vector & result, + bool recursive = false); + +/* + * When `{...}` has any `/`s, it must be processed in a different way: + * Basically, a path with globs is processed by listFilesWithRegexpMatchingImpl. In case it detects multi-dir glob {.../..., .../...}, + * listFilesWithFoldedRegexpMatchingImpl is in charge from now on. + * It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob. + * Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob. + * StorageHDFS.cpp has the same logic. 
+*/ +void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls, + const std::string & processed_suffix, + const std::string & suffix_with_globs, + re2::RE2 & matcher, + size_t & total_bytes_to_read, + const size_t max_depth, + const size_t next_slash_after_glob_pos, + std::vector & result) +{ + if (!max_depth) + return; + + const fs::directory_iterator end; + for (fs::directory_iterator it(path_for_ls); it != end; ++it) + { + const std::string full_path = it->path().string(); + const size_t last_slash = full_path.rfind('/'); + const String dir_or_file_name = full_path.substr(last_slash); + + if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher)) + { + if (next_slash_after_glob_pos == std::string::npos) + { + total_bytes_to_read += it->file_size(); + result.push_back(it->path().string()); + } + else + { + listFilesWithRegexpMatchingImpl(fs::path(full_path) / "" , + suffix_with_globs.substr(next_slash_after_glob_pos), + total_bytes_to_read, result); + } + } + else if (it->is_directory()) + { + listFilesWithFoldedRegexpMatchingImpl(fs::path(full_path), processed_suffix + dir_or_file_name, + suffix_with_globs, matcher, total_bytes_to_read, + max_depth - 1, next_slash_after_glob_pos, result); + } + + } +} + /* Recursive directory listing with matched paths as a result. * Have the same method in StorageHDFS. */ @@ -101,15 +162,42 @@ void listFilesWithRegexpMatchingImpl( const std::string & for_match, size_t & total_bytes_to_read, std::vector & result, - bool recursive = false) + bool recursive) { - const size_t first_glob = for_match.find_first_of("*?{"); + const size_t first_glob_pos = for_match.find_first_of("*?{"); + const bool has_glob = first_glob_pos != std::string::npos; - const size_t end_of_path_without_globs = for_match.substr(0, first_glob).rfind('/'); + const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const size_t next_slash = suffix_with_globs.find('/', 1); - const std::string current_glob = suffix_with_globs.substr(0, next_slash); + /// slashes_in_glob counter is a upper-bound estimate of recursion depth + /// needed to process complex cases when `/` is included into glob, e.g. 
/pa{th1/a,th2/b}.csv + size_t slashes_in_glob = 0; + const size_t next_slash_after_glob_pos = [&]() + { + if (!has_glob) + return suffix_with_globs.find('/', 1); + + size_t in_curly = 0; + for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++) + { + if (*it == '{') + ++in_curly; + else if (*it == '/') + { + if (in_curly) + ++slashes_in_glob; + else + return size_t(std::distance(suffix_with_globs.begin(), it)); + } + else if (*it == '}') + --in_curly; + } + return std::string::npos; + }(); + + const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); + auto regexp = makeRegexpPatternFromGlobs(current_glob); re2::RE2 matcher(regexp); @@ -126,13 +214,22 @@ void listFilesWithRegexpMatchingImpl( if (!fs::exists(prefix_without_globs)) return; + const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; + + if (slashes_in_glob) + { + listFilesWithFoldedRegexpMatchingImpl(fs::path(prefix_without_globs), "", suffix_with_globs, + matcher, total_bytes_to_read, slashes_in_glob, + next_slash_after_glob_pos, result); + return; + } + const fs::directory_iterator end; for (fs::directory_iterator it(prefix_without_globs); it != end; ++it) { const std::string full_path = it->path().string(); const size_t last_slash = full_path.rfind('/'); const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash != std::string::npos; /// Condition is_directory means what kind of path is it in current iteration of ls if (!it->is_directory() && !looking_for_directory) @@ -147,15 +244,13 @@ void listFilesWithRegexpMatchingImpl( { if (recursive) { - listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" , - looking_for_directory ? suffix_with_globs.substr(next_slash) : current_glob , + listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "", + looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob, total_bytes_to_read, result, recursive); } else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher)) - { /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. 
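// The immediately-invoked lambda above scans the glob suffix once (for the case where the suffix
// does contain a glob): a '/' inside an unclosed '{...}' only increments slashes_in_glob, which
// later bounds the recursion depth of the folded matching, while the first '/' outside braces
// ends the current glob component. A standalone version of the same scan that returns both
// values (the function name is illustrative):
#include <cassert>
#include <cstddef>
#include <string>
#include <utility>

/// Returns {position of the first '/' outside '{...}' (npos if none), number of '/' inside '{...}'}.
/// The suffix is expected to begin with '/', as in the code above, so scanning starts at index 1.
inline std::pair<size_t, size_t> analyzeGlobSuffix(const std::string & suffix_with_globs)
{
    size_t slashes_in_glob = 0;
    size_t in_curly = 0;
    for (size_t i = 1; i < suffix_with_globs.size(); ++i)
    {
        char c = suffix_with_globs[i];
        if (c == '{')
            ++in_curly;
        else if (c == '}')
            --in_curly;
        else if (c == '/')
        {
            if (in_curly)
                ++slashes_in_glob;
            else
                return {i, slashes_in_glob};
        }
    }
    return {std::string::npos, slashes_in_glob};
}

int main()
{
    assert(analyzeGlobSuffix("/some_dir/*.csv").first == 9);       /// plain directory before the glob
    assert(analyzeGlobSuffix("/pa{th1/a,th2/b}.csv").second == 2); /// both '/' are inside the braces
}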
- listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash), total_bytes_to_read, result); - } + listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash_after_glob_pos), total_bytes_to_read, result); } } } @@ -285,10 +380,33 @@ std::unique_ptr createReadBuffer( bool use_table_fd, int table_fd, const String & compression_method, - ContextPtr context) + ContextPtr context, + const String & path_to_archive = "") { CompressionMethod method; + if (!path_to_archive.empty()) + { + auto reader = createArchiveReader(path_to_archive); + + if (current_path.find_first_of("*?{") != std::string::npos) + { + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(current_path)); + if (!matcher->ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", current_path, matcher->error()); + + return reader->readFile([my_matcher = std::move(matcher)](const std::string & path) + { + return re2::RE2::FullMatch(path, *my_matcher); + }); + } + else + { + return reader->readFile(current_path); + } + } + if (use_table_fd) method = chooseCompressionMethod("", compression_method); else @@ -296,14 +414,6 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context); - /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. - if (context->getApplicationType() == Context::ApplicationType::LOCAL - || context->getApplicationType() == Context::ApplicationType::CLIENT) - { - auto & in = static_cast(*nested_buffer); - in.setProgressCallback(context); - } - int zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method, zstd_window_log_max); } @@ -385,7 +495,8 @@ ColumnsDescription StorageFile::getTableStructureFromFile( const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context) + ContextPtr context, + const std::vector & paths_to_archive) { if (format == "Distributed") { @@ -405,30 +516,62 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (context->getSettingsRef().schema_inference_use_cache_for_file) columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context); - ReadBufferIterator read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr + ReadBufferIterator read_buffer_iterator; + if (paths_to_archive.empty()) { - String path; - struct stat file_stat; - do + read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr { - if (it == paths.end()) + String path; + struct stat file_stat; + do { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", - format); - return nullptr; + if (it == paths.end()) + { + if (first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. 
You must specify table structure manually", + format); + return nullptr; + } + + path = *it++; + file_stat = getFileStat(path, false, -1, "File"); } + while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); - path = *it++; - file_stat = getFileStat(path, false, -1, "File"); - } - while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); + first = false; + return createReadBuffer(path, file_stat, false, -1, compression_method, context); + }; + } + else + { + read_buffer_iterator = [&, path_it = paths.begin(), archive_it = paths_to_archive.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr + { + String path; + struct stat file_stat; + do + { + if (archive_it == paths_to_archive.end()) + { + if (first) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually", + format); + return nullptr; + } - first = false; - return createReadBuffer(path, file_stat, false, -1, compression_method, context); - }; + path = *archive_it++; + file_stat = getFileStat(path, false, -1, "File"); + } + while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0); + + first = false; + return createReadBuffer(*path_it, file_stat, false, -1, compression_method, context, path); + + }; + } ColumnsDescription columns; if (columns_from_cache) @@ -480,8 +623,17 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args) StorageFile::StorageFile(const std::string & table_path_, const std::string & user_files_path, CommonArguments args) : StorageFile(args) { + if (!args.path_to_archive.empty()) + { + paths_to_archive = getPathsList(args.path_to_archive, user_files_path, args.getContext(), total_bytes_to_read); + paths = {table_path_}; + } + else + { + paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); + } + is_db_table = false; - paths = getPathsList(table_path_, user_files_path, args.getContext(), total_bytes_to_read); is_path_with_globs = paths.size() > 1; if (!paths.empty()) path_for_partitioned_write = paths.front(); @@ -535,7 +687,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) columns = getTableStructureFromFileDescriptor(args.getContext()); else { - columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext()); + columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext(), paths_to_archive); if (!args.columns.empty() && args.columns != columns) throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } @@ -565,56 +717,65 @@ using StorageFilePtr = std::shared_ptr; class StorageFileSource : public ISource { public: - struct FilesInfo + class FilesIterator { + public: + explicit FilesIterator( + const Strings & files_, std::vector archives_, std::vector> files_in_archive_) + : files(files_), archives(std::move(archives_)), files_in_archive(std::move(files_in_archive_)) + { + } + + String next() + { + auto current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= files.size()) + return ""; + + return files[current_index]; + } + + std::pair nextFileFromArchive() + { + auto current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= files_in_archive.size()) + return {"", ""}; + + const auto & [archive_index, filename] = 
files_in_archive[current_index]; + return {archives[archive_index], filename}; + } + + bool fromArchive() const + { + return !archives.empty(); + } + + private: std::vector files; - - std::atomic next_file_to_read = 0; - - bool need_path_column = false; - bool need_file_column = false; - - size_t total_bytes_to_read = 0; + std::vector archives; + std::vector> files_in_archive; + std::atomic index = 0; }; - using FilesInfoPtr = std::shared_ptr; - - static Block getBlockForSource(const Block & block_for_format, const FilesInfoPtr & files_info) - { - auto res = block_for_format; - if (files_info->need_path_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_path"}); - } - if (files_info->need_file_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_file"}); - } - return res; - } + using FilesIteratorPtr = std::shared_ptr; StorageFileSource( + const ReadFromFormatInfo & info, std::shared_ptr storage_, const StorageSnapshotPtr & storage_snapshot_, ContextPtr context_, UInt64 max_block_size_, - FilesInfoPtr files_info_, - ColumnsDescription columns_description_, - const Block & block_for_format_, + FilesIteratorPtr files_iterator_, std::unique_ptr read_buf_) - : ISource(getBlockForSource(block_for_format_, files_info_)) + : ISource(info.source_header, false) , storage(std::move(storage_)) , storage_snapshot(storage_snapshot_) - , files_info(std::move(files_info_)) + , files_iterator(std::move(files_iterator_)) , read_buf(std::move(read_buf_)) - , columns_description(std::move(columns_description_)) - , block_for_format(block_for_format_) + , columns_description(info.columns_description) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) + , block_for_format(info.format_header) , context(context_) , max_block_size(max_block_size_) { @@ -700,11 +861,35 @@ public: { if (!storage->use_table_fd) { - auto current_file = files_info->next_file_to_read.fetch_add(1); - if (current_file >= files_info->files.size()) - return {}; + if (files_iterator->fromArchive()) + { + auto [archive, filename] = files_iterator->nextFileFromArchive(); + if (archive.empty()) + return {}; - current_path = files_info->files[current_file]; + current_path = std::move(filename); + + if (!archive_reader || archive_reader->getPath() != archive) + { + archive_reader = createArchiveReader(archive); + file_enumerator = archive_reader->firstFile(); + } + + if (file_enumerator == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to find a file in archive {}", archive); + + while (file_enumerator->getFileName() != current_path) + { + if (!file_enumerator->nextFile()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected file {} is missing from archive {}", current_path, archive); + } + } + else + { + current_path = files_iterator->next(); + if (current_path.empty()) + return {}; + } /// Special case for distributed format. Defaults are not needed here. 
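// FilesIterator above replaces the old shared FilesInfo counter: every source stream claims its
// next file (or archive entry) with a single atomic fetch_add, so no file is handed out twice and
// no locking is needed. A minimal standalone version of the same idea for plain paths:
#include <atomic>
#include <cassert>
#include <string>
#include <vector>

class SharedFileIterator
{
public:
    explicit SharedFileIterator(std::vector<std::string> files_) : files(std::move(files_)) {}

    /// Thread-safe: each call returns a distinct file, or "" once everything has been claimed.
    std::string next()
    {
        size_t current = index.fetch_add(1, std::memory_order_relaxed);
        return current < files.size() ? files[current] : std::string{};
    }

private:
    std::vector<std::string> files;
    std::atomic<size_t> index{0};
};

int main()
{
    SharedFileIterator iterator({"a.csv", "b.csv"});
    assert(iterator.next() == "a.csv");
    assert(iterator.next() == "b.csv");
    assert(iterator.next().empty());
}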
if (storage->format_name == "Distributed") @@ -717,17 +902,25 @@ public: if (!read_buf) { - auto file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); - if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) - continue; - read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); - } + struct stat file_stat; + if (archive_reader == nullptr) + { + file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); - size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_files_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_files_size + file_size) / total_files_size * total_rows_approx_accumulated)); - total_files_size += file_size; + if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) + continue; + } + + if (archive_reader == nullptr) + { + read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); + } + else + { + chassert(file_enumerator); + read_buf = archive_reader->readFile(std::move(file_enumerator)); + } + } const Settings & settings = context->getSettingsRef(); chassert(!storage->paths.empty()); @@ -745,6 +938,13 @@ public: }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); @@ -754,31 +954,27 @@ public: if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + size_t chunk_size = 0; + if (storage->format_name != "Distributed") + chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); /// Enrich with virtual columns. 
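// When the current file lives inside an archive, the branch above walks the archive's file
// enumerator until it is positioned on the expected entry and only then turns the enumerator into
// a read buffer. A standalone sketch of that seek against a toy enumerator (the real one comes
// from IArchiveReader::firstFile(), as used above):
#include <cassert>
#include <stdexcept>
#include <string>
#include <vector>

class ToyFileEnumerator
{
public:
    explicit ToyFileEnumerator(std::vector<std::string> entries_) : entries(std::move(entries_)) {}

    const std::string & getFileName() const { return entries.at(position); }
    bool nextFile() { return ++position < entries.size(); } /// false once the archive is exhausted

private:
    std::vector<std::string> entries;
    size_t position = 0;
};

/// Positions the enumerator on `wanted`, or throws if the archive has no such entry.
void seekToFile(ToyFileEnumerator & enumerator, const std::string & wanted)
{
    while (enumerator.getFileName() != wanted)
    {
        if (!enumerator.nextFile())
            throw std::runtime_error("Expected file " + wanted + " is missing from the archive");
    }
}

int main()
{
    ToyFileEnumerator enumerator({"a.csv", "b.csv", "c.csv"});
    seekToFile(enumerator, "b.csv");
    assert(enumerator.getFileName() == "b.csv");
}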
- if (files_info->need_path_column) + + for (const auto & virtual_column : requested_virtual_columns) { - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); - chunk.addColumn(column->convertToFullColumnIfConst()); + if (virtual_column.name == "_path") + { + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, current_path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = current_path.find_last_of('/'); + auto file_name = current_path.substr(last_slash_pos + 1); + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, file_name)->convertToFullColumnIfConst()); + } } - if (files_info->need_file_column) - { - size_t last_slash_pos = current_path.find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); - - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - - if (num_rows && total_files_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } return chunk; } @@ -790,7 +986,11 @@ public: reader.reset(); pipeline.reset(); input_format.reset(); - read_buf.reset(); + + if (archive_reader != nullptr) + file_enumerator = archive_reader->nextFile(std::move(read_buf)); + else + read_buf.reset(); } return {}; @@ -800,7 +1000,7 @@ public: private: std::shared_ptr storage; StorageSnapshotPtr storage_snapshot; - FilesInfoPtr files_info; + FilesIteratorPtr files_iterator; String current_path; Block sample_block; std::unique_ptr read_buf; @@ -808,7 +1008,12 @@ private: std::unique_ptr pipeline; std::unique_ptr reader; + std::shared_ptr archive_reader; + std::unique_ptr file_enumerator = nullptr; + ColumnsDescription columns_description; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; Block block_for_format; ContextPtr context; /// TODO Untangle potential issues with context lifetime. @@ -817,12 +1022,6 @@ private: bool finished_generate = false; std::shared_lock shared_lock; - - UInt64 total_rows_approx_accumulated = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_max = 0; - - size_t total_files_size = 0; }; @@ -841,32 +1040,67 @@ Pipe StorageFile::read( } else { - if (paths.size() == 1 && !fs::exists(paths[0])) + const auto & p = paths_to_archive.empty() ? 
paths : paths_to_archive; + if (p.size() == 1 && !fs::exists(p[0])) { if (context->getSettingsRef().engine_file_empty_if_not_exists) return Pipe(std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); else - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", paths[0]); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p[0]); } } - auto files_info = std::make_shared(); - files_info->files = paths; - files_info->total_bytes_to_read = total_bytes_to_read; + std::vector> files_in_archive; - for (const auto & column : column_names) + size_t files_in_archive_num = 0; + if (!paths_to_archive.empty()) { - if (column == "_path") - files_info->need_path_column = true; - if (column == "_file") - files_info->need_file_column = true; + if (paths.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Multiple paths defined for reading from archive"); + + const auto & path = paths[0]; + + IArchiveReader::NameFilter filter; + if (path.find_first_of("*?{") != std::string::npos) + { + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(path)); + if (!matcher->ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", path, matcher->error()); + + filter = [matcher](const std::string & p) + { + return re2::RE2::FullMatch(p, *matcher); + }; + } + + for (size_t i = 0; i < paths_to_archive.size(); ++i) + { + if (filter) + { + const auto & path_to_archive = paths_to_archive[i]; + auto archive_reader = createArchiveReader(path_to_archive); + auto files = archive_reader->getAllFiles(filter); + for (auto & file : files) + files_in_archive.push_back({i, std::move(file)}); + } + else + { + files_in_archive.push_back({i, path}); + } + } + + files_in_archive_num = files_in_archive.size(); } + auto files_iterator = std::make_shared(paths, paths_to_archive, std::move(files_in_archive)); auto this_ptr = std::static_pointer_cast(shared_from_this()); size_t num_streams = max_num_streams; - if (max_num_streams > paths.size()) - num_streams = paths.size(); + + auto files_to_read = std::max(files_in_archive_num, paths.size()); + if (max_num_streams > files_to_read) + num_streams = files_to_read; Pipes pipes; pipes.reserve(num_streams); @@ -877,33 +1111,10 @@ Pipe StorageFile::read( if (progress_callback) progress_callback(FileProgress(0, total_bytes_to_read)); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + for (size_t i = 0; i < num_streams; ++i) { - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { - return std::any_of( - virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col) { return col == virtual_col.name; }); - }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - } - - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - /// In case of reading from fd we have to check whether we have already created /// the read buffer from it in Storage constructor (for schema inference) or not. 
/// If yes, then we should use it in StorageFileSource. Atomic bool flag is needed @@ -913,13 +1124,12 @@ Pipe StorageFile::read( read_buffer = std::move(peekable_read_buffer_from_fd); pipes.emplace_back(std::make_shared( + read_from_format_info, this_ptr, storage_snapshot, context, max_block_size, - files_info, - columns_description, - block_for_format, + files_iterator, std::move(read_buffer))); } @@ -1170,6 +1380,9 @@ SinkToStoragePtr StorageFile::write( ContextPtr context, bool /*async_insert*/) { + if (!use_table_fd && !paths_to_archive.empty()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Writing to archives is not supported"); + if (format_name == "Distributed") throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for Distributed format"); @@ -1343,6 +1556,7 @@ void registerStorageFile(StorageFactory & factory) factory_args.constraints, factory_args.comment, {}, + {}, }; ASTs & engine_args_ast = factory_args.engine_args; @@ -1413,7 +1627,7 @@ void registerStorageFile(StorageFactory & factory) else if (type == Field::Types::UInt64) source_fd = static_cast(literal->value.get()); else if (type == Field::Types::String) - source_path = literal->value.get(); + StorageFile::parseFileSource(literal->value.get(), source_path, storage_args.path_to_archive); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor"); } @@ -1485,4 +1699,32 @@ void StorageFile::addColumnsToCache( schema_cache.addMany(cache_keys, columns); } +void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive) +{ + size_t pos = source.find("::"); + if (pos == String::npos) + { + filename = std::move(source); + return; + } + + std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos); + while (path_to_archive_view.back() == ' ') + path_to_archive_view.remove_suffix(1); + + if (path_to_archive_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); + + path_to_archive = path_to_archive_view; + + std::string_view filename_view = std::string_view{source}.substr(pos + 2); + while (filename_view.front() == ' ') + filename_view.remove_prefix(1); + + if (filename_view.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); + + filename = filename_view; +} + } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ed50ae73e51..b5c23d87bba 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -22,8 +22,8 @@ public: const ColumnsDescription & columns; const ConstraintsDescription & constraints; const String & comment; - const std::string rename_after_processing; + std::string path_to_archive; }; /// From file descriptor @@ -75,6 +75,8 @@ public: /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
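The parseFileSource helper above accepts sources of the form '<path-to-archive> :: <path-inside-archive>', trims the spaces around the separator, and rejects an empty half on either side. A minimal standalone sketch of the same splitting rule, assuming only the standard library (names are illustrative, not the ClickHouse API):

#include <iostream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>

// Splits "archive.zip :: data.csv" into ("archive.zip", "data.csv"),
// trimming spaces around "::" the same way parseFileSource does.
static std::pair<std::string, std::string> splitFileSource(std::string_view source)
{
    size_t pos = source.find("::");
    if (pos == std::string_view::npos)
        return {std::string(source), {}};            // plain path, no archive

    std::string_view archive = source.substr(0, pos);
    std::string_view filename = source.substr(pos + 2);

    while (!archive.empty() && archive.back() == ' ')
        archive.remove_suffix(1);
    while (!filename.empty() && filename.front() == ' ')
        filename.remove_prefix(1);

    if (archive.empty() || filename.empty())
        throw std::invalid_argument("both the archive path and the inner filename must be non-empty");

    return {std::string(archive), std::string(filename)};
}

int main()
{
    auto [archive, inner] = splitFileSource("data.zip :: *.csv");
    std::cout << archive << " | " << inner << '\n';  // prints: data.zip | *.csv
}

The glob-aware branch added to read() earlier in this hunk then expands the inner pattern against the archive contents via the same NameFilter mechanism.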
bool supportsSubsetOfColumns() const override; + bool supportsSubcolumns() const override { return true; } + bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; @@ -88,10 +90,13 @@ public: const std::vector & paths, const String & compression_method, const std::optional & format_settings, - ContextPtr context); + ContextPtr context, + const std::vector & paths_to_archive = {"auto"}); static SchemaCache & getSchemaCache(const ContextPtr & context); + static void parseFileSource(String source, String & filename, String & path_to_archive); + protected: friend class StorageFileSource; friend class StorageFileSink; @@ -121,6 +126,7 @@ private: std::string base_path; std::vector paths; + std::vector paths_to_archive; bool is_db_table = true; /// Table is stored in real database, not user's file bool use_table_fd = false; /// Use table_fd instead of path diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index afe75349864..af285a953dc 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -239,7 +239,7 @@ bool StorageInMemoryMetadata::hasAnyGroupByTTL() const ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( const NameSet & updated_columns, bool include_ttl_target, - const std::function & has_indice_or_projection) const + const HasDependencyCallback & has_dependency) const { if (updated_columns.empty()) return {}; @@ -268,13 +268,13 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( for (const auto & index : getSecondaryIndices()) { - if (has_indice_or_projection("skp_idx_" + index.name + ".idx") || has_indice_or_projection("skp_idx_" + index.name + ".idx2")) + if (has_dependency(index.name, ColumnDependency::SKIP_INDEX)) add_dependent_columns(index.expression, indices_columns); } for (const auto & projection : getProjections()) { - if (has_indice_or_projection(projection.getDirectoryName())) + if (has_dependency(projection.name, ColumnDependency::PROJECTION)) add_dependent_columns(&projection, projections_columns); } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 4ed7eb8bf29..30b2b303492 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -147,12 +147,14 @@ struct StorageInMemoryMetadata TTLDescriptions getGroupByTTLs() const; bool hasAnyGroupByTTL() const; + using HasDependencyCallback = std::function; + /// Returns columns, which will be needed to calculate dependencies (skip indices, projections, /// TTL expressions) if we update @updated_columns set of columns. ColumnDependencies getColumnDependencies( const NameSet & updated_columns, bool include_ttl_target, - const std::function & has_indice_or_projection) const; + const HasDependencyCallback & has_dependency) const; /// Block with ordinary + materialized columns. 
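The getColumnDependencies change above replaces probing for on-disk names ("skp_idx_<name>.idx", projection directory names) with a callback that receives a logical name and a dependency kind. The exact HasDependencyCallback parameter list is not visible in this extract, so the following caller-side sketch infers it from the call sites has_dependency(index.name, ColumnDependency::SKIP_INDEX) and has_dependency(projection.name, ColumnDependency::PROJECTION); treat the types and helper names as assumptions.

#include <functional>
#include <set>
#include <string>

// Assumed shape of the callback: "does this part still materialize the
// skip index / projection with this logical name?"
enum class DependencyKind { SKIP_INDEX, PROJECTION };
using HasDependencyCallback = std::function<bool(const std::string & name, DependencyKind kind)>;

// Hypothetical caller: answer by logical name instead of file-name probing.
HasDependencyCallback makeHasDependency(std::set<std::string> indices_on_part,
                                        std::set<std::string> projections_on_part)
{
    return [indices = std::move(indices_on_part), projections = std::move(projections_on_part)]
           (const std::string & name, DependencyKind kind)
    {
        if (kind == DependencyKind::SKIP_INDEX)
            return indices.count(name) > 0;
        return projections.count(name) > 0;
    };
}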
Block getSampleBlock() const; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 640706aae17..121d859a3f2 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -177,7 +178,7 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) } } -HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context) const +HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context, const Names & required_columns_names) const { auto metadata_snapshot = getInMemoryMetadataPtr(); if (!analyzed_join->sameStrictnessAndKind(strictness, kind)) @@ -237,8 +238,10 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, /// Qualifies will be added by join implementation (TableJoin contains a rename mapping). analyzed_join->setRightKeys(key_names); analyzed_join->setLeftKeys(left_key_names_resorted); - - HashJoinPtr join_clone = std::make_shared(analyzed_join, getRightSampleBlock()); + Block right_sample_block; + for (const auto & name : required_columns_names) + right_sample_block.insert(getRightSampleBlock().getByName(name)); + HashJoinPtr join_clone = std::make_shared(analyzed_join, right_sample_block); RWLockImpl::LockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Read, context); join_clone->setLock(holder); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 5559b5d1ec8..4626d744a38 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -49,7 +49,7 @@ public: /// Return instance of HashJoin holding lock that protects from insertions to StorageJoin. /// HashJoin relies on structure of hash table that's why we need to return it with locked mutex. 
- HashJoinPtr getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context) const; + HashJoinPtr getJoinLocked(std::shared_ptr analyzed_join, ContextPtr context, const Names & required_columns_names) const; /// Get result type for function "joinGet(OrNull)" DataTypePtr joinGetCheckAndGetReturnType(const DataTypes & data_types, const String & column_name, bool or_null) const; diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f2b1b907832..f98728c012e 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -419,7 +419,7 @@ StorageKeeperMap::StorageKeeperMap( } else if (code != Coordination::Error::ZOK) { - throw Coordination::Exception(code, dropped_lock_path); + throw Coordination::Exception::fromPath(code, dropped_lock_path); } else { @@ -918,7 +918,7 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca { auto code = client->tryRemove(delete_request->getPath()); if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw zkutil::KeeperException(code, delete_request->getPath()); + throw zkutil::KeeperException::fromPath(code, delete_request->getPath()); } } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index d8065b8bb3c..90cf55e53b2 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -777,7 +777,7 @@ void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr num_marks_saved = 0; total_rows = 0; total_bytes = 0; - getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); } @@ -946,9 +946,10 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c fs::path data_path_in_backup_fs = data_path_in_backup; auto temp_dir_owner = std::make_shared(disk, "tmp/"); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); disk->createDirectories(temp_dir); + const auto & read_settings = backup_entries_collector.getReadSettings(); bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks; /// *.bin @@ -980,7 +981,7 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c /// sizes.json String files_info_path = file_checker.getPath(); backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, copy_encrypted)); + data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, read_settings, copy_encrypted)); /// columns.txt backup_entries_collector.addBackupEntry( diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 08fbb61960f..e6fcbc203e6 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -41,6 +41,8 @@ public: void drop() override { nested_storage->drop(); } + bool supportsTrivialCountOptimization() const override { return false; } + private: [[noreturn]] static void throwNotAllowed() { diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 16d724d54d8..7354dd56552 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -228,10 +228,13 @@ void StorageMaterializedView::dropInnerTableIfAny(bool sync, ContextPtr local_co { /// We will use `sync` argument wneh this function is called from a DROP query /// and will ignore 
database_atomic_wait_for_drop_and_detach_synchronously when it's called from drop task. - /// See the comment in StorageMaterializedView::drop + /// See the comment in StorageMaterializedView::drop. + /// DDL queries with StorageMaterializedView are fundamentally broken. + /// Best-effort to make them work: the inner table name is almost always less than the MV name (so it's safe to lock DDLGuard) + bool may_lock_ddl_guard = getStorageID().getQualifiedName() < target_table_id.getQualifiedName(); if (has_inner_table && tryGetTargetTable()) InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, target_table_id, - sync, /* ignore_sync_setting */ true); + sync, /* ignore_sync_setting */ true, may_lock_ddl_guard); } void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 6ed0583bd44..2ef1d8d3183 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -277,11 +277,13 @@ namespace const std::shared_ptr blocks_, const String & data_path_in_backup, const DiskPtr & temp_disk_, + const ReadSettings & read_settings_, UInt64 max_compress_block_size_) : context(context_) , metadata_snapshot(metadata_snapshot_) , blocks(blocks_) , temp_disk(temp_disk_) + , read_settings(read_settings_) , max_compress_block_size(max_compress_block_size_) { fs::path data_path_in_backup_fs = data_path_in_backup; @@ -314,7 +316,7 @@ namespace backup_entries.resize(file_paths.size()); temp_dir_owner.emplace(temp_disk); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); temp_disk->createDirectories(temp_dir); /// Writing data.bin @@ -371,7 +373,7 @@ namespace file_checker.update(temp_dir / fs::path{file_paths[i]}.filename()); } file_checker.save(); - backup_entries[sizes_json_pos] = {file_paths[sizes_json_pos], std::make_shared(temp_disk, sizes_json_path)}; + backup_entries[sizes_json_pos] = {file_paths[sizes_json_pos], std::make_shared(temp_disk, sizes_json_path, read_settings)}; } /// We don't need to keep `blocks` any longer. 
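The may_lock_ddl_guard heuristic in the StorageMaterializedView hunk above relies on a standard deadlock-avoidance argument: if every actor acquires DDL guards in one globally consistent order (here, ordering by qualified name), an A-waits-for-B / B-waits-for-A cycle cannot form, so taking the inner table's guard is safe whenever its name sorts before the view's own. A generic sketch of ordered acquisition, unrelated to the real DDLGuard API (the registry and names below are made up):

#include <algorithm>
#include <map>
#include <mutex>
#include <string>

// Toy per-name guard registry, assumed to be populated before concurrent use.
std::map<std::string, std::mutex> guards;

// Locks the guards for two distinct names in lexicographic order, so every
// caller agrees on the order and lock cycles are impossible.
void withBothGuards(const std::string & a, const std::string & b)
{
    const std::string & first = std::min(a, b);
    const std::string & second = std::max(a, b);

    std::lock_guard<std::mutex> lock_first(guards[first]);
    std::lock_guard<std::mutex> lock_second(guards[second]);

    // ... run the DDL that touches both objects ...
}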
@@ -386,6 +388,7 @@ namespace std::shared_ptr blocks; DiskPtr temp_disk; std::optional temp_dir_owner; + ReadSettings read_settings; UInt64 max_compress_block_size; Strings file_paths; size_t data_bin_pos, index_mrk_pos, columns_txt_pos, count_txt_pos, sizes_json_pos; @@ -395,13 +398,16 @@ namespace void StorageMemory::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { auto temp_disk = backup_entries_collector.getContext()->getGlobalTemporaryVolume()->getDisk(0); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto max_compress_block_size = backup_entries_collector.getContext()->getSettingsRef().max_compress_block_size; + backup_entries_collector.addBackupEntries(std::make_shared( backup_entries_collector.getContext(), getInMemoryMetadataPtr(), data.get(), data_path_in_backup, temp_disk, + read_settings, max_compress_block_size)->getBackupEntries()); } @@ -453,10 +459,10 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat if (!dynamic_cast(in.get())) { temp_data_file.emplace(temporary_disk); - auto out = std::make_unique(temp_data_file->getPath()); + auto out = std::make_unique(temp_data_file->getAbsolutePath()); copyData(*in, *out); out.reset(); - in = createReadBufferFromFileBase(temp_data_file->getPath(), {}); + in = createReadBufferFromFileBase(temp_data_file->getAbsolutePath(), {}); } std::unique_ptr in_from_file{static_cast(in.release())}; CompressedReadBufferFromFile compressed_in{std::move(in_from_file)}; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b0ed242d14d..272f35303bd 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -795,6 +795,10 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( bool filter_by_database_virtual_column /* = false */, bool filter_by_table_virtual_column /* = false */) const { + /// FIXME: filtering does not work with allow_experimental_analyzer due to + /// different column names there (it has "table_name._table" not just + /// "_table") + assert(!filter_by_database_virtual_column || !filter_by_table_virtual_column || query); const Settings & settings = query_context->getSettingsRef(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 29880f10f28..7e432dfcb47 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -45,6 +45,7 @@ #include #include + namespace DB { @@ -373,7 +374,7 @@ void StorageMergeTree::alter( /// Always execute required mutations synchronously, because alters /// should be executed in sequential order. if (!maybe_mutation_commands.empty()) - waitForMutation(mutation_version); + waitForMutation(mutation_version, false); } { @@ -599,9 +600,22 @@ void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr quer /// Validate partition IDs (if any) before starting mutation getPartitionIdsAffectedByCommands(commands, query_context); - Int64 version = startMutation(commands, query_context); + Int64 version; + { + /// It's important to serialize order of mutations with alter queries because + /// they can depend on each other. + if (auto alter_lock = tryLockForAlter(query_context->getSettings().lock_acquire_timeout); alter_lock == std::nullopt) + { + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, + "Cannot start mutation in {}ms because some metadata-changing ALTER (MODIFY|RENAME|ADD|DROP) is currently executing. 
" + "You can change this timeout with `lock_acquire_timeout` setting", + query_context->getSettings().lock_acquire_timeout.totalMilliseconds()); + } + version = startMutation(commands, query_context); + } + if (query_context->getSettingsRef().mutations_sync > 0 || query_context->getCurrentTransaction()) - waitForMutation(version); + waitForMutation(version, false); } bool StorageMergeTree::hasLightweightDeletedMask() const @@ -844,7 +858,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( bool aggressive, const String & partition_id, bool final, - String * out_disable_reason, + String & out_disable_reason, TableLockHolder & /* table_lock_holder */, std::unique_lock & lock, const MergeTreeTransactionPtr & txn, @@ -862,7 +876,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( CurrentlyMergingPartsTaggerPtr merging_tagger; MergeList::EntryPtr merge_entry; - auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, String * disable_reason) -> bool + auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, String & disable_reason) -> bool { if (tx) { @@ -871,8 +885,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if ((left && !left->version.isVisible(tx->getSnapshot(), Tx::EmptyTID)) || (right && !right->version.isVisible(tx->getSnapshot(), Tx::EmptyTID))) { - if (disable_reason) - *disable_reason = "Some part is not visible in transaction"; + disable_reason = "Some part is not visible in transaction"; return false; } @@ -880,8 +893,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if ((left && left->version.isRemovalTIDLocked()) || (right && right->version.isRemovalTIDLocked())) { - if (disable_reason) - *disable_reason = "Some part is locked for removal in another cuncurrent transaction"; + disable_reason = "Some part is locked for removal in another cuncurrent transaction"; return false; } } @@ -892,8 +904,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( { if (currently_merging_mutating_parts.contains(right)) { - if (disable_reason) - *disable_reason = "Some part currently in a merging or mutating process"; + disable_reason = "Some part currently in a merging or mutating process"; return false; } else @@ -902,30 +913,26 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (currently_merging_mutating_parts.contains(left) || currently_merging_mutating_parts.contains(right)) { - if (disable_reason) - *disable_reason = "Some part currently in a merging or mutating process"; + disable_reason = "Some part currently in a merging or mutating process"; return false; } if (getCurrentMutationVersion(left, lock) != getCurrentMutationVersion(right, lock)) { - if (disable_reason) - *disable_reason = "Some parts have differ mmutatuon version"; + disable_reason = "Some parts have differ mmutatuon version"; return false; } if (!partsContainSameProjections(left, right)) { - if (disable_reason) - *disable_reason = "Some parts contains differ projections"; + disable_reason = "Some parts contains differ projections"; return false; } auto max_possible_level = getMaxLevelInBetween(left, right); if (max_possible_level > std::max(left->info.level, right->info.level)) { - if (disable_reason) - *disable_reason = fmt::format("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, 
max_possible_level); + disable_reason = fmt::format("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, max_possible_level); return false; } @@ -934,14 +941,13 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; - auto is_background_memory_usage_ok = [](String * disable_reason) -> bool + auto is_background_memory_usage_ok = [](String & disable_reason) -> bool { if (canEnqueueBackgroundTask()) return true; - if (disable_reason) - *disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})", - formatReadableSizeWithBinarySuffix(background_memory_tracker.get()), - formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit())); + disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})", + formatReadableSizeWithBinarySuffix(background_memory_tracker.get()), + formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit())); return false; }; @@ -966,8 +972,8 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( txn, out_disable_reason); } - else if (out_disable_reason) - *out_disable_reason = "Current value of max_source_parts_size is zero"; + else + out_disable_reason = "Current value of max_source_parts_size is zero"; } } else @@ -1001,15 +1007,14 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( /// If final - we will wait for currently processing merges to finish and continue. if (final && select_decision != SelectPartsDecision::SELECTED - && !currently_merging_mutating_parts.empty() - && out_disable_reason) + && !currently_merging_mutating_parts.empty()) { LOG_DEBUG(log, "Waiting for currently running merges ({} parts are merging right now) to perform OPTIMIZE FINAL", currently_merging_mutating_parts.size()); if (std::cv_status::timeout == currently_processing_in_background_condition.wait_for(lock, timeout)) { - *out_disable_reason = fmt::format("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms); + out_disable_reason = fmt::format("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms); break; } } @@ -1025,14 +1030,9 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (select_decision != SelectPartsDecision::SELECTED) { - if (out_disable_reason) - { - if (!out_disable_reason->empty()) - { - *out_disable_reason += ". "; - } - *out_disable_reason += "Cannot select parts for optimization"; - } + if (!out_disable_reason.empty()) + out_disable_reason += ". 
"; + out_disable_reason += "Cannot select parts for optimization"; return {}; } @@ -1053,7 +1053,7 @@ bool StorageMergeTree::merge( const Names & deduplicate_by_columns, bool cleanup, const MergeTreeTransactionPtr & txn, - String * out_disable_reason, + String & out_disable_reason, bool optimize_skip_merged_partitions) { auto table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -1108,7 +1108,7 @@ bool StorageMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & p } MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( - const StorageMetadataPtr & metadata_snapshot, String * /* disable_reason */, TableLockHolder & /* table_lock_holder */, + const StorageMetadataPtr & metadata_snapshot, String & /* disable_reason */, TableLockHolder & /* table_lock_holder */, std::unique_lock & /*currently_processing_in_background_mutex_lock*/) { if (current_mutations_by_version.empty()) @@ -1154,16 +1154,25 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( } TransactionID first_mutation_tid = mutations_begin_it->second.tid; - MergeTreeTransactionPtr txn = tryGetTransactionForMutation(mutations_begin_it->second, log); - assert(txn || first_mutation_tid.isPrehistoric()); + MergeTreeTransactionPtr txn; - if (txn) + if (!first_mutation_tid.isPrehistoric()) { + /// Mutate visible parts only /// NOTE Do not mutate visible parts in Outdated state, because it does not make sense: /// mutation will fail anyway due to serialization error. - if (!part->version.isVisible(*txn)) + + /// It's possible that both mutation and transaction are already finished, + /// because that part should not be mutated because it was not visible for that transaction. + if (!part->version.isVisible(first_mutation_tid.start_csn, first_mutation_tid)) continue; + + txn = tryGetTransactionForMutation(mutations_begin_it->second, log); + if (!txn) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find transaction {} that has started mutation {} " + "that is going to be applied to part {}", + first_mutation_tid, mutations_begin_it->second.file_name, part->name); } auto commands = std::make_shared(); @@ -1309,10 +1318,11 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merger_mutator.merges_blocker.isCancelled()) return false; - merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, nullptr, shared_lock, lock, txn); + String out_reason; + merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, out_reason, shared_lock, lock, txn); if (!merge_entry && !current_mutations_by_version.empty()) - mutate_entry = selectPartsToMutate(metadata_snapshot, nullptr, shared_lock, lock); + mutate_entry = selectPartsToMutate(metadata_snapshot, out_reason, shared_lock, lock); has_mutations = !current_mutations_by_version.empty(); } @@ -1369,8 +1379,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign cleared_count += clearOldWriteAheadLogs(); cleared_count += clearOldMutations(); cleared_count += clearEmptyParts(); - if (getSettings()->merge_tree_enable_clear_old_broken_detached) - cleared_count += clearOldBrokenPartsFromDetachedDirectory(); + cleared_count += clearOldBrokenPartsFromDetachedDirectory(); return cleared_count; /// TODO maybe take into account number of cleared objects when calculating backoff }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); @@ -1515,7 +1524,7 @@ bool StorageMergeTree::optimize( 
deduplicate_by_columns, cleanup, txn, - &disable_reason, + disable_reason, local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; @@ -1543,7 +1552,7 @@ bool StorageMergeTree::optimize( deduplicate_by_columns, cleanup, txn, - &disable_reason, + disable_reason, local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; @@ -1643,11 +1652,7 @@ struct FutureNewEmptyPart MergeTreePartition partition; std::string part_name; - scope_guard tmp_dir_guard; - StorageMergeTree::MutableDataPartPtr data_part; - - std::string getDirName() const { return StorageMergeTree::EMPTY_PART_TMP_PREFIX + part_name; } }; using FutureNewEmptyParts = std::vector; @@ -1678,19 +1683,19 @@ FutureNewEmptyParts initCoverageWithNewEmptyParts(const DataPartsVector & old_pa return future_parts; } -StorageMergeTree::MutableDataPartsVector createEmptyDataParts(MergeTreeData & data, FutureNewEmptyParts & future_parts, const MergeTreeTransactionPtr & txn) +std::pair> createEmptyDataParts( + MergeTreeData & data, FutureNewEmptyParts & future_parts, const MergeTreeTransactionPtr & txn) { - StorageMergeTree::MutableDataPartsVector data_parts; + std::pair> data_parts; for (auto & part: future_parts) - data_parts.push_back(data.createEmptyPart(part.part_info, part.partition, part.part_name, txn)); + { + auto [new_data_part, tmp_dir_holder] = data.createEmptyPart(part.part_info, part.partition, part.part_name, txn); + data_parts.first.emplace_back(std::move(new_data_part)); + data_parts.second.emplace_back(std::move(tmp_dir_holder)); + } return data_parts; } -void captureTmpDirectoryHolders(MergeTreeData & data, FutureNewEmptyParts & future_parts) -{ - for (auto & part : future_parts) - part.tmp_dir_guard = data.getTemporaryPartDirectoryHolder(part.getDirName()); -} void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction) { @@ -1728,15 +1733,24 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. 
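The createEmptyDataParts refactor above returns the temporary-directory holders alongside the parts they protect, instead of capturing holders in a separate captureTmpDirectoryHolders pass; returning a resource and its RAII guard from the same call keeps their lifetimes from diverging. A small generic illustration of that pattern, with made-up types:

#include <memory>
#include <string>
#include <utility>
#include <vector>

// Stand-in for the temporary-directory holder: cleans up when destroyed.
struct TempDirHolder
{
    explicit TempDirHolder(std::string dir_) : dir(std::move(dir_)) {}
    ~TempDirHolder() { /* remove `dir` recursively here */ }
    std::string dir;
};

struct EmptyPart { std::string name; };

// Hand each new part back together with the holder that keeps its temporary
// directory alive; the caller owns both until the part is committed.
std::pair<std::vector<EmptyPart>, std::vector<std::unique_ptr<TempDirHolder>>>
createEmptyParts(const std::vector<std::string> & names)
{
    std::pair<std::vector<EmptyPart>, std::vector<std::unique_ptr<TempDirHolder>>> result;
    for (const auto & name : names)
    {
        result.first.push_back(EmptyPart{name});
        result.second.push_back(std::make_unique<TempDirHolder>("tmp_empty_" + name));
    }
    return result;
}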
- auto merge_blocker = stopMergesAndWait(); waitForOutdatedPartsToBeLoaded(); + auto merge_blocker = stopMergesAndWait(); Stopwatch watch; ProfileEventsScope profile_events_scope; auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + auto data_parts_lock = lockParts(); + auto parts_to_remove = getVisibleDataPartsVectorUnlocked(query_context, data_parts_lock); + removePartsFromWorkingSet(txn.get(), parts_to_remove, true, data_parts_lock); + LOG_INFO(log, "Removed {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); auto parts = getVisibleDataPartsVector(query_context); @@ -1748,9 +1762,7 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1780,8 +1792,15 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is possible. auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + if (auto part = outdatePart(txn.get(), part_name, /*force=*/ true)) + dropPartsImpl({part}, detach); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}); @@ -1791,8 +1810,10 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt if (detach) { auto metadata_snapshot = getInMemoryMetadataPtr(); - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } { @@ -1802,9 +1823,7 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames({part}), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1838,8 +1857,26 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont /// It's important to create it outside of lock scope because /// otherwise it can lock parts in destructor and deadlock is 
possible. auto txn = query_context->getCurrentTransaction(); - MergeTreeData::Transaction transaction(*this, txn.get()); + if (txn) { + DataPartsVector parts_to_remove; + { + auto data_parts_lock = lockParts(); + if (partition_ast && partition_ast->all) + parts_to_remove = getVisibleDataPartsVectorUnlocked(query_context, data_parts_lock); + else + { + String partition_id = getPartitionIDFromQuery(partition, query_context, &data_parts_lock); + parts_to_remove = getVisibleDataPartsVectorInPartition(query_context, partition_id, data_parts_lock); + } + removePartsFromWorkingSet(txn.get(), parts_to_remove, true, data_parts_lock); + } + dropPartsImpl(std::move(parts_to_remove), detach); + } + else + { + MergeTreeData::Transaction transaction(*this, txn.get()); + auto operation_data_parts_lock = lockOperationsWithParts(); DataPartsVector parts; @@ -1854,12 +1891,16 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont } if (detach) + { for (const auto & part : parts) { auto metadata_snapshot = getInMemoryMetadataPtr(); - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } + } auto future_parts = initCoverageWithNewEmptyParts(parts); @@ -1868,9 +1909,8 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont fmt::join(getPartsNames(future_parts), ", "), fmt::join(getPartsNames(parts), ", "), transaction.getTID()); - captureTmpDirectoryHolders(*this, future_parts); - auto new_data_parts = createEmptyDataParts(*this, future_parts, txn); + auto [new_data_parts, tmp_dir_holders] = createEmptyDataParts(*this, future_parts, txn); renameAndCommitEmptyParts(new_data_parts, transaction); PartLog::addNewParts(query_context, PartLog::createPartLogEntries(new_data_parts, watch.elapsed(), profile_events_scope.getSnapshot())); @@ -1888,6 +1928,35 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont clearEmptyParts(); } +void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool detach) +{ + auto metadata_snapshot = getInMemoryMetadataPtr(); + + if (detach) + { + /// If DETACH clone parts to detached/ directory + /// NOTE: no race with background cleanup until we hold pointers to parts + for (const auto & part : parts_to_remove) + { + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); + } + } + + if (deduplication_log) + { + for (const auto & part : parts_to_remove) + deduplication_log->dropPart(part->info); + } + + if (detach) + LOG_INFO(log, "Detached {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); + else + LOG_INFO(log, "Removed {} parts: [{}]", parts_to_remove.size(), fmt::join(getPartsNames(parts_to_remove), ", ")); +} + PartitionCommandsResultInfo StorageMergeTree::attachPartition( const ASTPtr & partition, const StorageMetadataPtr & /* metadata_snapshot */, bool attach_part, ContextPtr local_context) @@ -1928,7 +1997,7 @@ PartitionCommandsResultInfo 
StorageMergeTree::attachPartition( } /// New parts with other data may appear in place of deleted parts. - local_context->dropCaches(); + local_context->clearCaches(); return results; } @@ -2180,6 +2249,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) { const auto & backup_settings = backup_entries_collector.getBackupSettings(); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto local_context = backup_entries_collector.getContext(); DataPartsVector data_parts; @@ -2192,7 +2262,7 @@ void StorageMergeTree::backupData(BackupEntriesCollector & backup_entries_collec for (const auto & data_part : data_parts) min_data_version = std::min(min_data_version, data_part->info.getDataVersion() + 1); - auto parts_backup_entries = backupParts(data_parts, data_path_in_backup, backup_settings, local_context); + auto parts_backup_entries = backupParts(data_parts, data_path_in_backup, backup_settings, read_settings, local_context); for (auto & part_backup_entries : parts_backup_entries) backup_entries_collector.addBackupEntries(std::move(part_backup_entries.backup_entries)); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 8099f9c16aa..c77e5140d75 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -176,7 +176,7 @@ private: const Names & deduplicate_by_columns, bool cleanup, const MergeTreeTransactionPtr & txn, - String * out_disable_reason = nullptr, + String & out_disable_reason, bool optimize_skip_merged_partitions = false); void renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction); @@ -191,7 +191,7 @@ private: /// and into in-memory structures. Wake up merge-mutation task. 
Int64 startMutation(const MutationCommands & commands, ContextPtr query_context); /// Wait until mutation with version will finish mutation for all parts - void waitForMutation(Int64 version, bool wait_for_another_mutation = false); + void waitForMutation(Int64 version, bool wait_for_another_mutation); void waitForMutation(const String & mutation_id, bool wait_for_another_mutation) override; void waitForMutation(Int64 version, const String & mutation_id, bool wait_for_another_mutation = false); void setMutationCSN(const String & mutation_id, CSN csn) override; @@ -203,7 +203,7 @@ private: bool aggressive, const String & partition_id, bool final, - String * disable_reason, + String & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & lock, const MergeTreeTransactionPtr & txn, @@ -212,7 +212,7 @@ private: MergeMutateSelectedEntryPtr selectPartsToMutate( - const StorageMetadataPtr & metadata_snapshot, String * disable_reason, + const StorageMetadataPtr & metadata_snapshot, String & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & currently_processing_in_background_mutex_lock); /// For current mutations queue, returns maximum version of mutation for a part, @@ -237,6 +237,7 @@ private: void dropPartNoWaitNoThrow(const String & part_name) override; void dropPart(const String & part_name, bool detach, ContextPtr context) override; void dropPartition(const ASTPtr & partition, bool detach, ContextPtr context) override; + void dropPartsImpl(DataPartsVector && parts_to_remove, bool detach); PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, ContextPtr context) override; void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr context) override; diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 3287e3272e3..21543541f36 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -19,6 +19,8 @@ #include #include +#include + namespace DB { @@ -127,9 +129,7 @@ public: for (const auto j : collections::range(0, num_cols)) { - WriteBufferFromOwnString ostr; - data_types[j]->getDefaultSerialization()->serializeText(*columns[j], i, ostr, FormatSettings{}); - document->add(data_names[j], ostr.str()); + insertValueIntoMongoDB(*document, data_names[j], *data_types[j], *columns[j], i); } documents.push_back(std::move(document)); @@ -151,6 +151,60 @@ public: } private: + + void insertValueIntoMongoDB( + Poco::MongoDB::Document & document, + const std::string & name, + const IDataType & data_type, + const IColumn & column, + size_t idx) + { + WhichDataType which(data_type); + + if (which.isArray()) + { + const ColumnArray & column_array = assert_cast(column); + const ColumnArray::Offsets & offsets = column_array.getOffsets(); + + size_t offset = offsets[idx - 1]; + size_t next_offset = offsets[idx]; + + const IColumn & nested_column = column_array.getData(); + + const auto * array_type = assert_cast(&data_type); + const DataTypePtr & nested_type = array_type->getNestedType(); + + Poco::MongoDB::Array::Ptr array = new Poco::MongoDB::Array(); + for (size_t i = 0; i + offset < next_offset; ++i) + { + insertValueIntoMongoDB(*array, Poco::NumberFormatter::format(i), *nested_type, nested_column, i + offset); + } + + document.add(name, array); + return; + } + + /// MongoDB does not support UInt64 type, so just cast it to Int64 + if (which.isNativeUInt()) + document.add(name, 
static_cast(column.getUInt(idx))); + else if (which.isNativeInt()) + document.add(name, static_cast(column.getInt(idx))); + else if (which.isFloat32()) + document.add(name, static_cast(column.getFloat32(idx))); + else if (which.isFloat64()) + document.add(name, static_cast(column.getFloat64(idx))); + else if (which.isDate()) + document.add(name, Poco::Timestamp(DateLUT::instance().fromDayNum(DayNum(column.getUInt(idx))) * 1000000)); + else if (which.isDateTime()) + document.add(name, Poco::Timestamp(column.getUInt(idx) * 1000000)); + else + { + WriteBufferFromOwnString ostr; + data_type.getDefaultSerialization()->serializeText(column, idx, ostr, FormatSettings{}); + document.add(name, ostr.str()); + } + } + String collection_name; String db_name; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 3e928c3a811..76a439eabaf 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -34,16 +35,6 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; } -static String backQuoteMySQL(const String & x) -{ - String res(x.size(), '\0'); - { - WriteBufferFromString wb(res); - writeBackQuotedStringMySQL(x, wb); - } - return res; -} - StorageMySQL::StorageMySQL( const StorageID & table_id_, mysqlxx::PoolWithFailover && pool_, @@ -113,6 +104,7 @@ Pipe StorageMySQL::read( column_names_, storage_snapshot->metadata->getColumns().getOrdinary(), IdentifierQuotingStyle::BackticksMySQL, + LiteralEscapingStyle::Regular, remote_database_name, remote_table_name, context_); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 3551ee36819..7961c44e844 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -122,7 +122,7 @@ Pipe StoragePostgreSQL::read( query_info_, column_names_, storage_snapshot->metadata->getColumns().getOrdinary(), - IdentifierQuotingStyle::DoubleQuotes, remote_table_schema, remote_table_name, context_); + IdentifierQuotingStyle::DoubleQuotes, LiteralEscapingStyle::PostgreSQL, remote_table_schema, remote_table_name, context_); LOG_TRACE(log, "Query: {}", query); Block sample_block; diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 14b7fc15af2..21ed4b91c62 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -139,7 +139,7 @@ public: void startup() override { getNested()->startup(); } void shutdown() override { getNested()->shutdown(); } - void flush() override { getNested()->flush(); } + void flushAndPrepareForShutdown() override { getNested()->flushAndPrepareForShutdown(); } ActionLock getActionLock(StorageActionBlockType action_type) override { return getNested()->getActionLock(action_type); } @@ -149,7 +149,7 @@ public: return getNested()->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); } - CheckResults checkData(const ASTPtr & query , ContextPtr context) override { return getNested()->checkData(query, context); } + CheckResults checkData(const ASTPtr & query, ContextPtr context) override { return getNested()->checkData(query, context); } void checkTableCanBeDropped() const override { getNested()->checkTableCanBeDropped(); } bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); } Strings getDataPaths() const override { return getNested()->getDataPaths(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp 
b/src/Storages/StorageReplicatedMergeTree.cpp index 6894368841f..62db6d2d7b7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -185,6 +186,7 @@ namespace ErrorCodes extern const int CHECKSUM_DOESNT_MATCH; extern const int NOT_INITIALIZED; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; + extern const int TABLE_IS_DROPPED; } namespace ActionLocks @@ -195,6 +197,7 @@ namespace ActionLocks extern const StorageActionBlockType ReplicationQueue; extern const StorageActionBlockType PartsTTLMerge; extern const StorageActionBlockType PartsMove; + extern const StorageActionBlockType PullReplicationLog; } @@ -710,7 +713,7 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() { auto res = future.get(); if (res.error != Coordination::Error::ZOK && res.error != Coordination::Error::ZNODEEXISTS) - throw Coordination::Exception(fmt::format("Failed to create new nodes at {}", zookeeper_path), res.error); + throw Coordination::Exception(res.error, "Failed to create new nodes at {}", zookeeper_path); } } @@ -746,7 +749,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr } else if (code != Coordination::Error::ZOK) { - throw Coordination::Exception(code, drop_lock_path); + throw Coordination::Exception::fromPath(code, drop_lock_path); } else { @@ -1037,7 +1040,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con code = zookeeper->tryMulti(ops, res); if (code != Coordination::Error::ZOK) LOG_WARNING(logger, "Cannot quickly remove nodes without children: {} (replica: {}). Will remove recursively.", - Coordination::errorMessage(code), remote_replica_path); + code, remote_replica_path); /// And finally remove everything else recursively /// It may left some garbage if replica_path subtree is concurrently modified @@ -1145,7 +1148,7 @@ bool StorageReplicatedMergeTree::removeTableNodesFromZooKeeper(zkutil::ZooKeeper auto code = zookeeper->tryMulti(ops, res); if (code != Coordination::Error::ZOK) LOG_WARNING(logger, "Cannot quickly remove nodes without children: {} (table: {}). Will remove recursively.", - Coordination::errorMessage(code), zookeeper_path); + code, zookeeper_path); Strings children; code = zookeeper->tryGetChildren(zookeeper_path, children); @@ -1385,7 +1388,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) } const UInt64 parts_to_fetch_blocks = std::accumulate(parts_to_fetch.cbegin(), parts_to_fetch.cend(), 0, - [&](UInt64 acc, const String& part_name) + [&](UInt64 acc, const String & part_name) { if (const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version)) return acc + part_info->getBlocksCount(); @@ -1896,7 +1899,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che else if (code == Coordination::Error::ZBADVERSION || code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "State was changed or isn't expected when trying to mark quorum for part {} as failed. 
Code: {}", - entry.new_part_name, Coordination::errorMessage(code)); + entry.new_part_name, code); } else throw Coordination::Exception(code); @@ -2094,8 +2097,10 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { if (auto part_to_detach = part.getPartIfItWasActive()) { - LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory()); - part_to_detach->makeCloneInDetached("", metadata_snapshot); + String part_dir = part_to_detach->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } } @@ -2448,10 +2453,13 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (part_desc->checksum_hex != part_desc->src_table_part->checksums.getTotalChecksumHex()) throw Exception(ErrorCodes::UNFINISHED, "Checksums of {} is suddenly changed", part_desc->src_table_part->name); - bool zero_copy_enabled = dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + /// Don't do hardlinks in case of zero-copy at any side (defensive programming) + bool source_zero_copy_enabled = dynamic_cast(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + bool our_zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication; + IDataPartStorage::ClonePartParams clone_params { - .copy_instead_of_hardlink = zero_copy_enabled && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(), + .copy_instead_of_hardlink = (our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(), .metadata_version_to_write = metadata_snapshot->getMetadataVersion() }; auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( @@ -2822,7 +2830,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : parts_to_remove_from_working_set) { LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("clone", metadata_snapshot); + part->makeCloneInDetached("clone", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -3101,7 +3109,7 @@ void StorageReplicatedMergeTree::cloneReplicaIfNeeded(zkutil::ZooKeeperPtr zooke if (get_is_lost.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/is_lost': {}", source_replica_name, Coordination::errorMessage(get_is_lost.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/is_lost': {}", source_replica_name, get_is_lost.error); continue; } else if (get_is_lost.data != "0") @@ -3112,12 +3120,12 @@ void StorageReplicatedMergeTree::cloneReplicaIfNeeded(zkutil::ZooKeeperPtr zooke if (get_log_pointer.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/log_pointer': {}", source_replica_name, Coordination::errorMessage(get_log_pointer.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/log_pointer': {}", source_replica_name, get_log_pointer.error); continue; } if (get_queue.error != Coordination::Error::ZOK) { - LOG_INFO(log, "Not cloning {}, cannot get '/queue': {}", source_replica_name, Coordination::errorMessage(get_queue.error)); + LOG_INFO(log, "Not cloning {}, cannot get '/queue': {}", source_replica_name, get_queue.error); continue; } @@ -3306,7 +3314,7 @@ bool 
StorageReplicatedMergeTree::scheduleDataProcessingJob(BackgroundJobsAssigne auto job_type = selected_entry->log_entry->type; /// Depending on entry type execute in fetches (small) pool or big merge_mutate pool - if (job_type == LogEntry::GET_PART) + if (job_type == LogEntry::GET_PART || job_type == LogEntry::ATTACH_PART) { assignee.scheduleFetchTask(std::make_shared( [this, selected_entry] () mutable @@ -3464,9 +3472,10 @@ void StorageReplicatedMergeTree::mergeSelectingTask() merge_pred.emplace(queue.getMergePredicate(zookeeper, partitions_to_merge_in)); } + String out_reason; if (can_assign_merge && merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, *merge_pred, - merge_with_ttl_allowed, NO_TRANSACTION_PTR, nullptr, &partitions_to_merge_in) == SelectPartsDecision::SELECTED) + merge_with_ttl_allowed, NO_TRANSACTION_PTR, out_reason, &partitions_to_merge_in) == SelectPartsDecision::SELECTED) { create_result = createLogEntryToMergeParts( zookeeper, @@ -3787,12 +3796,12 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n chassert(!broken_part); chassert(!storage_init); part->was_removed_as_broken = true; - part->makeCloneInDetached("broken", getInMemoryMetadataPtr()); + part->makeCloneInDetached("broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); broken_part = part; } else { - part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); + part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); } detached_parts.push_back(part->name); } @@ -3917,7 +3926,10 @@ void StorageReplicatedMergeTree::startBeingLeader() void StorageReplicatedMergeTree::stopBeingLeader() { if (!is_leader) + { + LOG_TRACE(log, "stopBeingLeader called but we are not a leader already"); return; + } LOG_INFO(log, "Stopped being leader"); is_leader = false; @@ -3974,6 +3986,153 @@ String StorageReplicatedMergeTree::findReplicaHavingPart(const String & part_nam return {}; } +void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info) +{ + { + std::lock_guard lock(last_sent_parts_mutex); + last_sent_parts.emplace_back(info); + static constexpr size_t LAST_SENT_PARS_WINDOW_SIZE = 1000; + while (last_sent_parts.size() > LAST_SENT_PARS_WINDOW_SIZE) + last_sent_parts.pop_front(); + } + + last_sent_parts_cv.notify_all(); +} + +void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(StorageReplicatedMergeTree::ShutdownDeadline shutdown_deadline_) +{ + /// Will be true in case in case of query + if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr) + { + LOG_TRACE(log, "Will not wait for unique parts to be fetched by other replicas because shutdown called from DROP/DETACH query"); + return; + } + + if (!shutdown_called.load()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Called waitForUniquePartsToBeFetchedByOtherReplicas before shutdown, it's a bug"); + + auto settings_ptr = getSettings(); + + auto wait_ms = settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds(); + if (wait_ms == 0) + { + LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because wait time is zero"); + return; + } + + if (shutdown_deadline_ <= std::chrono::system_clock::now()) + { + LOG_INFO(log, "Will not wait for unique parts to be fetched by other replicas because shutdown_deadline already passed"); + return; + } + + auto zookeeper = getZooKeeperIfTableShutDown(); + + auto unique_parts_set = 
findReplicaUniqueParts(replica_name, zookeeper_path, format_version, zookeeper, log); + if (unique_parts_set.empty()) + { + LOG_INFO(log, "Will not wait for unique parts to be fetched because we don't have any unique parts"); + return; + } + else + { + LOG_INFO(log, "Will wait for {} unique parts to be fetched", unique_parts_set.size()); + } + + auto wait_predicate = [&] () -> bool + { + for (auto it = unique_parts_set.begin(); it != unique_parts_set.end();) + { + const auto & part = *it; + + bool found = false; + for (const auto & sent_part : last_sent_parts | std::views::reverse) + { + if (sent_part.contains(part)) + { + LOG_TRACE(log, "Part {} was fetched by some replica", part.getPartNameForLogs()); + found = true; + it = unique_parts_set.erase(it); + break; + } + } + if (!found) + break; + } + return unique_parts_set.empty(); + }; + + std::unique_lock lock(last_sent_parts_mutex); + if (!last_sent_parts_cv.wait_until(lock, shutdown_deadline_, wait_predicate)) + LOG_INFO(log, "Failed to wait for unique parts to be fetched in {} ms, {} parts may be left on this replica", wait_ms, unique_parts_set.size()); + else + LOG_INFO(log, "Successfully waited for all the parts to be fetched"); +} + +std::set StorageReplicatedMergeTree::findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_) +{ + if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica_name_ / "is_active")) + { + LOG_INFO(log_, "Our replica is not active, nobody will try to fetch anything"); + return {}; + } + + Strings replicas = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas"); + Strings our_parts; + std::vector data_parts_on_replicas; + for (const String & replica : replicas) + { + if (!zookeeper_->exists(fs::path(zookeeper_path_) / "replicas" / replica / "is_active")) + { + LOG_TRACE(log_, "Replica {} is not active, skipping", replica); + continue; + } + + Strings parts = zookeeper_->getChildren(fs::path(zookeeper_path_) / "replicas" / replica / "parts"); + if (replica == replica_name_) + { + LOG_TRACE(log_, "Collected parts of our replica {}", replica); + our_parts = parts; + } + else + { + LOG_TRACE(log_, "Fetching parts for replica {}: [{}]", replica, fmt::join(parts, ", ")); + data_parts_on_replicas.emplace_back(format_version_, parts); + } + } + + if (data_parts_on_replicas.empty()) + { + LOG_TRACE(log_, "There are no other active replicas, will not wait for fetch"); + return {}; + } + + std::set our_unique_parts; + for (const auto & part : our_parts) + { + bool found = false; + for (const auto & active_parts_set : data_parts_on_replicas) + { + if (!active_parts_set.getContainingPart(part).empty()) + { + found = true; + break; + } + } + + if (!found) + { + LOG_TRACE(log_, "Part {} not found on other replicas", part); + our_unique_parts.emplace(MergeTreePartInfo::fromPartName(part, format_version_)); + } + } + + if (!our_parts.empty() && our_unique_parts.empty()) + LOG_TRACE(log_, "All parts found on replicas"); + + return our_unique_parts; +} + String StorageReplicatedMergeTree::findReplicaHavingCoveringPart(LogEntry & entry, bool active) { auto zookeeper = getZooKeeper(); @@ -4150,7 +4309,7 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name, bool is_ continue; } else - throw Coordination::Exception(code, quorum_status_path); + throw Coordination::Exception::fromPath(code, quorum_status_path); } else { @@ -4174,7 +4333,7 @@ void
StorageReplicatedMergeTree::updateQuorum(const String & part_name, bool is_ continue; } else - throw Coordination::Exception(code, quorum_status_path); + throw Coordination::Exception::fromPath(code, quorum_status_path); } } } @@ -4184,7 +4343,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) { auto zookeeper = getZooKeeper(); - LOG_DEBUG(log, "Cleaning up last parent node for partition {}", partition_id); + LOG_DEBUG(log, "Cleaning up last part node for partition {}", partition_id); /// The name of the previous part for which the quorum was reached. const String quorum_last_part_path = fs::path(zookeeper_path) / "quorum" / "last_part"; @@ -4205,6 +4364,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) if (!parts_with_quorum.added_parts.contains(partition_id)) { /// There is no information about interested part. + LOG_TEST(log, "There is no information about the partition"); break; } @@ -4222,6 +4382,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) else if (code == Coordination::Error::ZNONODE) { /// Node is deleted. It is impossible, but it is Ok. + LOG_WARNING(log, "The last part node {} was deleted", quorum_last_part_path); break; } else if (code == Coordination::Error::ZBADVERSION) @@ -4230,7 +4391,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) continue; } else - throw Coordination::Exception(code, quorum_last_part_path); + throw Coordination::Exception::fromPath(code, quorum_last_part_path); } } @@ -4633,6 +4794,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart( void StorageReplicatedMergeTree::startup() { + LOG_TRACE(log, "Starting up table"); startOutdatedDataPartsLoadingTask(); if (attach_thread) { @@ -4654,6 +4816,8 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) since_metadata_err_incr_readonly_metric = true; CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); } + + LOG_TRACE(log, "No connection to ZooKeeper or no metadata in ZooKeeper, will not start up"); return; } @@ -4688,6 +4852,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) if (from_attach_thread) { + LOG_TRACE(log, "Trying to start up the table right now"); /// Try activating replica in current thread. restarting_thread.run(); } @@ -4697,9 +4862,18 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) /// NOTE It does not mean that replication is actually started after receiving this event. /// It only means that an attempt to startup replication was made. /// Table may be still in readonly mode if this attempt failed for any reason.
- startup_event.wait(); + while (!startup_event.tryWait(10 * 1000)) + LOG_TRACE(log, "Waiting for RestartingThread to startup table"); } + auto lock = std::unique_lock(flush_and_shutdown_mutex, std::defer_lock); + do + { + if (shutdown_prepared_called.load() || shutdown_called.load()) + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped"); + } + while (!lock.try_lock()); + /// And this is just a callback session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]() { @@ -4740,6 +4914,37 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) } +void StorageReplicatedMergeTree::flushAndPrepareForShutdown() +{ + std::lock_guard lock{flush_and_shutdown_mutex}; + if (shutdown_prepared_called.exchange(true)) + return; + + try + { + auto settings_ptr = getSettings(); + /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. + fetcher.blocker.cancelForever(); + merger_mutator.merges_blocker.cancelForever(); + parts_mover.moves_blocker.cancelForever(); + stopBeingLeader(); + + if (attach_thread) + attach_thread->shutdown(); + + restarting_thread.shutdown(/* part_of_full_shutdown */true); + /// Explicitly set the event, because the restarting thread will not set it again + startup_event.set(); + shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds())); + } + catch (...) + { + /// Don't wait anything in case of improper prepare for shutdown + shutdown_deadline.emplace(std::chrono::system_clock::now()); + throw; + } +} + void StorageReplicatedMergeTree::partialShutdown() { ProfileEvents::increment(ProfileEvents::ReplicaPartialShutdown); @@ -4775,21 +4980,28 @@ void StorageReplicatedMergeTree::shutdown() if (shutdown_called.exchange(true)) return; + flushAndPrepareForShutdown(); + + if (!shutdown_deadline.has_value()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Shutdown deadline is not set in shutdown"); + + try + { + waitForUniquePartsToBeFetchedByOtherReplicas(*shutdown_deadline); + } + catch (const Exception & ex) + { + if (ex.code() == ErrorCodes::LOGICAL_ERROR) + throw; + + tryLogCurrentException(log, __PRETTY_FUNCTION__); + } + session_expired_callback_handler.reset(); stopOutdatedDataPartsLoadingTask(); - /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. - fetcher.blocker.cancelForever(); - merger_mutator.merges_blocker.cancelForever(); - parts_mover.moves_blocker.cancelForever(); - mutations_finalizing_task->deactivate(); - stopBeingLeader(); + partialShutdown(); - if (attach_thread) - attach_thread->shutdown(); - - restarting_thread.shutdown(/* part_of_full_shutdown */true); - background_operations_assignee.finish(); part_moves_between_shards_orchestrator.shutdown(); { @@ -4898,67 +5110,102 @@ void StorageReplicatedMergeTree::read( snapshot_data.alter_conversions = {}; }); - /** The `select_sequential_consistency` setting has two meanings: - * 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. - * 2. Do not read parts that have not yet been written to the quorum of the replicas. - * For this you have to synchronously go to ZooKeeper. 
- */ - if (local_context->getSettingsRef().select_sequential_consistency) - { - auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); - if (auto plan = reader.read( - column_names, storage_snapshot, query_info, local_context, - max_block_size, num_streams, processed_stage, std::move(max_added_blocks), /*enable_parallel_reading*/false)) - query_plan = std::move(*plan); - return; - } + const auto & settings = local_context->getSettingsRef(); + + /// The `select_sequential_consistency` setting has two meanings: + /// 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. + /// 2. Do not read parts that have not yet been written to the quorum of the replicas. + /// For this you have to synchronously go to ZooKeeper. + if (settings.select_sequential_consistency) + return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); if (local_context->canUseParallelReplicasOnInitiator()) + return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); +} + +void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) +{ + auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); + auto plan = reader.read(column_names, storage_snapshot, query_info, local_context, + max_block_size, num_streams, processed_stage, std::move(max_added_blocks), + /* enable_parallel_reading= */false); + if (plan) + query_plan = std::move(*plan); +} + +void StorageReplicatedMergeTree::readParallelReplicasImpl( + QueryPlan & query_plan, + const Names & /*column_names*/, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t /*max_block_size*/, + const size_t /*num_streams*/) +{ + auto table_id = getStorageID(); + + auto parallel_replicas_cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); + + ASTPtr modified_query_ast; + Block header; + if (local_context->getSettingsRef().allow_experimental_analyzer) { - auto table_id = getStorageID(); + auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); - ASTPtr modified_query_ast; - - Block header; - - if (local_context->getSettingsRef().allow_experimental_analyzer) - { - auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); - - header = InterpreterSelectQueryAnalyzer::getSampleBlock( - modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); - modified_query_ast = queryNodeToSelectQuery(modified_query_tree); - } - else - { - modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - header - = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - } - - auto cluster = 
local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); - - ClusterProxy::SelectStreamFactory select_stream_factory = - ClusterProxy::SelectStreamFactory( - header, - {}, - storage_snapshot, - processed_stage); - - ClusterProxy::executeQueryWithParallelReplicas( - query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, - select_stream_factory, modified_query_ast, - local_context, query_info, cluster); + header = InterpreterSelectQueryAnalyzer::getSampleBlock( + modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); + modified_query_ast = queryNodeToSelectQuery(modified_query_tree); } else { - if (auto plan = reader.read( - column_names, storage_snapshot, query_info, - local_context, max_block_size, num_streams, - processed_stage, nullptr, /*enable_parallel_reading*/local_context->canUseParallelReplicasOnFollower())) - query_plan = std::move(*plan); + modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, + table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); + header + = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } + + ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( + header, + {}, + storage_snapshot, + processed_stage); + + ClusterProxy::executeQueryWithParallelReplicas( + query_plan, getStorageID(), + /* table_func_ptr= */ nullptr, + select_stream_factory, modified_query_ast, + local_context, query_info, parallel_replicas_cluster); +} + +void StorageReplicatedMergeTree::readLocalImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t max_block_size, + const size_t num_streams) +{ + auto plan = reader.read( + column_names, storage_snapshot, query_info, + local_context, max_block_size, num_streams, + processed_stage, + /* max_block_numbers_to_read= */ nullptr, + /* enable_parallel_reading= */ local_context->canUseParallelReplicasOnFollower()); + if (plan) + query_plan = std::move(*plan); } template @@ -5071,15 +5318,14 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu String query_str; { WriteBufferFromOwnString buf; - IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true); - ast_format_settings.always_quote_identifiers = true; + IAST::FormatSettings ast_format_settings(buf, /*one_line*/ true, /*hilite*/ false, /*always_quote_identifiers*/ true); query.IAST::format(ast_format_settings); query_str = buf.str(); } QueryPipeline pipeline; ContextMutablePtr query_context = Context::createCopy(local_context); - ++query_context->getClientInfo().distributed_depth; + query_context->increaseDistributedDepth(); for (const auto & replicas : src_cluster->getShardsAddresses()) { @@ -5230,13 +5476,13 @@ bool StorageReplicatedMergeTree::optimize( { select_decision = merger_mutator.selectPartsToMerge( future_merged_part, /* aggressive */ true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, - can_merge, /* merge_with_ttl_allowed */ false, NO_TRANSACTION_PTR, &disable_reason); + can_merge, /* merge_with_ttl_allowed */ false, NO_TRANSACTION_PTR, disable_reason); } else { select_decision = merger_mutator.selectAllPartsToMergeWithinPartition( future_merged_part, can_merge, partition_id, final, metadata_snapshot, NO_TRANSACTION_PTR, - 
&disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions); + disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions); } /// If there is nothing to merge then we treat this merge as successful (needed for optimize final optimization) @@ -5644,7 +5890,7 @@ void StorageReplicatedMergeTree::alter( } else { - throw Coordination::Exception("Alter cannot be assigned because of Zookeeper error", rc); + throw Coordination::Exception::fromMessage(rc, "Alter cannot be assigned because of Zookeeper error"); } } @@ -5889,8 +6135,9 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); /// TODO Allow to use quorum here. - ReplicatedMergeTreeSink output(*this, metadata_snapshot, 0, 0, 0, false, false, false, query_context, - /*is_attach*/true); + ReplicatedMergeTreeSink output(*this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, + /* quorum_parallel */ false, query_context->getSettingsRef().insert_deduplicate, + /* majority_quorum */ false, query_context, /*is_attach*/true); for (size_t i = 0; i < loaded_parts.size(); ++i) { @@ -6129,7 +6376,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( const auto & stop_waiting = [&]() { - bool stop_waiting_itself = waiting_itself && partial_shutdown_called; + bool stop_waiting_itself = waiting_itself && (partial_shutdown_called || shutdown_prepared_called || shutdown_called); bool timeout_exceeded = check_timeout && wait_for_inactive_timeout < time_waiting.elapsedSeconds(); bool stop_waiting_inactive = (!wait_for_inactive || timeout_exceeded) && !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active"); @@ -6531,7 +6778,7 @@ void StorageReplicatedMergeTree::fetchPartition( try { - /// part name , metadata, part_path , true, 0, zookeeper + /// part name, metadata, part_path, true, 0, zookeeper if (!fetchPart(part_name, metadata_snapshot, from_zookeeper_name, part_path, true, 0, zookeeper, /* try_fetch_shared = */ false)) throw Exception(ErrorCodes::UNFINISHED, "Failed to fetch part {} from {}", part_name, from_); } @@ -6805,7 +7052,7 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte continue; } else - throw Coordination::Exception("Unable to create a mutation znode", rc); + throw Coordination::Exception::fromMessage(rc, "Unable to create a mutation znode"); } merge_selecting_task->schedule(); @@ -7249,7 +7496,7 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( { for (size_t i = 0; i < delete_requests.size(); ++i) if (delete_responses[i]->error != Coordination::Error::ZOK) - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", delete_requests[i]->getPath(), Coordination::errorMessage(delete_responses[i]->error)); + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", delete_requests[i]->getPath(), delete_responses[i]->error); } LOG_TRACE(log, "Deleted {} deduplication block IDs in partition ID {}", delete_requests.size(), partition_id); @@ -7585,8 +7832,10 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta UInt64 index = lock->getNumber(); MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); + /// Don't do hardlinks in case of zero-copy at any side (defensive programming) bool zero_copy_enabled = 
storage_settings_ptr->allow_remote_fs_zero_copy_replication || dynamic_cast(dest_table.get())->getSettings()->allow_remote_fs_zero_copy_replication; + IDataPartStorage::ClonePartParams clone_params { .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(), @@ -7773,7 +8022,7 @@ void StorageReplicatedMergeTree::movePartitionToShard( /// canMergeSinglePart is overlapping with dropPart, let's try to use the same code. String out_reason; - if (!merge_pred.canMergeSinglePart(part, &out_reason)) + if (!merge_pred.canMergeSinglePart(part, out_reason)) throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason); } @@ -7926,6 +8175,9 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti if (action_type == ActionLocks::PartsMove) return parts_mover.moves_blocker.cancel(); + if (action_type == ActionLocks::PullReplicationLog) + return queue.pull_log_blocker.cancel(); + return {}; } @@ -8021,14 +8273,14 @@ bool StorageReplicatedMergeTree::dropPartImpl( /// There isn't a lot we can do otherwise. Can't cancel merges because it is possible that a replica already /// finished the merge. String out_reason; - if (!merge_pred.canMergeSinglePart(part, &out_reason)) + if (!merge_pred.canMergeSinglePart(part, out_reason)) { if (throw_if_noop) throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } - if (merge_pred.partParticipatesInReplaceRange(part, &out_reason)) + if (merge_pred.partParticipatesInReplaceRange(part, out_reason)) { if (throw_if_noop) throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); @@ -8423,7 +8675,7 @@ void StorageReplicatedMergeTree::createTableSharedID() const } else if (code != Coordination::Error::ZOK) { - throw zkutil::KeeperException(code, zookeeper_table_id_path); + throw zkutil::KeeperException::fromPath(code, zookeeper_table_id_path); } } @@ -8763,7 +9015,7 @@ std::pair> getParentLockedBlobs(const ZooKeeperWith zookeeper_ptr->tryGet(fs::path(zero_copy_part_path_prefix) / part_candidate_info_str, files_not_to_remove_str, nullptr, nullptr, &code); if (code != Coordination::Error::ZOK) { - LOG_TRACE(log, "Cannot get parent files from ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), errorMessage(code)); + LOG_TRACE(log, "Cannot get parent files from ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), code); return {true, std::nullopt}; } @@ -8866,7 +9118,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(ec, zookeeper_part_replica_node); + throw zkutil::KeeperException::fromPath(ec, zookeeper_part_replica_node); } } @@ -8901,7 +9153,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(error_code, zookeeper_part_uniq_node); + throw zkutil::KeeperException::fromPath(error_code, zookeeper_part_uniq_node); } @@ -8933,7 +9185,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } else { - throw zkutil::KeeperException(error_code, zookeeper_part_uniq_node); + throw zkutil::KeeperException::fromPath(error_code, zookeeper_part_uniq_node); } } else @@ -9173,8 +9425,7 @@ std::optional StorageReplicatedMergeTree::tryCreateZeroCopyExclusi String zc_zookeeper_path = *getZeroCopyPartPath(part_name, disk); /// Just recursively create ancestors for lock - 
zookeeper->createAncestors(zc_zookeeper_path); - zookeeper->createIfNotExists(zc_zookeeper_path, ""); + zookeeper->createAncestors(zc_zookeeper_path + "/"); /// Create actual lock ZeroCopyLock lock(zookeeper, zc_zookeeper_path, replica_name); @@ -9261,7 +9512,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } } - MergeTreeData::MutableDataPartPtr new_data_part = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); + auto [new_data_part, tmp_dir_holder] = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); new_data_part->setName(lost_part_name); try @@ -9578,6 +9829,7 @@ void StorageReplicatedMergeTree::backupData( /// because we need to coordinate them with other replicas (other replicas can have better parts). const auto & backup_settings = backup_entries_collector.getBackupSettings(); + const auto & read_settings = backup_entries_collector.getReadSettings(); auto local_context = backup_entries_collector.getContext(); DataPartsVector data_parts; @@ -9586,7 +9838,7 @@ void StorageReplicatedMergeTree::backupData( else data_parts = getVisibleDataPartsVector(local_context); - auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, local_context); + auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, read_settings, local_context); auto coordination = backup_entries_collector.getBackupCoordination(); String shared_id = getTableSharedID(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 1a1b3c3b10c..78ef39f032f 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -112,8 +112,35 @@ public: bool need_check_structure); void startup() override; - void shutdown() override; + + /// Too many shutdown methods... + /// + /// Partial shutdown is called if we lose the connection to ZooKeeper. + /// Table can also recover after partial shutdown and continue + /// to work. This method can be called regularly. void partialShutdown(); + + /// These two methods are called during final table shutdown (DROP/DETACH/overall server shutdown). + /// The shutdown process is split into two methods to make it softer and faster. In a database, shutdown() + /// looks like: + /// for (table : tables) + /// table->flushAndPrepareForShutdown() + /// + /// for (table : tables) + /// table->shutdown() + /// + /// So we first stop producing parts for all tables (fast operation). And afterwards we can wait in shutdown() + /// for other replicas to download parts. + /// + /// In flushAndPrepareForShutdown we cancel all part-producing operations: + /// merges, fetches, moves and so on. If it wasn't called before shutdown() -- shutdown() will + /// call it (defensive programming). + void flushAndPrepareForShutdown() override; + /// In shutdown we completely terminate the table -- remove the + /// is_active node and the interserver handler. Also optionally + /// wait until other replicas download some parts from our replica
+ void shutdown() override; + ~StorageReplicatedMergeTree() override; static String getDefaultZooKeeperPath(const Poco::Util::AbstractConfiguration & config); @@ -130,7 +157,7 @@ public: const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, - ContextPtr context, + ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; @@ -340,6 +367,13 @@ public: /// Get a sequential consistent view of current parts. ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock getMaxAddedBlocks() const; + void addLastSentPart(const MergeTreePartInfo & info); + + /// Wait required amount of milliseconds to give other replicas a chance to + /// download unique parts from our replica + using ShutdownDeadline = std::chrono::time_point; + void waitForUniquePartsToBeFetchedByOtherReplicas(ShutdownDeadline shutdown_deadline); + private: std::atomic_bool are_restoring_replica {false}; @@ -444,9 +478,19 @@ private: Poco::Event partial_shutdown_event {false}; /// Poco::Event::EVENT_MANUALRESET std::atomic shutdown_called {false}; - std::atomic flush_called {false}; + std::atomic shutdown_prepared_called {false}; + std::optional shutdown_deadline; + + /// We call flushAndPrepareForShutdown before acquiring DDLGuard, so we can shutdown a table that is being created right now + mutable std::mutex flush_and_shutdown_mutex; + + + mutable std::mutex last_sent_parts_mutex; + std::condition_variable last_sent_parts_cv; + std::deque last_sent_parts; /// Threads. + /// /// A task that keeps track of the updates in the logs of all replicas and loads them into the queue. bool queue_update_in_progress = false; @@ -513,6 +557,36 @@ private: static std::optional distributedWriteFromClusterStorage(const std::shared_ptr & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context); + void readLocalImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + + void readLocalSequentialConsistencyImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + + void readParallelReplicasImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + template void foreachActiveParts(Func && func, bool select_sequential_consistency) const; @@ -699,6 +773,7 @@ private: */ String findReplicaHavingCoveringPart(LogEntry & entry, bool active); String findReplicaHavingCoveringPart(const String & part_name, bool active, String & found_part_name); + static std::set findReplicaUniqueParts(const String & replica_name_, const String & zookeeper_path_, MergeTreeDataFormatVersion format_version_, zkutil::ZooKeeper::Ptr zookeeper_, Poco::Logger * log_); /** Download the specified part from the specified replica. * If `to_detached`, the part is placed in the `detached` directory. 
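Editor's note on the two-phase shutdown introduced above: the header comments describe a fast, cancel-everything first pass followed by a bounded wait for other replicas. The standalone C++ sketch below is an illustration of that calling pattern only, not ClickHouse code; Table, hasUniqueParts() and the sleep-based polling are simplified stand-ins for IStorage, the unique-parts bookkeeping and the condition-variable wait of the real implementation, and the 500 ms budget stands in for wait_for_unique_parts_send_before_shutdown_ms.

#include <chrono>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

// Simplified stand-in for a replicated table; only the two shutdown phases are modeled.
struct Table
{
    std::string name;
    std::chrono::system_clock::time_point shutdown_deadline;

    // Phase 1: fast. Stop producing new parts (cancel merges/fetches/moves) and fix the deadline.
    void flushAndPrepareForShutdown(std::chrono::milliseconds wait_budget)
    {
        shutdown_deadline = std::chrono::system_clock::now() + wait_budget;
        std::cout << name << ": part production stopped\n";
    }

    // Phase 2: possibly slow. Give other replicas a chance to fetch our unique parts,
    // but never wait past the deadline fixed in phase 1.
    void shutdown()
    {
        while (hasUniqueParts() && std::chrono::system_clock::now() < shutdown_deadline)
            std::this_thread::sleep_for(std::chrono::milliseconds(100)); // stands in for a condition-variable wait
        std::cout << name << ": fully shut down\n";
    }

    // Placeholder: the real check compares our parts against the active parts of other replicas.
    bool hasUniqueParts() const { return false; }
};

int main()
{
    std::vector<Table> tables{{"t1"}, {"t2"}};
    const auto wait_budget = std::chrono::milliseconds(500);

    // First pass over all tables: stop producing parts everywhere (fast).
    for (auto & table : tables)
        table.flushAndPrepareForShutdown(wait_budget);

    // Second pass: wait (bounded by each table's deadline) and terminate.
    for (auto & table : tables)
        table.shutdown();
}

Splitting the loops this way keeps the slow waiting out of the first pass, so no table keeps producing parts while another table is still waiting for its replicas to catch up.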
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 17309e6da73..d8654a5da27 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -42,6 +41,7 @@ #include #include +#include #include #include @@ -148,7 +148,8 @@ public: const Block & virtual_header_, ContextPtr context_, KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_) + const S3Settings::RequestSettings & request_settings_, + std::function file_progress_callback_) : WithContext(context_) , client(client_.clone()) , globbed_uri(globbed_uri_) @@ -158,6 +159,7 @@ public: , request_settings(request_settings_) , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1) , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) + , file_progress_callback(file_progress_callback_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); @@ -194,11 +196,6 @@ public: return nextAssumeLocked(); } - size_t getTotalSize() const - { - return total_size.load(std::memory_order_relaxed); - } - ~Impl() { list_objects_pool.wait(); @@ -249,7 +246,7 @@ private: if (!outcome.IsSuccess()) { - throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", quoteString(request.GetBucket()), quoteString(request.GetPrefix()), backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } @@ -312,15 +309,19 @@ private: buffer.reserve(block.rows()); for (UInt64 idx : idxs.getData()) { - total_size.fetch_add(temp_buffer[idx].info->size, std::memory_order_relaxed); + if (file_progress_callback) + file_progress_callback(FileProgress(0, temp_buffer[idx].info->size)); buffer.emplace_back(std::move(temp_buffer[idx])); } } else { buffer = std::move(temp_buffer); - for (const auto & [_, info] : buffer) - total_size.fetch_add(info->size, std::memory_order_relaxed); + if (file_progress_callback) + { + for (const auto & [_, info] : buffer) + file_progress_callback(FileProgress(0, info->size)); + } } /// Set iterator only after the whole batch is processed @@ -381,7 +382,7 @@ private: ThreadPool list_objects_pool; ThreadPoolCallbackRunner list_objects_scheduler; std::future outcome_future; - std::atomic total_size = 0; + std::function file_progress_callback; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( @@ -391,8 +392,9 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( const Block & virtual_header, ContextPtr context, KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_) - : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_)) + const S3Settings::RequestSettings & request_settings_, + std::function file_progress_callback_) + : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_, file_progress_callback_)) { } @@ -401,11 +403,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next() return pimpl->next(); } -size_t StorageS3Source::DisclosedGlobIterator::getTotalSize() const -{ - return pimpl->getTotalSize(); -} - 
class StorageS3Source::KeysIterator::Impl : WithContext { public: @@ -418,23 +415,26 @@ public: ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - bool need_total_size, - KeysWithInfo * read_keys_) + KeysWithInfo * read_keys_, + std::function file_progress_callback_) : WithContext(context_) + , keys(keys_) + , client(client_.clone()) + , version_id(version_id_) , bucket(bucket_) + , request_settings(request_settings_) , query(query_) , virtual_header(virtual_header_) + , file_progress_callback(file_progress_callback_) { - Strings all_keys = keys_; - /// Create a virtual block with one row to construct filter - if (query && virtual_header && !all_keys.empty()) + if (query && virtual_header && !keys.empty()) { /// Append "idx" column as the filter result virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); auto block = virtual_header.cloneEmpty(); - addPathToVirtualColumns(block, fs::path(bucket) / all_keys.front(), 0); + addPathToVirtualColumns(block, fs::path(bucket) / keys.front(), 0); ASTPtr filter_ast; VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); @@ -442,8 +442,8 @@ public: if (filter_ast) { block = virtual_header.cloneEmpty(); - for (size_t i = 0; i < all_keys.size(); ++i) - addPathToVirtualColumns(block, fs::path(bucket) / all_keys[i], i); + for (size_t i = 0; i < keys.size(); ++i) + addPathToVirtualColumns(block, fs::path(bucket) / keys[i], i); VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); const auto & idxs = typeid_cast(*block.getByName("_idx").column); @@ -451,29 +451,17 @@ public: Strings filtered_keys; filtered_keys.reserve(block.rows()); for (UInt64 idx : idxs.getData()) - filtered_keys.emplace_back(std::move(all_keys[idx])); + filtered_keys.emplace_back(std::move(keys[idx])); - all_keys = std::move(filtered_keys); + keys = std::move(filtered_keys); } } - for (auto && key : all_keys) - { - std::optional info; - /// In case all_keys.size() > 1, avoid getting object info now - /// (it will be done anyway eventually, but with delay and in parallel). - /// But progress bar will not work in this case. 
- if (need_total_size && all_keys.size() == 1) - { - info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_); - total_size += info->size; - } - - keys.emplace_back(std::move(key), std::move(info)); - } - if (read_keys_) - *read_keys_ = keys; + { + for (const auto & key : keys) + read_keys_->push_back({key, {}}); + } } KeyWithInfo next() @@ -481,24 +469,27 @@ public: size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) return {}; + auto key = keys[current_index]; + std::optional info; + if (file_progress_callback) + { + info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); + file_progress_callback(FileProgress(0, info->size)); + } - return keys[current_index]; - } - - size_t getTotalSize() const - { - return total_size; + return {key, info}; } private: - KeysWithInfo keys; + Strings keys; std::atomic_size_t index = 0; - + std::unique_ptr client; + String version_id; String bucket; + S3Settings::RequestSettings request_settings; ASTPtr query; Block virtual_header; - - size_t total_size = 0; + std::function file_progress_callback; }; StorageS3Source::KeysIterator::KeysIterator( @@ -510,11 +501,11 @@ StorageS3Source::KeysIterator::KeysIterator( ASTPtr query, const Block & virtual_header, ContextPtr context, - bool need_total_size, - KeysWithInfo * read_keys) + KeysWithInfo * read_keys, + std::function file_progress_callback_) : pimpl(std::make_shared( client_, version_id_, keys_, bucket_, request_settings_, - query, virtual_header, context, need_total_size, read_keys)) + query, virtual_header, context, read_keys, file_progress_callback_)) { } @@ -523,27 +514,12 @@ StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next() return pimpl->next(); } -size_t StorageS3Source::KeysIterator::getTotalSize() const -{ - return pimpl->getTotalSize(); -} - -Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageS3Source::StorageS3Source( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format_, String name_, - const Block & sample_block_, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, @@ -552,20 +528,21 @@ StorageS3Source::StorageS3Source( const String & version_id_, std::shared_ptr file_iterator_, const size_t download_thread_num_) - : ISource(getHeader(sample_block_, requested_virtual_columns_)) + : ISource(info.source_header, false) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) , version_id(version_id_) , format(format_) - , columns_desc(columns_) + , columns_desc(info.columns_description) + , requested_columns(info.requested_columns) , max_block_size(max_block_size_) , request_settings(request_settings_) , compression_hint(std::move(compression_hint_)) , client(client_) - , sample_block(sample_block_) + , sample_block(info.format_header) , format_settings(format_settings_) - , requested_virtual_columns(requested_virtual_columns_) + , requested_virtual_columns(info.requested_virtual_columns) , file_iterator(file_iterator_) , download_thread_num(download_thread_num_) , create_reader_pool(CurrentMetrics::StorageS3Threads, 
CurrentMetrics::StorageS3ThreadsActive, 1) @@ -573,10 +550,7 @@ StorageS3Source::StorageS3Source( { reader = createReader(); if (reader) - { - total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); reader_future = createReaderAsync(); - } } StorageS3Source::ReaderHolder StorageS3Source::createReader() @@ -611,10 +585,17 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)}; + return ReaderHolder{key_with_info.key, bucket, std::move(read_buf), std::move(input_format), std::move(pipeline), std::move(current_reader)}; } std::future StorageS3Source::createReaderAsync() @@ -713,17 +694,10 @@ Chunk StorageS3Source::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + size_t chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); const auto & file_path = reader.getPath(); - - if (num_rows && total_objects_size) - { - size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -748,13 +722,6 @@ Chunk StorageS3Source::generate() if (!reader) break; - size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_objects_size) - total_rows_approx_accumulated = static_cast( - std::ceil(static_cast(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated)); - total_objects_size += object_size; - /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
create_reader_pool.wait(); @@ -974,6 +941,7 @@ StorageS3::StorageS3( FormatFactory::instance().checkFormatName(configuration.format); context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); + context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) @@ -1004,8 +972,8 @@ std::shared_ptr StorageS3::createFileIterator( ContextPtr local_context, ASTPtr query, const Block & virtual_block, - bool need_total_size, - KeysWithInfo * read_keys) + KeysWithInfo * read_keys, + std::function file_progress_callback) { if (distributed_processing) { @@ -1016,22 +984,17 @@ std::shared_ptr StorageS3::createFileIterator( /// Iterate through disclosed globs and make a source for each file return std::make_shared( *configuration.client, configuration.url, query, virtual_block, - local_context, read_keys, configuration.request_settings); + local_context, read_keys, configuration.request_settings, file_progress_callback); } else { return std::make_shared( *configuration.client, configuration.url.version_id, configuration.keys, configuration.url.bucket, configuration.request_settings, query, - virtual_block, local_context, need_total_size, read_keys); + virtual_block, local_context, read_keys, file_progress_callback); } } -bool StorageS3::supportsSubcolumns() const -{ - return FormatFactory::instance().checkIfFormatSupportsSubcolumns(configuration.format); -} - bool StorageS3::supportsSubsetOfColumns() const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); @@ -1063,52 +1026,20 @@ Pipe StorageS3::read( Pipes pipes; - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - std::shared_ptr iterator_wrapper = createFileIterator( - query_configuration, distributed_processing, local_context, query_info.query, virtual_block); + query_configuration, distributed_processing, local_context, query_info.query, virtual_block, nullptr, local_context->getFileProgressCallback()); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - auto fetch_columns = column_names; - const auto & virtuals = getVirtuals(); - std::erase_if( - fetch_columns, - [&](const String & col) - { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); - - if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); - - columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); const size_t max_download_threads = local_context->getSettingsRef().max_download_threads; for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, 
query_configuration.format, getName(), - block_for_format, local_context, format_settings, - columns_description, max_block_size, query_configuration.request_settings, query_configuration.compression_method, @@ -1225,7 +1156,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw Exception(ErrorCodes::S3_ERROR, "{}: {}", std::to_string(static_cast(err.GetErrorType())), err.GetMessage()); + throw S3Exception(err.GetMessage(), err.GetErrorType()); } for (const auto & error : response.GetResult().GetErrors()) @@ -1475,7 +1406,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( { KeysWithInfo read_keys; - auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, false, &read_keys); + auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, &read_keys); std::optional columns_from_cache; size_t prev_read_keys_size = read_keys.size(); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 8d571dd796f..d329f3d620a 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -21,6 +21,7 @@ #include #include #include +#include namespace Aws::S3 { @@ -56,7 +57,6 @@ public: public: virtual ~IIterator() = default; virtual KeyWithInfo next() = 0; - virtual size_t getTotalSize() const = 0; KeyWithInfo operator ()() { return next(); } }; @@ -71,10 +71,10 @@ public: const Block & virtual_header, ContextPtr context, KeysWithInfo * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}); + const S3Settings::RequestSettings & request_settings_ = {}, + std::function progress_callback_ = {}); KeyWithInfo next() override; - size_t getTotalSize() const override; private: class Impl; @@ -94,11 +94,10 @@ public: ASTPtr query, const Block & virtual_header, ContextPtr context, - bool need_total_size = true, - KeysWithInfo * read_keys = nullptr); + KeysWithInfo * read_keys = nullptr, + std::function progress_callback_ = {}); KeyWithInfo next() override; - size_t getTotalSize() const override; private: class Impl; @@ -113,22 +112,16 @@ public: KeyWithInfo next() override { return {callback(), {}}; } - size_t getTotalSize() const override { return 0; } - private: ReadTaskCallback callback; }; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - StorageS3Source( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, const String & format, String name_, - const Block & sample_block, ContextPtr context_, std::optional format_settings_, - const ColumnsDescription & columns_, UInt64 max_block_size_, const S3Settings::RequestSettings & request_settings_, String compression_hint_, @@ -145,11 +138,14 @@ public: Chunk generate() override; private: + friend class StorageS3QueueSource; + String name; String bucket; String version_id; String format; ColumnsDescription columns_desc; + NamesAndTypesList requested_columns; UInt64 max_block_size; S3Settings::RequestSettings request_settings; String compression_hint; @@ -161,14 +157,16 @@ private: { public: ReaderHolder( - String path_, + String key_, + String bucket_, std::unique_ptr read_buf_, std::shared_ptr input_format_, std::unique_ptr pipeline_, std::unique_ptr reader_) - : path(std::move(path_)) + : key(std::move(key_)) + , bucket(std::move(bucket_)) , read_buf(std::move(read_buf_)) - , input_format(input_format_) + , input_format(std::move(input_format_)) , pipeline(std::move(pipeline_)) , 
reader(std::move(reader_)) { @@ -191,21 +189,22 @@ private: pipeline = std::move(other.pipeline); input_format = std::move(other.input_format); read_buf = std::move(other.read_buf); - path = std::move(other.path); + key = std::move(other.key); + bucket = std::move(other.bucket); return *this; } - const std::unique_ptr & getReadBuffer() const { return read_buf; } - - const std::shared_ptr & getFormat() const { return input_format; } - explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getPath() const { return path; } + String getPath() const { return fs::path(bucket) / key; } + const String & getFile() const { return key; } + + const IInputFormat * getInputFormat() const { return input_format.get(); } private: - String path; + String key; + String bucket; std::unique_ptr read_buf; std::shared_ptr input_format; std::unique_ptr pipeline; @@ -214,7 +213,7 @@ private: ReaderHolder reader; - std::vector requested_virtual_columns; + NamesAndTypesList requested_virtual_columns; std::shared_ptr file_iterator; size_t download_thread_num = 1; @@ -224,11 +223,6 @@ private: ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_objects_size = 0; - /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(); std::future createReaderAsync(); @@ -334,6 +328,7 @@ protected: private: friend class StorageS3Cluster; friend class TableFunctionS3Cluster; + friend class StorageS3Queue; Configuration configuration; std::mutex configuration_update_mutex; @@ -353,15 +348,15 @@ private: ContextPtr local_context, ASTPtr query, const Block & virtual_block, - bool need_total_size = true, - KeysWithInfo * read_keys = nullptr); + KeysWithInfo * read_keys = nullptr, + std::function progress_callback = {}); static ColumnsDescription getTableStructureFromDataImpl( const Configuration & configuration, const std::optional & format_settings, ContextPtr ctx); - bool supportsSubcolumns() const override; + bool supportsSubcolumns() const override { return true; } bool supportsSubsetOfColumns() const override; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 153a3b7f11b..e1ee4466c25 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -44,6 +44,8 @@ StorageS3Cluster::StorageS3Cluster( , s3_configuration{configuration_} { context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); + context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); + StorageInMemoryMetadata storage_metadata; updateConfigurationIfChanged(context_); @@ -86,7 +88,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const { auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, query, virtual_block, context); + *s3_configuration.client, s3_configuration.url, query, virtual_block, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { return iterator->next().key; }); return RemoteQueryExecutor::Extension{ 
.task_iterator = std::move(callback) }; } diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 5c2229875e5..4b4558f6330 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -36,6 +36,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; } + protected: void updateConfigurationIfChanged(ContextPtr local_context); diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 89e6ee46b4d..0dc8d8d897b 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -182,6 +182,7 @@ S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection) max_single_read_retries = collection.getOrDefault("max_single_read_retries", max_single_read_retries); max_connections = collection.getOrDefault("max_connections", max_connections); list_object_keys_size = collection.getOrDefault("list_object_keys_size", list_object_keys_size); + allow_native_copy = collection.getOrDefault("allow_native_copy", allow_native_copy); throw_on_zero_files_match = collection.getOrDefault("throw_on_zero_files_match", throw_on_zero_files_match); } @@ -197,6 +198,7 @@ S3Settings::RequestSettings::RequestSettings( max_connections = config.getUInt64(key + "max_connections", settings.s3_max_connections); check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload); list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size); + allow_native_copy = config.getBool(key + "allow_native_copy", allow_native_copy); throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match); retry_attempts = config.getUInt64(key + "retry_attempts", settings.s3_retry_attempts); request_timeout_ms = config.getUInt64(key + "request_timeout_ms", settings.s3_request_timeout_ms); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 991e323acb6..e3d577ca0b3 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -71,11 +71,14 @@ struct S3Settings size_t retry_attempts = 10; size_t request_timeout_ms = 3000; size_t long_request_timeout_ms = 30000; // TODO: Take this from config like request_timeout_ms + bool allow_native_copy = true; bool throw_on_zero_files_match = false; const PartUploadSettings & getUploadSettings() const { return upload_settings; } + void setStorageClassName(const String & storage_class_name) { upload_settings.storage_class_name = storage_class_name; } + RequestSettings() = default; explicit RequestSettings(const Settings & settings); explicit RequestSettings(const NamedCollection & collection); diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index d5ae6f2383f..d5db5763da9 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -91,6 +91,7 @@ Pipe StorageSQLite::read( column_names, storage_snapshot->metadata->getColumns().getOrdinary(), IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::Regular, "", remote_table_name, context_); diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 31770c9a32b..c0e85900794 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -113,22 +113,15 @@ NameAndTypePair 
StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } -Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values) const +Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & column_name : column_names) { - std::string substituted_column_name = column_name; - - /// substituted_column_name is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - if (!parameter_values.empty()) - substituted_column_name = StorageView::replaceValueWithQueryParameter(column_name, parameter_values); - - auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); - auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); + auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, column_name); + auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, column_name); if (column && !object_column) { res.insert({column->type->createColumn(), column->type, column_name}); @@ -147,7 +140,7 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, cons else { throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Column {} not found in table {}", backQuote(substituted_column_name), storage.getStorageID().getNameForLogs()); + "Column {} not found in table {}", backQuote(column_name), storage.getStorageID().getNameForLogs()); } } return res; diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index 946e8a98cf2..a69f9b95955 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -71,7 +71,7 @@ struct StorageSnapshot NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; /// Block with ordinary + materialized + aliases + virtuals + subcolumns. 
- Block getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values = {}) const; + Block getSampleBlockForColumns(const Names & column_names) const; ColumnsDescription getDescriptionForColumns(const Names & column_names) const; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index d8bbd523cbf..83336cbd22e 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -426,7 +426,7 @@ void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont num_indices_saved = 0; total_rows = 0; total_bytes = 0; - getContext()->dropMMappedFileCache(); + getContext()->clearMMappedFileCache(); } @@ -544,9 +544,10 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec fs::path data_path_in_backup_fs = data_path_in_backup; auto temp_dir_owner = std::make_shared(disk, "tmp/"); - fs::path temp_dir = temp_dir_owner->getPath(); + fs::path temp_dir = temp_dir_owner->getRelativePath(); disk->createDirectories(temp_dir); + const auto & read_settings = backup_entries_collector.getReadSettings(); bool copy_encrypted = !backup_entries_collector.getBackupSettings().decrypt_files_from_encrypted_disks; /// data.bin @@ -576,7 +577,7 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec /// sizes.json String files_info_path = file_checker.getPath(); backup_entries_collector.addBackupEntry( - data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, copy_encrypted)); + data_path_in_backup_fs / fileName(files_info_path), std::make_unique(disk, files_info_path, read_settings, copy_encrypted)); /// columns.txt backup_entries_collector.addBackupEntry( diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 26cbe1f0233..c20898dd31e 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -79,11 +79,11 @@ public: nested->shutdown(); } - void flush() override + void flushAndPrepareForShutdown() override { std::lock_guard lock{nested_mutex}; if (nested) - nested->flush(); + nested->flushAndPrepareForShutdown(); } void drop() override @@ -156,7 +156,7 @@ public: void checkTableCanBeDropped() const override {} private: - mutable std::mutex nested_mutex; + mutable std::recursive_mutex nested_mutex; mutable GetNestedStorageFunc get_nested; mutable StoragePtr nested; const bool add_conversion; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index e6953afe68e..8f00efebd36 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1,9 +1,7 @@ #include -#include #include #include #include -#include #include #include @@ -23,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -209,25 +209,15 @@ void StorageURLSource::setCredentials(Poco::Net::HTTPBasicCredentials & credenti } } -Block StorageURLSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) -{ - for (const auto & virtual_column : requested_virtual_columns) - sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); - - return sample_block; -} - StorageURLSource::StorageURLSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, std::shared_ptr uri_iterator_, const std::string & http_method, std::function callback, const String & format, const std::optional & format_settings, String name_, - const Block & sample_block, ContextPtr 
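StorageTableFunction's nested_mutex switches from std::mutex to std::recursive_mutex above, allowing the thread that already holds the lock to acquire it again. The patch does not show which call path re-enters the lock, so the following is only a standalone sketch of the behaviour the change enables, not the actual ClickHouse code:

    #include <iostream>
    #include <mutex>

    // With std::mutex, a member function that calls another member function
    // taking the same lock deadlocks on itself; std::recursive_mutex lets the
    // owning thread re-acquire it.
    struct Nested
    {
        mutable std::recursive_mutex mutex;
        int value = 0;

        void set(int v)
        {
            std::lock_guard lock(mutex);
            value = v;
        }

        void reset()
        {
            std::lock_guard lock(mutex);  // held while calling set() below
            set(0);                       // re-entrant acquisition, fine here
        }
    };

    int main()
    {
        Nested n;
        n.set(42);
        n.reset();   // would deadlock if mutex were a plain std::mutex
        std::cout << n.value << '\n';
    }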
context, - const ColumnsDescription & columns, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, @@ -235,7 +225,13 @@ StorageURLSource::StorageURLSource( const HTTPHeaderEntries & headers_, const URIParams & params, bool glob_url) - : ISource(getHeader(sample_block, requested_virtual_columns_)), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) + : ISource(info.source_header, false) + , name(std::move(name_)) + , columns_description(info.columns_description) + , requested_columns(info.requested_columns) + , requested_virtual_columns(info.requested_virtual_columns) + , block_for_format(info.format_header) + , uri_iterator(uri_iterator_) { auto headers = getHeaders(headers_); @@ -271,29 +267,18 @@ StorageURLSource::StorageURLSource( curr_uri = uri_and_buf.first; read_buf = std::move(uri_and_buf.second); - size_t file_size = 0; - try + if (auto file_progress_callback = context->getFileProgressCallback()) { - file_size = getFileSizeFromReadBuffer(*read_buf); - } - catch (...) - { - // we simply continue without updating total_size - } - - if (file_size) - { - /// Adjust total_rows_approx_accumulated with new total size. - if (total_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_size + file_size) / total_size * total_rows_approx_accumulated)); - total_size += file_size; + size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0); + LOG_DEBUG(&Poco::Logger::get("URL"), "Send file size {}", file_size); + file_progress_callback(FileProgress(0, file_size)); } // TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams. input_format = FormatFactory::instance().getInput( format, *read_buf, - sample_block, + block_for_format, context, max_block_size, format_settings, @@ -305,8 +290,20 @@ StorageURLSource::StorageURLSource( QueryPipelineBuilder builder; builder.init(Pipe(input_format)); - builder.addSimpleTransform([&](const Block & cur_header) - { return std::make_shared(cur_header, columns, *input_format, context); }); + if (columns_description.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(cur_header, columns_description, *input_format, context); + }); + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, requested_columns); + }); pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); @@ -332,14 +329,8 @@ Chunk StorageURLSource::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); - if (num_rows && total_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } + size_t chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? 
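The StorageURLSource rewrite above replaces the hand-maintained total_size / total_rows_approx_* estimation with the context's file-progress callback: the file size is announced once up front and each produced chunk then reports its byte count through progress(). A minimal standalone sketch of that reporting pattern follows; FileProgress and the callback type here are simplified stand-ins, not the real ClickHouse declarations:

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    // Simplified stand-ins for illustration only.
    struct FileProgress { std::size_t read_bytes = 0; std::size_t total_bytes = 0; };
    using FileProgressCallback = std::function<void(const FileProgress &)>;

    // Announce the total size once (0 when unknown), then report bytes as
    // chunks are produced; rows/ETA estimation is left to the consumer.
    void readChunks(const std::vector<std::string> & chunks,
                    std::optional<std::size_t> known_size,
                    const FileProgressCallback & on_progress)
    {
        if (on_progress)
            on_progress({0, known_size.value_or(0)});
        for (const auto & chunk : chunks)
            if (on_progress)
                on_progress({chunk.size(), 0});
    }

    int main()
    {
        std::size_t read = 0;
        readChunks({"abc", "defgh"}, 8, [&](const FileProgress & p)
        {
            if (p.total_bytes)
                std::cout << "total: " << p.total_bytes << " bytes\n";
            read += p.read_bytes;
        });
        std::cout << "read: " << read << " bytes\n";
    }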
chunk_size : chunk.bytes()); const String & path{curr_uri.getPath()}; @@ -389,7 +380,7 @@ std::pair> StorageURLSource: for (; option != end; ++option) { bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option); + auto request_uri = Poco::URI(*option, context->getSettingsRef().enable_url_encoding); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); @@ -713,27 +704,6 @@ Pipe IStorageURLBase::read( { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - columns_description = storage_snapshot->getDescriptionForColumns(column_names); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - - std::unordered_set column_names_set(column_names.begin(), column_names.end()); - std::vector requested_virtual_columns; - for (const auto & virtual_column : getVirtuals()) - { - if (column_names_set.contains(virtual_column.name)) - requested_virtual_columns.push_back(virtual_column); - } - size_t max_download_threads = local_context->getSettingsRef().max_download_threads; std::shared_ptr iterator_wrapper{nullptr}; @@ -777,6 +747,8 @@ Pipe IStorageURLBase::read( num_streams = 1; } + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + Pipes pipes; pipes.reserve(num_streams); @@ -784,16 +756,20 @@ Pipe IStorageURLBase::read( for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( - requested_virtual_columns, + read_from_format_info, iterator_wrapper, getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback( + read_from_format_info.columns_description.getNamesOfPhysical(), + read_from_format_info.columns_description, + query_info, + local_context, + processed_stage, + max_block_size), format_name, format_settings, getName(), - block_for_format, local_context, - columns_description, max_block_size, getHTTPTimeouts(local_context), compression_method, @@ -816,19 +792,6 @@ Pipe StorageURLWithFailover::read( size_t max_block_size, size_t /*num_streams*/) { - ColumnsDescription columns_description; - Block block_for_format; - if (supportsSubsetOfColumns()) - { - columns_description = storage_snapshot->getDescriptionForColumns(column_names); - block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - } - else - { - columns_description = storage_snapshot->metadata->getColumns(); - block_for_format = storage_snapshot->metadata->getSampleBlock(); - } - auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); auto iterator_wrapper = std::make_shared([&, done = false]() mutable @@ -839,17 +802,17 @@ Pipe StorageURLWithFailover::read( return uri_options; }); + auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(), getVirtuals()); + auto pipe = Pipe(std::make_shared( - std::vector{}, + read_from_format_info, iterator_wrapper, 
getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(read_from_format_info.columns_description.getNamesOfPhysical(), read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - block_for_format, local_context, - columns_description, max_block_size, getHTTPTimeouts(local_context), compression_method, @@ -1019,6 +982,7 @@ StorageURL::StorageURL( distributed_processing_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); + context_->getHTTPHeaderFilter().checkHeaders(headers); } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 345f813dd7c..607d0842c40 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -158,16 +159,14 @@ public: using IteratorWrapper = std::function; StorageURLSource( - const std::vector & requested_virtual_columns_, + const ReadFromFormatInfo & info, std::shared_ptr uri_iterator_, const std::string & http_method, std::function callback, const String & format, const std::optional & format_settings, String name_, - const Block & sample_block, ContextPtr context, - const ColumnsDescription & columns, UInt64 max_block_size, const ConnectionTimeouts & timeouts, CompressionMethod compression_method, @@ -182,8 +181,6 @@ public: static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri); - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - static std::pair> getFirstAvailableURIAndReadBuffer( std::vector::const_iterator & option, const std::vector::const_iterator & end, @@ -202,7 +199,10 @@ private: InitializeFunc initialize; String name; - std::vector requested_virtual_columns; + ColumnsDescription columns_description; + NamesAndTypesList requested_columns; + NamesAndTypesList requested_virtual_columns; + Block block_for_format; std::shared_ptr uri_iterator; Poco::URI curr_uri; @@ -212,11 +212,6 @@ private: std::unique_ptr reader; Poco::Net::HTTPBasicCredentials credentials; - - size_t total_size = 0; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; }; class StorageURLSink : public SinkToStorage @@ -276,6 +271,8 @@ public: return storage_snapshot->metadata->getSampleBlock(); } + bool supportsSubcolumns() const override { return true; } + static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index f652a40a561..8804afb7af2 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -48,6 +48,7 @@ StorageURLCluster::StorageURLCluster( , uri(uri_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); + context_->getHTTPHeaderFilter().checkHeaders(configuration_.headers); StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index 67771416771..bd475d78f65 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -36,6 +36,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + bool supportsSubcolumns() const override { return true; 
} + private: void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 470def7e197..f0f9b9540de 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -107,7 +107,8 @@ StorageView::StorageView( const StorageID & table_id_, const ASTCreateQuery & query, const ColumnsDescription & columns_, - const String & comment) + const String & comment, + const bool is_parameterized_view_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; @@ -123,8 +124,7 @@ StorageView::StorageView( NormalizeSelectWithUnionQueryVisitor::Data data{SetOperationMode::Unspecified}; NormalizeSelectWithUnionQueryVisitor{data}.visit(description.inner_query); - is_parameterized_view = query.isParameterizedView(); - view_parameter_types = analyzeReceiveQueryParamsWithType(description.inner_query); + is_parameterized_view = is_parameterized_view_ || query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,7 +173,7 @@ void StorageView::read( query_plan.addStep(std::move(materializing)); /// And also convert to expected structure. - const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names, query_info.parameterized_view_values); + const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names); const auto & header = query_plan.getCurrentDataStream().header; const auto * select_with_union = current_inner_query->as(); @@ -258,42 +258,6 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ child = view_query; } -String StorageView::replaceQueryParameterWithValue(const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types) -{ - std::string name = column_name; - std::string::size_type pos = 0u; - for (const auto & parameter : parameter_values) - { - if ((pos = name.find(parameter.first)) != std::string::npos) - { - auto parameter_datatype_iterator = parameter_types.find(parameter.first); - size_t parameter_end = pos + parameter.first.size(); - if (parameter_datatype_iterator != parameter_types.end() && name.size() >= parameter_end && (name[parameter_end] == ',' || name[parameter_end] == ')')) - { - String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); - name.replace(pos, parameter.first.size(), parameter_name); - break; - } - } - } - return name; -} - -String StorageView::replaceValueWithQueryParameter(const String & column_name, const NameToNameMap & parameter_values) -{ - String name = column_name; - std::string::size_type pos = 0u; - for (const auto & parameter : parameter_values) - { - if ((pos = name.find("_CAST(" + parameter.second)) != std::string::npos) - { - name = name.substr(0,pos) + parameter.first + ")"; - break; - } - } - return name; -} - ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(select_query); diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index bebecb79ec0..b8bf5585c0f 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -15,7 +15,8 @@ public: const StorageID & table_id_, const ASTCreateQuery & query, const ColumnsDescription & columns_, - const String & comment); + const String & comment, + const bool is_parameterized_view_=false); std::string 
getName() const override { return "View"; } bool isView() const override { return true; } @@ -44,17 +45,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); - static String replaceQueryParameterWithValue (const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types); - static String replaceValueWithQueryParameter (const String & column_name, const NameToNameMap & parameter_values); - - const NameToNameMap & getParameterTypes() const - { - return view_parameter_types; - } protected: bool is_parameterized_view; - NameToNameMap view_parameter_types; }; } diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index b532d1c91f0..1715cde9d1e 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -79,6 +79,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( column_names, columns_description.getOrdinary(), bridge_helper->getIdentifierQuotingStyle(), + LiteralEscapingStyle::Regular, remote_database_name, remote_table_name, local_context); diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 1d2a3de5101..c3a2e726365 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -30,7 +30,6 @@ endif() add_dependencies(generate-source generate-contributors) set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp") -set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp") add_custom_command( OUTPUT StorageSystemLicenses.generated.cpp @@ -38,23 +37,13 @@ add_custom_command( WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC}) -list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) -clickhouse_embed_binaries( - TARGET information_schema_metadata - RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" - RESOURCES schemata.sql tables.sql views.sql columns.sql -) - list (SORT storages_system_sources) # Reproducible build add_library(clickhouse_storages_system ${storages_system_sources}) -add_dependencies(clickhouse_storages_system information_schema_metadata) - target_link_libraries(clickhouse_storages_system PRIVATE dbms common @@ -62,5 +51,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE clickhouse_common_zookeeper clickhouse_parsers Poco::JSON - INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}" ) + +target_include_directories(clickhouse_storages_system PRIVATE InformationSchema) diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 63b9a443f95..e09b27adf32 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -30,6 +32,8 @@ class IStorageSystemOneBlock : public IStorage protected: virtual void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const = 0; + virtual bool supportsColumnsMask() const { return false; } + public: explicit IStorageSystemOneBlock(const StorageID & table_id_) : 
IStorage(table_id_) { @@ -48,8 +52,15 @@ public: size_t /*num_streams*/) override { storage_snapshot->check(column_names); - Block sample_block = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + + if (supportsColumnsMask()) + { + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + query_info.columns_mask = std::move(columns_mask); + sample_block = std::move(header); + } + MutableColumns res_columns = sample_block.cloneEmptyColumns(); fillData(res_columns, context, query_info); diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index 4e7a25d7726..796b134ba56 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -8,7 +8,6 @@ const char * auto_config_build[] "SYSTEM", "@CMAKE_SYSTEM_NAME@", "VERSION_GITHASH", "@VERSION_GITHASH@", "VERSION_REVISION", "@VERSION_REVISION@", - "VERSION_DATE", "@VERSION_DATE@", "BUILD_TYPE", "@CMAKE_BUILD_TYPE@", "SYSTEM_PROCESSOR", "@CMAKE_SYSTEM_PROCESSOR@", "CMAKE_VERSION", "@CMAKE_VERSION@", diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index e4ca6a15138..b76ad07abeb 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -315,23 +316,9 @@ Pipe StorageSystemColumns::read( const size_t /*num_streams*/) { storage_snapshot->check(column_names); - - /// Create a mask of what columns are needed in the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); Block block_to_filter; Storages storages; diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f84c554afc0..031c7454ab6 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -42,10 +42,12 @@ const char * auto_contributors[] { "Alex", "Alex Bocharov", "Alex Cao", + "Alex Cheng", "Alex Karo", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", + "AlexBykovski", "Alexander Avdonkin", "Alexander Bezpiatov", "Alexander Burmak", @@ -232,6 +234,7 @@ const char * auto_contributors[] { "CheSema", "Chebarykov Pavel", "Chen Yufei", + "Chen768959", "Cheng Pan", "Chienlung Cheung", "Christian", @@ -485,6 +488,7 @@ const char * auto_contributors[] { "John", "John Hummel", "John Skopis", + "John Spurlock", "Jonatas Freitas", "Jonathan-Ackerman", "Jordi", @@ -659,6 +663,7 @@ const char * auto_contributors[] { "Mikhail Gaidamaka", "Mikhail Guzov", "Mikhail Korotov", + "Mikhail Koviazin", "Mikhail Malafeev", "Mikhail Nacharov", "Mikhail Salosin", @@ -815,6 +820,7 @@ const char * auto_contributors[] { "Roman Vasin", "Roman Vlasenko", "Roman Zhukov", + "Rory Crispin", "Roy Bellingan", "Ruslan", "Ruslan Savchenko", @@ -832,7 +838,9 @@ const char * auto_contributors[] { "Salvatore Mesoraca", "Sami Kerola", "Samuel 
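A recurring change in the system-table hunks above and below is the shared getQueriedColumnsMaskAndHeader helper (its definition is added near the end of this patch) replacing the per-storage copies of the mask-building loop, with IStorageSystemOneBlock gaining supportsColumnsMask() so implementations such as system.databases can fill only the queried columns. The idea, restated as a standalone sketch over plain std types instead of Block/NameSet:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <unordered_set>
    #include <utility>
    #include <vector>

    // Build a 0/1 mask over the sample-block positions plus the reduced
    // header containing only the queried columns, in sample order.
    std::pair<std::vector<unsigned char>, std::vector<std::string>>
    getQueriedColumnsMaskAndHeader(const std::vector<std::string> & sample,
                                   const std::vector<std::string> & queried)
    {
        std::vector<unsigned char> columns_mask(sample.size(), 0);
        std::vector<std::string> header;
        std::unordered_set<std::string> names(queried.begin(), queried.end());
        for (std::size_t i = 0; i < sample.size(); ++i)
        {
            if (names.count(sample[i]))
            {
                columns_mask[i] = 1;
                header.push_back(sample[i]);
            }
        }
        return {std::move(columns_mask), std::move(header)};
    }

    int main()
    {
        auto [mask, header] = getQueriedColumnsMaskAndHeader(
            {"database", "name", "engine", "uuid"}, {"name", "uuid"});
        for (unsigned char m : mask)
            std::cout << int(m) << ' ';
        std::cout << '\n';
        for (const auto & column : header)
            std::cout << column << '\n';
    }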
Chou", + "Samuel Colvin", "San", + "Sanjam Panda", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -883,6 +891,7 @@ const char * auto_contributors[] { "SmitaRKulkarni", "Snow", "Sofia Antipushina", + "Song Liyong", "Sorck", "Stanislav Dobrovolschii", "Stanislav Pavlovichev", @@ -893,6 +902,7 @@ const char * auto_contributors[] { "Stepan Herold", "Stephan", "Steve-金勇", + "StianBerger", "Stig Bakken", "Storozhuk Kostiantyn", "Stupnikov Andrey", @@ -977,6 +987,7 @@ const char * auto_contributors[] { "Vitaliy Karnienko", "Vitaliy Kozlovskiy", "Vitaliy Lyudvichenko", + "Vitaliy Pashkov", "Vitaliy Zakaznikov", "Vitaly", "Vitaly Artemyev", @@ -1029,6 +1040,7 @@ const char * auto_contributors[] { "Yakov Olkhovskiy", "YalalovSM", "Yangkuan Liu", + "Yarik Briukhovetskyi", "Yatian Xu", "Yatsishin Ilya", "Yağızcan Değirmenci", @@ -1053,6 +1065,7 @@ const char * auto_contributors[] { "Yury Karpovich", "Yury Stankevich", "Yusuke Tanaka", + "Zach Naimon", "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", @@ -1143,6 +1156,7 @@ const char * auto_contributors[] { "changvvb", "chasingegg", "chen", + "chen768959", "chen9t", "chengy8934", "chenjian", @@ -1179,6 +1193,7 @@ const char * auto_contributors[] { "detailyang", "dfenelonov", "dgrr", + "dheerajathrey", "dimarub2000", "dinosaur", "divanorama", @@ -1329,6 +1344,7 @@ const char * auto_contributors[] { "lanfz", "larryluogit", "laurieliyang", + "lcjh", "lehasm", "leosunli", "leozhang", @@ -1455,6 +1471,7 @@ const char * auto_contributors[] { "pawelsz-rb", "pdai", "pdv-ru", + "pedro.riera", "pengxiangcai", "peshkurov", "peter279k", @@ -1548,8 +1565,10 @@ const char * auto_contributors[] { "teng.ma", "terrylin", "tesw yew isal", + "therealnick233", "tianzhou", "tiger.yan", + "timfursov", "tison", "topvisor", "tpanetti", @@ -1563,6 +1582,7 @@ const char * auto_contributors[] { "usurai", "vahid-sohrabloo", "vdimir", + "velavokr", "velom", "vesslanjin", "vgocoder", @@ -1587,17 +1607,21 @@ const char * auto_contributors[] { "wuxiaobai24", "wzl", "xPoSx", + "xiao", + "xiaolei565", "xiedeyantu", "xieyichen", "xinhuitian", "xlwh", "xmy", + "xuelei", "yakkomajuri", "yakov-olkhovskiy", "yandd", "yang", "yangshuai", "yaqi-zhao", + "yariks5s", "yeer", "ygrek", "yhgcn", diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index 67867b6c577..bae7a266dcd 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -290,7 +290,7 @@ void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, Context } else { - throw Coordination::Exception(maybe_finished_hosts.error, fs::path(task.entry_path) / "finished"); + throw Coordination::Exception::fromPath(maybe_finished_hosts.error, fs::path(task.entry_path) / "finished"); } /// Process active nodes @@ -322,7 +322,7 @@ void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, Context } else { - throw Coordination::Exception(maybe_active_hosts.error, fs::path(task.entry_path) / "active"); + throw Coordination::Exception::fromPath(maybe_active_hosts.error, fs::path(task.entry_path) / "active"); } /// Process the rest hosts diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 2649cf71182..0c4eb197efd 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include 
#include #include @@ -185,21 +186,9 @@ Pipe StorageSystemDataSkippingIndices::read( size_t /* num_streams */) { storage_snapshot->check(column_names); - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); MutableColumnPtr column = ColumnString::create(); diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index a3d05281b28..1fa94fab7bf 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include @@ -69,27 +71,69 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database return engine_full; } -void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +static ColumnPtr getFilteredDatabases(const Databases & databases, const SelectQueryInfo & query_info, ContextPtr context) +{ + MutableColumnPtr name_column = ColumnString::create(); + MutableColumnPtr engine_column = ColumnString::create(); + MutableColumnPtr uuid_column = ColumnUUID::create(); + + for (const auto & [database_name, database] : databases) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + name_column->insert(database_name); + engine_column->insert(database->getEngineName()); + uuid_column->insert(database->getUUID()); + } + + Block block + { + ColumnWithTypeAndName(std::move(name_column), std::make_shared(), "name"), + ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine"), + ColumnWithTypeAndName(std::move(uuid_column), std::make_shared(), "uuid") + }; + VirtualColumnUtils::filterBlockWithQuery(query_info.query, block, context); + return block.getByPosition(0).column; +} + +void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const { const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES); const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & [database_name, database] : databases) + ColumnPtr filtered_databases_column = getFilteredDatabases(databases, query_info, context); + + for (size_t i = 0; i < filtered_databases_column->size(); ++i) { + auto database_name = filtered_databases_column->getDataAt(i).toString(); + if (check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name)) continue; if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) continue; /// filter out the internal database for temporary tables in system.databases, asynchronous metric "NumberOfDatabases" behaves the same way - res_columns[0]->insert(database_name); - res_columns[1]->insert(database->getEngineName()); - res_columns[2]->insert(context->getPath() + database->getDataPath()); - res_columns[3]->insert(database->getMetadataPath()); - res_columns[4]->insert(database->getUUID()); - 
res_columns[5]->insert(getEngineFull(context, database)); - res_columns[6]->insert(database->getDatabaseComment()); + const auto & database = databases.at(database_name); + + size_t src_index = 0; + size_t res_index = 0; + const auto & columns_mask = query_info.columns_mask; + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database_name); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getEngineName()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(context->getPath() + database->getDataPath()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getMetadataPath()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getUUID()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(getEngineFull(context, database)); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getDatabaseComment()); } } diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index 37c5f97d497..29dd786ca0a 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -26,6 +26,8 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; + bool supportsColumnsMask() const override { return true; } + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; }; diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 97af4094e42..c5d2ba94e09 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -81,13 +82,11 @@ struct WorkerState class DetachedPartsSource : public ISource { public: - DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_, - bool has_bytes_on_disk_column_) + DetachedPartsSource(Block header_, std::shared_ptr state_, std::vector columns_mask_, UInt64 block_size_) : ISource(std::move(header_)) , state(state_) , columns_mask(std::move(columns_mask_)) , block_size(block_size_) - , has_bytes_on_disk_column(has_bytes_on_disk_column_) {} String getName() const override { return "DataPartsSource"; } @@ -127,7 +126,6 @@ private: std::shared_ptr state; const std::vector columns_mask; const UInt64 block_size; - const bool has_bytes_on_disk_column; const size_t support_threads = 35; StoragesInfo current_info; @@ -149,9 +147,6 @@ private: void calculatePartSizeOnDisk(size_t begin, std::vector> & parts_sizes) { - if (!has_bytes_on_disk_column) - return; - WorkerState worker_state; for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) @@ -211,7 +206,9 @@ private: auto begin = detached_parts.size() - rows; std::vector> parts_sizes(rows); - calculatePartSizeOnDisk(begin, parts_sizes); + constexpr size_t bytes_on_disk_col_idx = 4; + if (columns_mask[bytes_on_disk_col_idx]) + calculatePartSizeOnDisk(begin, parts_sizes); for (auto p_id = begin; p_id < detached_parts.size(); ++p_id) { @@ -229,7 +226,7 @@ private: new_columns[res_index++]->insert(p.dir_name); if (columns_mask[src_index++]) { - chassert(has_bytes_on_disk_column); + chassert(src_index - 1 == bytes_on_disk_col_idx); size_t bytes_on_disk = parts_sizes.at(p_id - begin).load(); new_columns[res_index++]->insert(bytes_on_disk); } @@ -285,21 +282,7 @@ Pipe 
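system.databases above now pre-filters with a small block of just name/engine/uuid run through VirtualColumnUtils::filterBlockWithQuery, and only the surviving databases are materialised, column by column, under the new columns mask. Stripped of the ClickHouse types, the pattern is filtering on cheap keys before doing the per-row work; a std-only sketch with an ordinary predicate standing in for the query's WHERE clause:

    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    struct DatabaseInfo { std::string engine; std::string data_path; };

    // Select names with the cheap predicate first; the caller then does the
    // more expensive per-database work only for the survivors.
    std::vector<std::string> filteredDatabases(
        const std::map<std::string, DatabaseInfo> & databases,
        const std::function<bool(const std::string &)> & keep)
    {
        std::vector<std::string> result;
        for (const auto & [name, info] : databases)
            if (keep(name))
                result.push_back(name);
        return result;
    }

    int main()
    {
        std::map<std::string, DatabaseInfo> databases = {
            {"default", {"Atomic", "store/default/"}},
            {"system", {"Atomic", "store/system/"}},
        };
        for (const auto & name : filteredDatabases(
                 databases, [](const std::string & n) { return n != "system"; }))
            std::cout << name << '\n';
    }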
StorageSystemDetachedParts::read( storage_snapshot->check(column_names); Block sample_block = storage_snapshot->metadata->getSampleBlock(); - NameSet names_set(column_names.begin(), column_names.end()); - - Block header; - std::vector columns_mask(sample_block.columns()); - - for (size_t i = 0; i < columns_mask.size(); ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample_block.getByPosition(i)); - } - } - - bool has_bytes_on_disk_column = names_set.contains("bytes_on_disk"); + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); auto state = std::make_shared(StoragesInfoStream(query_info, context)); @@ -307,7 +290,7 @@ Pipe StorageSystemDetachedParts::read( for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size, has_bytes_on_disk_column); + auto source = std::make_shared(header.cloneEmpty(), state, columns_mask, max_block_size); pipe.addSource(std::move(source)); } diff --git a/src/Storages/System/StorageSystemEvents.cpp b/src/Storages/System/StorageSystemEvents.cpp index b9b07cfe0ac..a914c60abf4 100644 --- a/src/Storages/System/StorageSystemEvents.cpp +++ b/src/Storages/System/StorageSystemEvents.cpp @@ -16,6 +16,13 @@ NamesAndTypesList StorageSystemEvents::getNamesAndTypes() }; } +NamesAndAliases StorageSystemEvents::getNamesAndAliases() +{ + return { + {"name", std::make_shared(), "event"} + }; +} + void StorageSystemEvents::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) diff --git a/src/Storages/System/StorageSystemEvents.h b/src/Storages/System/StorageSystemEvents.h index ea0322c13b8..b2e4bac072a 100644 --- a/src/Storages/System/StorageSystemEvents.h +++ b/src/Storages/System/StorageSystemEvents.h @@ -17,6 +17,8 @@ public: static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp b/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp deleted file mode 100644 index 3bb92814a2f..00000000000 --- a/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include - -#if USE_ROCKSDB -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -NamesAndTypesList StorageSystemMergeTreeMetadataCache::getNamesAndTypes() -{ - return { - {"key", std::make_shared()}, - {"value", std::make_shared()}, - }; -} - -static bool extractKeyImpl(const IAST & elem, String & res, bool & precise) -{ - const auto * function = elem.as(); - if (!function) - return false; - - if (function->name == "and") - { - for (const auto & child : function->arguments->children) - { - bool tmp_precise = false; - if (extractKeyImpl(*child, res, tmp_precise)) - { - precise = tmp_precise; - return true; - } - } - return false; - } - - if (function->name == "equals" || function->name == "like") - { - const auto & args = function->arguments->as(); - const IAST * value; - - if (args.children.size() != 2) - return false; - - const ASTIdentifier * ident; - if ((ident = args.children.at(0)->as())) - value = args.children.at(1).get(); - else if ((ident = 
args.children.at(1)->as())) - value = args.children.at(0).get(); - else - return false; - - if (ident->name() != "key") - return false; - - const auto * literal = value->as(); - if (!literal) - return false; - - if (literal->value.getType() != Field::Types::String) - return false; - - res = literal->value.safeGet(); - precise = function->name == "equals"; - return true; - } - return false; -} - - -/// Retrieve from the query a condition of the form `key= 'key'`, from conjunctions in the WHERE clause. -static String extractKey(const ASTPtr & query, bool& precise) -{ - const auto & select = query->as(); - if (!select.where()) - return ""; - - String res; - return extractKeyImpl(*select.where(), res, precise) ? res : ""; -} - - -void StorageSystemMergeTreeMetadataCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const -{ - bool precise = false; - String key = extractKey(query_info.query, precise); - if (key.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' " - "or key LIKE 'prefix%' in WHERE clause."); - - auto cache = context->getMergeTreeMetadataCache(); - if (precise) - { - String value; - if (cache->get(key, value) != MergeTreeMetadataCache::Status::OK()) - return; - - size_t col_num = 0; - res_columns[col_num++]->insert(key); - res_columns[col_num++]->insert(value); - } - else - { - String target = extractFixedPrefixFromLikePattern(key, /*requires_perfect_prefix*/ false); - if (target.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' " - "or key LIKE 'prefix%' in WHERE clause."); - - Strings keys; - Strings values; - keys.reserve(4096); - values.reserve(4096); - cache->getByPrefix(target, keys, values); - if (keys.empty()) - return; - - assert(keys.size() == values.size()); - for (size_t i = 0; i < keys.size(); ++i) - { - size_t col_num = 0; - res_columns[col_num++]->insert(keys[i]); - res_columns[col_num++]->insert(values[i]); - } - } -} - -} -#endif diff --git a/src/Storages/System/StorageSystemMergeTreeMetadataCache.h b/src/Storages/System/StorageSystemMergeTreeMetadataCache.h deleted file mode 100644 index 4603583227e..00000000000 --- a/src/Storages/System/StorageSystemMergeTreeMetadataCache.h +++ /dev/null @@ -1,29 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_ROCKSDB -#include - - -namespace DB -{ -class Context; - - -/// Implements `merge_tree_metadata_cache` system table, which allows you to view the metadata cache data in rocksdb for testing purposes. 
-class StorageSystemMergeTreeMetadataCache : public IStorageSystemOneBlock -{ -public: - std::string getName() const override { return "SystemMergeTreeMetadataCache"; } - - static NamesAndTypesList getNamesAndTypes(); - -protected: - using IStorageSystemOneBlock::IStorageSystemOneBlock; - - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; -}; - -} -#endif diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/src/Storages/System/StorageSystemMergeTreeSettings.cpp index 6de3fb800f4..0ddd4546208 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.cpp +++ b/src/Storages/System/StorageSystemMergeTreeSettings.cpp @@ -21,6 +21,7 @@ NamesAndTypesList SystemMergeTreeSettings::getNamesAndTypes() {"max", std::make_shared(std::make_shared())}, {"readonly", std::make_shared()}, {"type", std::make_shared()}, + {"is_obsolete", std::make_shared()}, }; } @@ -52,6 +53,7 @@ void SystemMergeTreeSettings::fillData(MutableColumns & res_columns, res_columns[5]->insert(max); res_columns[6]->insert(writability == SettingConstraintWritability::CONST); res_columns[7]->insert(setting.getTypeName()); + res_columns[8]->insert(setting.isObsolete()); } } diff --git a/src/Storages/System/StorageSystemMetrics.cpp b/src/Storages/System/StorageSystemMetrics.cpp index 6007c8a7c71..ced363fed17 100644 --- a/src/Storages/System/StorageSystemMetrics.cpp +++ b/src/Storages/System/StorageSystemMetrics.cpp @@ -17,6 +17,13 @@ NamesAndTypesList StorageSystemMetrics::getNamesAndTypes() }; } +NamesAndAliases StorageSystemMetrics::getNamesAndAliases() +{ + return { + {"name", std::make_shared(), "metric"} + }; +} + void StorageSystemMetrics::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const { for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) diff --git a/src/Storages/System/StorageSystemMetrics.h b/src/Storages/System/StorageSystemMetrics.h index def95e0a934..e3e2c07014f 100644 --- a/src/Storages/System/StorageSystemMetrics.h +++ b/src/Storages/System/StorageSystemMetrics.h @@ -18,6 +18,8 @@ public: static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases(); + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 7399bd789a7..ac38c9c97b1 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -57,6 +57,7 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"bytes_on_disk", std::make_shared()}, {"data_compressed_bytes", std::make_shared()}, {"data_uncompressed_bytes", std::make_shared()}, + {"primary_key_size", std::make_shared()}, {"marks_bytes", std::make_shared()}, {"secondary_indices_compressed_bytes", std::make_shared()}, {"secondary_indices_uncompressed_bytes", std::make_shared()}, @@ -119,7 +120,7 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"has_lightweight_delete", std::make_shared()}, - {"last_removal_attemp_time", std::make_shared()}, + {"last_removal_attempt_time", std::make_shared()}, {"removal_state", std::make_shared()}, } ) @@ -168,6 +169,8 @@ void StorageSystemParts::processNextStorage( columns[res_index++]->insert(columns_size.data_compressed); if (columns_mask[src_index++]) columns[res_index++]->insert(columns_size.data_uncompressed); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->getIndexSizeFromFile()); if 
(columns_mask[src_index++]) columns[res_index++]->insert(columns_size.marks); if (columns_mask[src_index++]) @@ -343,7 +346,7 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) columns[res_index++]->insert(part->hasLightweightDelete()); if (columns_mask[src_index++]) - columns[res_index++]->insert(static_cast(part->last_removal_attemp_time.load(std::memory_order_relaxed))); + columns[res_index++]->insert(static_cast(part->last_removal_attempt_time.load(std::memory_order_relaxed))); if (columns_mask[src_index++]) columns[res_index++]->insert(getRemovalStateDescription(part->removal_state.load(std::memory_order_relaxed))); diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 0979b9d9371..513af6cfc46 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -254,21 +255,10 @@ Pipe StorageSystemPartsBase::read( StoragesInfoStream stream(query_info, context); /// Create the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample = storage_snapshot->metadata->getSampleBlock(); - Block header; - std::vector columns_mask(sample.columns()); - for (size_t i = 0; i < sample.columns(); ++i) - { - if (names_set.contains(sample.getByPosition(i).name)) - { - columns_mask[i] = 1; - header.insert(sample.getByPosition(i)); - } - } + auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample, column_names); + MutableColumns res_columns = header.cloneEmptyColumns(); if (has_state_column) res_columns.push_back(ColumnString::create()); diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 117fb4e8a5c..03757101ddf 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -44,13 +44,13 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr if (!key.is_shared && key.user_name != user_name) continue; - res_columns[0]->insert(key.queryStringFromAst()); /// approximates the original query string + res_columns[0]->insert(key.query_string); /// approximates the original query string res_columns[1]->insert(QueryCache::QueryCacheEntryWeight()(*query_result)); res_columns[2]->insert(key.expires_at < std::chrono::system_clock::now()); res_columns[3]->insert(key.is_shared); res_columns[4]->insert(key.is_compressed); res_columns[5]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); - res_columns[6]->insert(key.ast->getTreeHash().first); + res_columns[6]->insert(key.ast->getTreeHash().low64); } } diff --git a/src/Storages/System/StorageSystemRoleGrants.cpp b/src/Storages/System/StorageSystemRoleGrants.cpp index cf5a24f88cd..8fcd0fd7cf5 100644 --- a/src/Storages/System/StorageSystemRoleGrants.cpp +++ b/src/Storages/System/StorageSystemRoleGrants.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ NamesAndTypesList StorageSystemRoleGrants::getNamesAndTypes() {"user_name", std::make_shared(std::make_shared())}, {"role_name", std::make_shared(std::make_shared())}, {"granted_role_name", std::make_shared()}, + {"granted_role_id", std::make_shared()}, {"granted_role_is_default", std::make_shared()}, {"with_admin_option", std::make_shared()}, }; @@ -45,12 +47,14 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr auto & 
column_role_name = assert_cast(assert_cast(*res_columns[column_index]).getNestedColumn()); auto & column_role_name_null_map = assert_cast(*res_columns[column_index++]).getNullMapData(); auto & column_granted_role_name = assert_cast(*res_columns[column_index++]); + auto & column_granted_role_id = assert_cast(*res_columns[column_index++]).getData(); auto & column_is_default = assert_cast(*res_columns[column_index++]).getData(); auto & column_admin_option = assert_cast(*res_columns[column_index++]).getData(); auto add_row = [&](const String & grantee_name, AccessEntityType grantee_type, const String & granted_role_name, + const UUID & granted_role_id, bool is_default, bool with_admin_option) { @@ -72,6 +76,7 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr assert(false); column_granted_role_name.insertData(granted_role_name.data(), granted_role_name.length()); + column_granted_role_id.push_back(granted_role_id.toUnderType()); column_is_default.push_back(is_default); column_admin_option.push_back(with_admin_option); }; @@ -90,7 +95,7 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, ContextPtr continue; bool is_default = !default_roles || default_roles->match(role_id); - add_row(grantee_name, grantee_type, *role_name, is_default, element.admin_option); + add_row(grantee_name, grantee_type, *role_name, role_id, is_default, element.admin_option); } } }; diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index ad52c6896ac..290b575465c 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -15,6 +15,7 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes() {"changed", std::make_shared()}, {"description", std::make_shared()}, {"type", std::make_shared()}, + {"is_obsolete", std::make_shared()}, }; } @@ -33,6 +34,7 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context res_columns[3]->insert(setting.isValueChanged()); res_columns[4]->insert(setting.getDescription()); res_columns[5]->insert(setting.getTypeName()); + res_columns[6]->insert(setting.isObsolete()); } } diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index c54f7eef25f..dcb54eac0a0 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -21,6 +21,7 @@ NamesAndTypesList StorageSystemSettings::getNamesAndTypes() {"type", std::make_shared()}, {"default", std::make_shared()}, {"alias_for", std::make_shared()}, + {"is_obsolete", std::make_shared()}, }; } @@ -51,6 +52,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co res_columns[6]->insert(writability == SettingConstraintWritability::CONST); res_columns[7]->insert(setting.getTypeName()); res_columns[8]->insert(setting.getDefaultValueString()); + res_columns[10]->insert(setting.isObsolete()); }; const auto & settings_to_aliases = Settings::Traits::settingsToAliases(); diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 887d7f2a5d4..8d703632c68 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -275,15 +275,6 @@ Pipe StorageSystemStackTrace::read( Block sample_block = storage_snapshot->metadata->getSampleBlock(); - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, 
size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - } - } - bool send_signal = names_set.contains("trace") || names_set.contains("query_id"); bool read_thread_names = names_set.contains("thread_name"); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index e00d2d95568..715c98ee92a 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,22 @@ static ColumnPtr getFilteredTables(const ASTPtr & query, const ColumnPtr & filte return block.getByPosition(0).column; } +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. +static bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} + class TablesBlockSource : public ISource { @@ -265,6 +282,8 @@ protected: if (!tables_it || !tables_it->isValid()) tables_it = database->getTablesIterator(context); + const bool need_table = needTable(database, getPort().getHeader()); + for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) { auto table_name = tables_it->name(); @@ -274,23 +293,27 @@ protected: if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; - StoragePtr table = tables_it->table(); - if (!table) - // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) - continue; - + StoragePtr table = nullptr; TableLockHolder lock; - /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) - /// and it's not protected internally by other mutexes - static const size_t DATA_PATHS_INDEX = 5; - if (columns_mask[DATA_PATHS_INDEX]) + if (need_table) { - lock = table->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); - if (!lock) - // Table was dropped while acquiring the lock, skipping table + table = tables_it->table(); + if (!table) + // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) continue; - } + /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) + /// and it's not protected internally by other mutexes + static const size_t DATA_PATHS_INDEX = 5; + if (columns_mask[DATA_PATHS_INDEX]) + { + lock = table->tryLockForShare(context->getCurrentQueryId(), + context->getSettingsRef().lock_acquire_timeout); + if (!lock) + // Table was dropped while acquiring the lock, skipping table + continue; + } + } ++rows_count; size_t src_index = 0; @@ -307,6 +330,7 @@ protected: if (columns_mask[src_index++]) { + chassert(table != nullptr); res_columns[res_index++]->insert(table->getName()); } @@ -396,7 +420,9 @@ protected: else src_index += 3; - StorageMetadataPtr metadata_snapshot = table->getInMemoryMetadataPtr(); + StorageMetadataPtr metadata_snapshot; + if (table) + 
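The needTable() helper added to system.tables above lets Lazy databases skip instantiating table objects when the query only asks for columns answerable from metadata (database, name, uuid, metadata_modification_time); the surrounding hunks then guard every table-> access with a table ? ... : fallback. The check itself, as a standalone sketch:

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    // True when at least one requested column requires the table object;
    // for a Lazy database the listed columns come from metadata alone.
    bool needTable(bool is_lazy_database, const std::vector<std::string> & requested)
    {
        if (!is_lazy_database)
            return true;
        static const std::set<std::string> columns_without_table =
            {"database", "name", "uuid", "metadata_modification_time"};
        for (const auto & column : requested)
            if (!columns_without_table.count(column))
                return true;
        return false;
    }

    int main()
    {
        std::cout << needTable(true, {"database", "name"}) << '\n';    // 0: metadata suffices
        std::cout << needTable(true, {"name", "total_rows"}) << '\n';  // 1: must load the table
    }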
metadata_snapshot = table->getInMemoryMetadataPtr(); ASTPtr expression_ptr; if (columns_mask[src_index++]) @@ -433,7 +459,7 @@ protected: if (columns_mask[src_index++]) { - auto policy = table->getStoragePolicy(); + auto policy = table ? table->getStoragePolicy() : nullptr; if (policy) res_columns[res_index++]->insert(policy->getName()); else @@ -444,7 +470,7 @@ protected: settings.select_sequential_consistency = 0; if (columns_mask[src_index++]) { - auto total_rows = table->totalRows(settings); + auto total_rows = table ? table->totalRows(settings) : std::nullopt; if (total_rows) res_columns[res_index++]->insert(*total_rows); else @@ -489,7 +515,7 @@ protected: if (columns_mask[src_index++]) { - auto lifetime_rows = table->lifetimeRows(); + auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; if (lifetime_rows) res_columns[res_index++]->insert(*lifetime_rows); else @@ -498,7 +524,7 @@ protected: if (columns_mask[src_index++]) { - auto lifetime_bytes = table->lifetimeBytes(); + auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; if (lifetime_bytes) res_columns[res_index++]->insert(*lifetime_bytes); else @@ -587,23 +613,9 @@ Pipe StorageSystemTables::read( const size_t /*num_streams*/) { storage_snapshot->check(column_names); - - /// Create a mask of what columns are needed in the result. - - NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = storage_snapshot->metadata->getSampleBlock(); - Block res_block; - std::vector columns_mask(sample_block.columns()); - for (size_t i = 0, size = columns_mask.size(); i < size; ++i) - { - if (names_set.contains(sample_block.getByPosition(i).name)) - { - columns_mask[i] = 1; - res_block.insert(sample_block.getByPosition(i)); - } - } + auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); ColumnPtr filtered_databases_column = getFilteredDatabases(query_info, context); ColumnPtr filtered_tables_column = getFilteredTables(query_info.query, filtered_databases_column, context); diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 61a91685324..074a648d235 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -3,14 +3,23 @@ #include #include #include -#include +#include + +#include "config.h" + +/// Embedded SQL definitions +INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql"); +INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql"); +INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql"); +INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql"); + namespace DB { /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt -static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name) +static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query) { try { @@ -18,15 +27,13 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE); if (database.getEngineName() != "Memory") return; - bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; String 
metadata_resource_name = view_name + ".sql"; - auto attach_query = getResource(metadata_resource_name); - if (attach_query.empty()) + if (query.empty()) return; ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(), + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "Attach query from embedded resource " + metadata_resource_name, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -34,13 +41,18 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d assert(view_name == ast_create.getTable()); ast_create.attach = false; ast_create.setDatabase(database.getDatabaseName()); - if (is_uppercase) - ast_create.setTable(Poco::toUpper(view_name)); StoragePtr view = createTableFromAST(ast_create, database.getDatabaseName(), database.getTableDataPath(ast_create), context, true).second; - database.createTable(context, ast_create.getTable(), view, ast); + ASTPtr ast_upper = ast_create.clone(); + auto & ast_create_upper = ast_upper->as(); + ast_create_upper.setTable(Poco::toUpper(view_name)); + StoragePtr view_upper = createTableFromAST(ast_create_upper, database.getDatabaseName(), + database.getTableDataPath(ast_create_upper), context, true).second; + + database.createTable(context, ast_create_upper.getTable(), view_upper, ast_upper); + } catch (...) { @@ -50,10 +62,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database) { - createInformationSchemaView(context, information_schema_database, "schemata"); - createInformationSchemaView(context, information_schema_database, "tables"); - createInformationSchemaView(context, information_schema_database, "views"); - createInformationSchemaView(context, information_schema_database, "columns"); + createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast(gresource_schemata_sqlData), gresource_schemata_sqlSize)); + createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast(gresource_tables_sqlData), gresource_tables_sqlSize)); + createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast(gresource_views_sqlData), gresource_views_sqlSize)); + createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast(gresource_columns_sqlData), gresource_columns_sqlSize)); } } diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 84965b3196b..f0c67e0f787 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -90,7 +90,6 @@ #if USE_ROCKSDB #include -#include #endif @@ -150,7 +149,6 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database) #endif #if USE_ROCKSDB attach(context, system_database, "rocksdb"); - attach(context, system_database, "merge_tree_metadata_cache"); #endif } diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp new file mode 100644 index 00000000000..c29ccb590ed --- /dev/null +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +std::pair, Block> getQueriedColumnsMaskAndHeader(const Block & sample_block, const Names & column_names) +{ + std::vector 
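// --- Illustrative sketch (not part of the patch): the new getQueriedColumnsMaskAndHeader()
// helper (its body continues just below) produces a 0/1 mask aligned with the sample block
// plus the reduced header. A typical consumer walks the sample order and fills an output
// slot only when the column was queried, as system.tables does with columns_mask above.
// Plain standard-library stand-ins for Block/IColumn:
#include <cstdint>
#include <string>
#include <vector>

void fillRow(const std::vector<uint8_t> & columns_mask,
             const std::vector<std::string> & all_values,   // one value per sample column
             std::vector<std::string> & res_columns)        // pre-sized to the queried columns
{
    size_t res_index = 0;
    for (size_t src_index = 0; src_index < columns_mask.size(); ++src_index)
        if (columns_mask[src_index])
            res_columns[res_index++] = all_values[src_index];
}
// ------------------------------------------------------------------------------------------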
columns_mask(sample_block.columns()); + Block header; + + NameSet names_set(column_names.begin(), column_names.end()); + for (size_t i = 0; i < columns_mask.size(); ++i) + { + if (names_set.contains(sample_block.getByPosition(i).name)) + { + columns_mask[i] = 1; + header.insert(sample_block.getByPosition(i)); + } + } + + return std::make_pair(columns_mask, header); +} + +} diff --git a/src/Storages/System/getQueriedColumnsMaskAndHeader.h b/src/Storages/System/getQueriedColumnsMaskAndHeader.h new file mode 100644 index 00000000000..0781a92fa60 --- /dev/null +++ b/src/Storages/System/getQueriedColumnsMaskAndHeader.h @@ -0,0 +1,11 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +std::pair, Block> getQueriedColumnsMaskAndHeader(const Block & sample_block, const Names & column_names); + +} diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 907fc0cd22c..79be1f98a0f 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -63,14 +64,31 @@ bool isValidFunction(const ASTPtr & expression, const std::function & is_constant, ASTs & result) { const auto * function = expression->as(); - if (function && (function->name == "and" || function->name == "indexHint")) + + if (function) { - bool ret = true; - for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, is_constant, result); - return ret; + if (function->name == "and" || function->name == "indexHint") + { + bool ret = true; + for (const auto & child : function->arguments->children) + ret &= extractFunctions(child, is_constant, result); + return ret; + } + else if (function->name == "or") + { + bool ret = true; + ASTs or_args; + for (const auto & child : function->arguments->children) + ret &= extractFunctions(child, is_constant, or_args); + /// We can keep condition only if it still OR condition (i.e. 
we + /// have dependent conditions for columns at both sides) + if (or_args.size() == 2) + result.push_back(makeASTForLogicalOr(std::move(or_args))); + return ret; + } } - else if (isValidFunction(expression, is_constant)) + + if (isValidFunction(expression, is_constant)) { result.push_back(expression->clone()); return true; @@ -80,13 +98,13 @@ bool extractFunctions(const ASTPtr & expression, const std::functionmakeQueryContext(); cleanup_context->setCurrentQueryId(""); - cleanup_context->getClientInfo().is_replicated_database_internal = true; + cleanup_context->setQueryKindReplicatedDatabaseInternal(); InterpreterAlterQuery interpreter_alter(alter_query, cleanup_context); interpreter_alter.execute(); diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 1ee7d747fcc..74f2709f458 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -130,7 +130,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); auto * join_node = node->as(); @@ -232,8 +232,8 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node, ContextMutablePtr & mutable_context, size_t subquery_depth) { - auto subquery_hash = subquery_node->getTreeHash(); - String temporary_table_name = fmt::format("_data_{}_{}", subquery_hash.first, subquery_hash.second); + const auto subquery_hash = subquery_node->getTreeHash(); + const auto temporary_table_name = fmt::format("_data_{}", toString(subquery_hash)); const auto & external_tables = mutable_context->getExternalTables(); auto external_table_it = external_tables.find(temporary_table_name); diff --git a/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp b/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp index 9a5a68f09cc..0f2ce8a2e44 100644 --- a/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp +++ b/src/Storages/fuzzers/mergetree_checksum_fuzzer.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index e5fc01be9f4..ebd02f424fa 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -39,7 +38,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( if (shard_info.isLocal()) { TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_func_ptr, context); - return table_function_ptr->getActualTableStructure(context); + return table_function_ptr->getActualTableStructure(context, /*is_insert_query*/ true); } auto table_func_name = queryToString(table_func_ptr); @@ -58,7 +57,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( } ColumnsDescription res; - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id); + auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id); /// Ignore limit for result number of rows (that could be set during handling CSE/CTE), /// since this is a service query and should not lead to query failure. 
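For illustration, the VirtualColumnUtils change above extends predicate extraction from plain AND chains to OR: each OR branch is extracted recursively, and the OR is re-assembled only if its branches still yield usable conditions (the patch requires that the rebuilt OR keeps both of its arguments). A standalone sketch over a toy expression tree follows; Expr and isUsableCondition are stand-ins, not ClickHouse types.

#include <memory>
#include <string>
#include <vector>

struct Expr
{
    std::string name;                              // "and", "or", or a leaf predicate
    std::vector<std::shared_ptr<Expr>> children;
};
using ExprPtr = std::shared_ptr<Expr>;

bool isUsableCondition(const Expr & e) { return e.children.empty(); } // toy validity check

// ANDs are flattened into `result`; an OR is extracted branch by branch and kept only
// if every branch contributed a condition (mirroring makeASTForLogicalOr above).
bool extractFunctions(const ExprPtr & node, std::vector<ExprPtr> & result)
{
    if (node->name == "and")
    {
        bool all_extracted = true;
        for (const auto & child : node->children)
            all_extracted &= extractFunctions(child, result);
        return all_extracted;
    }
    if (node->name == "or")
    {
        bool all_extracted = true;
        std::vector<ExprPtr> or_args;
        for (const auto & child : node->children)
            all_extracted &= extractFunctions(child, or_args);
        if (or_args.size() == node->children.size())
            result.push_back(std::make_shared<Expr>(Expr{"or", std::move(or_args)}));
        return all_extracted;
    }
    if (isUsableCondition(*node))
    {
        result.push_back(node);
        return true;
    }
    return false;
}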
@@ -177,7 +176,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( const auto & shards_info = cluster.getShardsInfo(); auto query = "DESC TABLE " + remote_table_id.getFullTableName(); - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id); + auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id); new_context->setSetting("describe_extend_object_types", true); /// Expect only needed columns from the result of DESC TABLE. diff --git a/src/Storages/prepareReadingFromFormat.cpp b/src/Storages/prepareReadingFromFormat.cpp new file mode 100644 index 00000000000..6be4213ec6b --- /dev/null +++ b/src/Storages/prepareReadingFromFormat.cpp @@ -0,0 +1,78 @@ +#include +#include + +namespace DB +{ + +ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals) +{ + ReadFromFormatInfo info; + /// Collect requested virtual columns and remove them from requested columns. + Strings columns_to_read; + for (const auto & column_name : requested_columns) + { + bool is_virtual = false; + for (const auto & virtual_column : virtuals) + { + if (column_name == virtual_column.name) + { + info.requested_virtual_columns.push_back(virtual_column); + is_virtual = true; + break; + } + } + + if (!is_virtual) + columns_to_read.push_back(column_name); + } + + /// Create header for Source that will contain all requested columns including virtual columns at the end + /// (because they will be added to the chunk after reading regular columns). + info.source_header = storage_snapshot->getSampleBlockForColumns(columns_to_read); + for (const auto & requested_virtual_column : info.requested_virtual_columns) + info.source_header.insert({requested_virtual_column.type->createColumn(), requested_virtual_column.type, requested_virtual_column.name}); + + /// Set requested columns that should be read from data. + info.requested_columns = storage_snapshot->getColumnsByNames(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns(), columns_to_read); + + if (supports_subset_of_columns) + { + /// If only virtual columns were requested, just read the smallest column. + if (columns_to_read.empty()) + { + columns_to_read.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); + } + /// We need to replace all subcolumns with their nested columns (e.g `a.b`, `a.b.c`, `x.y` -> `a`, `x`), + /// because most formats cannot extract subcolumns on their own. + /// All requested subcolumns will be extracted after reading. + else + { + std::unordered_set columns_to_read_set; + /// Save original order of columns. + std::vector new_columns_to_read; + for (const auto & column_to_read : info.requested_columns) + { + auto name = column_to_read.getNameInStorage(); + if (!columns_to_read_set.contains(name)) + { + columns_to_read_set.insert(name); + new_columns_to_read.push_back(name); + } + } + columns_to_read = std::move(new_columns_to_read); + } + info.columns_description = storage_snapshot->getDescriptionForColumns(columns_to_read); + } + /// If format doesn't support reading subset of columns, read all columns. + /// Requested columns/subcolumns will be extracted after reading. 
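// --- Illustrative sketch (not part of the patch): before handing columns to the format,
// prepareReadingFromFormat() above replaces requested subcolumns with their parent storage
// columns while keeping the original order and dropping duplicates, because most formats
// cannot extract subcolumns themselves. A toy model of that step; the real code works on
// NameAndTypePair::getNameInStorage(), not on plain strings:
#include <string>
#include <unordered_set>
#include <vector>

std::string nameInStorage(const std::string & column)           // "a.b.c" -> "a"
{
    const auto pos = column.find('.');
    return pos == std::string::npos ? column : column.substr(0, pos);
}

std::vector<std::string> collapseSubcolumns(const std::vector<std::string> & requested)
{
    std::unordered_set<std::string> seen;
    std::vector<std::string> result;
    for (const auto & column : requested)
    {
        auto storage_name = nameInStorage(column);
        if (seen.insert(storage_name).second)
            result.push_back(storage_name);
    }
    return result;
}
// collapseSubcolumns({"a.b", "a.b.c", "x.y"}) -> {"a", "x"}
// ------------------------------------------------------------------------------------------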
+ else + { + info.columns_description = storage_snapshot->metadata->getColumns(); + } + + /// Create header for InputFormat with columns that will be read from the data. + info.format_header = storage_snapshot->getSampleBlockForColumns(info.columns_description.getNamesOfPhysical()); + return info; +} + +} diff --git a/src/Storages/prepareReadingFromFormat.h b/src/Storages/prepareReadingFromFormat.h new file mode 100644 index 00000000000..c5f3959a550 --- /dev/null +++ b/src/Storages/prepareReadingFromFormat.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace DB +{ + struct ReadFromFormatInfo + { + /// Header that will return Source from storage. + /// It contains all requested columns including virtual columns; + Block source_header; + /// Header that will be passed to IInputFormat to read data from file. + /// It can contain more columns than were requested if format doesn't support + /// reading subset of columns. + Block format_header; + /// Description of columns for format_header. Used for inserting defaults. + ColumnsDescription columns_description; + /// The list of requested columns without virtual columns. + NamesAndTypesList requested_columns; + /// The list of requested virtual columns. + NamesAndTypesList requested_virtual_columns; + }; + + /// Get all needed information for reading from data in some input format. + ReadFromFormatInfo prepareReadingFromFormat(const Strings & requested_columns, const StorageSnapshotPtr & storage_snapshot, bool supports_subset_of_columns, const NamesAndTypesList & virtuals); +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index a4f1d963704..e5b1c8e8744 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -34,6 +34,8 @@ void registerStorageS3(StorageFactory & factory); void registerStorageCOS(StorageFactory & factory); void registerStorageOSS(StorageFactory & factory); void registerStorageHudi(StorageFactory & factory); +void registerStorageS3Queue(StorageFactory & factory); + #if USE_PARQUET void registerStorageDeltaLake(StorageFactory & factory); #endif @@ -133,6 +135,7 @@ void registerStorages() registerStorageCOS(factory); registerStorageOSS(factory); registerStorageHudi(factory); + registerStorageS3Queue(factory); #if USE_PARQUET registerStorageDeltaLake(factory); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 5c1442ece11..749a154c19d 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -127,7 +127,8 @@ static void checkOld( std::string transformed_query = transformQueryForExternalDatabase( query_info, query_info.syntax_analyzer_result->requiredSourceColumns(), - state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, "test", "table", state.context); + state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, + LiteralEscapingStyle::Regular, "test", "table", state.context); EXPECT_EQ(transformed_query, expected) << query; } @@ -180,7 +181,8 @@ static void checkNewAnalyzer( query_info.table_expression = findTableExpression(query_node->getJoinTree(), "table"); std::string transformed_query = transformQueryForExternalDatabase( - query_info, column_names, state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, "test", "table", state.context); + query_info, column_names, state.getColumns(0), IdentifierQuotingStyle::DoubleQuotes, + 
LiteralEscapingStyle::Regular, "test", "table", state.context); EXPECT_EQ(transformed_query, expected) << query; } diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 548b55749d7..84a696a1e9c 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -258,6 +258,7 @@ String transformQueryForExternalDatabaseImpl( Names used_columns, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context) @@ -334,9 +335,11 @@ String transformQueryForExternalDatabaseImpl( dropAliases(select_ptr); WriteBufferFromOwnString out; - IAST::FormatSettings settings(out, true); - settings.identifier_quoting_style = identifier_quoting_style; - settings.always_quote_identifiers = identifier_quoting_style != IdentifierQuotingStyle::None; + IAST::FormatSettings settings( + out, /*one_line*/ true, /*hilite*/ false, + /*always_quote_identifiers*/ identifier_quoting_style != IdentifierQuotingStyle::None, + /*identifier_quoting_style*/ identifier_quoting_style, /*show_secrets_*/ true, + /*literal_escaping_style*/ literal_escaping_style); select->format(settings); @@ -350,6 +353,7 @@ String transformQueryForExternalDatabase( const Names & column_names, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context) @@ -374,6 +378,7 @@ String transformQueryForExternalDatabase( column_names, available_columns, identifier_quoting_style, + literal_escaping_style, database, table, context); @@ -385,6 +390,7 @@ String transformQueryForExternalDatabase( query_info.syntax_analyzer_result->requiredSourceColumns(), available_columns, identifier_quoting_style, + literal_escaping_style, database, table, context); diff --git a/src/Storages/transformQueryForExternalDatabase.h b/src/Storages/transformQueryForExternalDatabase.h index 0f2b0a5822f..fb6af21907e 100644 --- a/src/Storages/transformQueryForExternalDatabase.h +++ b/src/Storages/transformQueryForExternalDatabase.h @@ -31,6 +31,7 @@ String transformQueryForExternalDatabase( const Names & column_names, const NamesAndTypesList & available_columns, IdentifierQuotingStyle identifier_quoting_style, + LiteralEscapingStyle literal_escaping_style, const String & database, const String & table, ContextPtr context); diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index fb7635181dc..ebebee13092 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -49,13 +49,14 @@ namespace DB actual_columns = parseColumnsListFromString(table_structure, context_); } - ColumnsDescription TableFunctionHive::getActualTableStructure(ContextPtr /*context_*/) const { return actual_columns; } + ColumnsDescription TableFunctionHive::getActualTableStructure(ContextPtr /*context_*/, bool /*is_insert_query*/) const { return actual_columns; } StoragePtr TableFunctionHive::executeImpl( const ASTPtr & /*ast_function_*/, ContextPtr context_, const std::string & table_name_, - ColumnsDescription /*cached_columns_*/) const + ColumnsDescription /*cached_columns_*/, + bool /*is_insert_query*/) const { const Settings & settings = context_->getSettings(); ParserExpression partition_by_parser; 
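The transformQueryForExternalDatabase() changes above thread a new LiteralEscapingStyle parameter down into IAST::FormatSettings, so the query sent to an external database can escape string literals the way that database expects. The diff only shows the Regular value; for contrast, the sketch below assumes a PostgreSQL-style variant that doubles single quotes instead of backslash-escaping them. This is a simplified illustration, not the real formatter.

#include <string>

enum class LiteralEscapingStyle { Regular, PostgreSQL };   // second value assumed for illustration

std::string escapeStringLiteral(const std::string & value, LiteralEscapingStyle style)
{
    std::string out = "'";
    for (const char c : value)
    {
        if (c == '\'')
            out += (style == LiteralEscapingStyle::PostgreSQL) ? "''" : "\\'";
        else
            out += c;
    }
    out += '\'';
    return out;
}
// escapeStringLiteral("it's", LiteralEscapingStyle::Regular)    -> 'it\'s'
// escapeStringLiteral("it's", LiteralEscapingStyle::PostgreSQL) -> 'it''s'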
diff --git a/src/TableFunctions/Hive/TableFunctionHive.h b/src/TableFunctions/Hive/TableFunctionHive.h index ec09a87a876..5e48be46ce1 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.h +++ b/src/TableFunctions/Hive/TableFunctionHive.h @@ -17,10 +17,10 @@ public: bool hasStaticStructure() const override { return true; } StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return storage_type_name; } - ColumnsDescription getActualTableStructure(ContextPtr) const override; + ColumnsDescription getActualTableStructure(ContextPtr, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function_, ContextPtr context_) override; private: diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index df19e0ebad3..137e1dc27fe 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -34,15 +34,15 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, ContextPtr conte auto context_to_use = use_global_context ? context->getGlobalContext() : context; if (cached_columns.empty()) - return executeImpl(ast_function, context, table_name, std::move(cached_columns)); + return executeImpl(ast_function, context, table_name, std::move(cached_columns), is_insert_query); - if (hasStaticStructure() && cached_columns == getActualTableStructure(context)) - return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns)); + if (hasStaticStructure() && cached_columns == getActualTableStructure(context,is_insert_query)) + return executeImpl(ast_function, context_to_use, table_name, std::move(cached_columns), is_insert_query); auto this_table_function = shared_from_this(); auto get_storage = [=]() -> StoragePtr { - return this_table_function->executeImpl(ast_function, context_to_use, table_name, cached_columns); + return this_table_function->executeImpl(ast_function, context_to_use, table_name, cached_columns, is_insert_query); }; /// It will request actual table structure and create underlying storage lazily diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index fe71005cb9c..028bbb99c15 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -58,7 +58,7 @@ public: virtual void parseArguments(const ASTPtr & /*ast_function*/, ContextPtr /*context*/) {} /// Returns actual table structure probably requested from remote server, may fail - virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/) const = 0; + virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/, bool is_insert_query) const = 0; /// Check if table function needs a structure hint from SELECT query in case of /// INSERT INTO FUNCTION ... SELECT ... and INSERT INTO ... SELECT ... FROM table_function(...) 
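ITableFunction::execute() above now forwards an is_insert_query flag into both getActualTableStructure() and executeImpl(), including through the lambda used when the underlying storage is created lazily. Below is a minimal standalone sketch of that deferred-creation pattern with the flag captured; the types are stand-ins, and the read-only/writable wording is only an example of why the flag matters (object-storage functions pass !is_insert_query as a read-only hint when creating their client).

#include <functional>
#include <memory>
#include <string>

struct Storage { std::string description; };
using StoragePtr = std::shared_ptr<Storage>;

// When the cached structure cannot be verified up front, hand out a factory that
// builds the storage on first use; the flag travels with it.
std::function<StoragePtr()> makeDeferredStorage(const std::string & table_name, bool is_insert_query)
{
    return [table_name, is_insert_query]() -> StoragePtr
    {
        return std::make_shared<Storage>(Storage{
            table_name + (is_insert_query ? " (writable client)" : " (read-only client)")});
    };
}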
@@ -89,7 +89,7 @@ protected: private: virtual StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const = 0; + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const = 0; virtual const char * getStorageTypeName() const = 0; }; diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index a8329684ee6..7e81d6d21b7 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -17,7 +17,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_GET; + extern const int CLUSTER_DOESNT_EXIST; extern const int LOGICAL_ERROR; } @@ -59,7 +59,7 @@ protected: cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); if (!context->tryGetCluster(cluster_name)) - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + throw Exception(ErrorCodes::CLUSTER_DOESNT_EXIST, "Requested cluster '{}' not found", cluster_name); /// Just cut the first arg (cluster_name) and try to parse other table function arguments as is args.erase(args.begin()); diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index f87838cfb56..6d50e9138ff 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -26,7 +26,8 @@ protected: const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/) const override + ColumnsDescription /*cached_columns*/, + bool /*is_insert_query*/) const override { ColumnsDescription columns; if (TableFunction::configuration.structure != "auto") @@ -42,7 +43,7 @@ protected: const char * getStorageTypeName() const override { return Storage::name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override + ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override { if (TableFunction::configuration.structure == "auto") { diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index a60ab70d570..487826dc363 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -110,7 +110,7 @@ void ITableFunctionFileLike::addColumnsStructureToArguments(ASTs & args, const S } } -StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { ColumnsDescription columns; if (structure != "auto") diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 8300cc27591..902c7b01c4d 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -42,13 +42,14 @@ protected: virtual String getFormatFromFirstArgument(); String filename; + String path_to_archive; String format = "auto"; String structure = "auto"; String compression_method = "auto"; ColumnsDescription structure_hint; private: - StoragePtr executeImpl(const ASTPtr & 
ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; virtual StoragePtr getStorage( const String & source, const String & format, const ColumnsDescription & columns, ContextPtr global_context, diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index 1fb0f392e33..59702259b35 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -61,7 +61,7 @@ void ITableFunctionXDBC::startBridgeIfNot(ContextPtr context) const } } -ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr context) const +ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { startBridgeIfNot(context); @@ -92,10 +92,10 @@ ColumnsDescription ITableFunctionXDBC::getActualTableStructure(ContextPtr contex return ColumnsDescription{columns}; } -StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { startBridgeIfNot(context); - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto result = std::make_shared( StorageID(getDatabaseName(), table_name), schema_name, remote_table_name, columns, ConstraintsDescription{}, String{}, context, helper); result->startup(); diff --git a/src/TableFunctions/ITableFunctionXDBC.h b/src/TableFunctions/ITableFunctionXDBC.h index 984a6a1957f..da0fa83033b 100644 --- a/src/TableFunctions/ITableFunctionXDBC.h +++ b/src/TableFunctions/ITableFunctionXDBC.h @@ -16,7 +16,7 @@ namespace DB class ITableFunctionXDBC : public ITableFunction { private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; /* A factory method to create bridge helper, that will assist in remote interaction */ virtual BridgeHelperPtr createBridgeHelper(ContextPtr context, @@ -24,7 +24,7 @@ private: const std::string & connection_string_, bool use_connection_pooling_) const = 0; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index d2a96173491..a4ece2f6a07 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -39,15 +39,13 @@ namespace bool isConnectionString(const std::string & candidate) { - return candidate.starts_with("DefaultEndpointsProtocol"); + return !candidate.starts_with("http"); } } -StorageAzureBlob::Configuration 
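// --- Illustrative sketch (not part of the patch): the isConnectionString() change above
// widens the heuristic. Anything that does not look like an http(s) endpoint is now treated
// as an Azure connection string, instead of only values starting with
// "DefaultEndpointsProtocol". Standalone restatement:
#include <string>

bool isConnectionString(const std::string & candidate)   // requires C++20 for starts_with
{
    return !candidate.starts_with("http");
}
// isConnectionString("https://account.blob.core.windows.net")            -> false (account URL)
// isConnectionString("DefaultEndpointsProtocol=https;AccountName=...;")  -> true  (connection string)
// ------------------------------------------------------------------------------------------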
TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context, bool get_format_from_file) +void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { - StorageAzureBlob::Configuration configuration; - /// Supported signatures: /// /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) @@ -59,87 +57,80 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.blobs_paths = {configuration.blob_path}; - if (configuration.format == "auto" && get_format_from_file) + if (configuration.format == "auto") configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; } - - if (engine_args.size() < 3 || engine_args.size() > 8) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool + else { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); - }; + if (engine_args.size() < 3 || engine_args.size() > 8) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - if (engine_args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); + + std::unordered_map engine_args_to_idx; + + configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + configuration.is_connection_string = isConnectionString(configuration.connection_url); + + configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); + configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + if (engine_args.size() == 4) { - configuration.format = fourth_arg; + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); + if (is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + } + else + { + configuration.structure = fourth_arg; + } } - else + else if (engine_args.size() == 5) { - configuration.structure = fourth_arg; + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if 
(is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + } + else + { + configuration.account_name = fourth_arg; + configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + } } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) + else if (engine_args.size() == 6) { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + configuration.format = fourth_arg; + configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); + } + else + { + configuration.account_name = fourth_arg; + configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); + if (is_format_arg(sixth_arg)) + configuration.format = sixth_arg; + else + configuration.structure = sixth_arg; + } } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format, compression and structure must be last arguments"); - } - else + else if (engine_args.size() == 7) { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); configuration.account_name = fourth_arg; configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); @@ -148,17 +139,9 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.format = sixth_arg; configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); } - } - else if (engine_args.size() == 8) - { - - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else + else if (engine_args.size() == 8) { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); 
configuration.account_name = fourth_arg; configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); @@ -168,14 +151,12 @@ StorageAzureBlob::Configuration TableFunctionAzureBlobStorage::parseArgumentsImp configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); configuration.structure = checkAndGetLiteralArgument(engine_args[7], "structure"); } + + configuration.blobs_paths = {configuration.blob_path}; + + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; } void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -190,19 +171,99 @@ void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, auto & args = args_func.at(0)->children; - configuration = parseArgumentsImpl(args, context); + parseArgumentsImpl(args, context); } -ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context) const +void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + if (args.size() < 3 || args.size() > 8) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + + auto structure_literal = std::make_shared(structure); + + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + + if (args.size() == 3) + { + /// Add format=auto & compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else if (args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + if (is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add format=auto & compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + } + args.push_back(structure_literal); + } + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 7) + { + args.push_back(structure_literal); + } + else if (args.size() == 8) + { + args.back() = structure_literal; + } + } +} + +ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const { if (configuration.structure == "auto") { context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings)); - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); + return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false); } return parseColumnsListFromString(configuration.structure, context); @@ -213,9 +274,9 @@ bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns() return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); } -StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto client = StorageAzureBlob::createClient(configuration); + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); ColumnsDescription columns; @@ -234,6 +295,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct String{}, /// No format_settings for table function Azure std::nullopt, + /* distributed_processing */ false, nullptr); storage->startup(); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h index 0ac3f9771c7..83c4449fd17 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.h @@ -13,13 +13,23 @@ namespace DB class Context; -/* AzureBlob(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in AzureBlob. +/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob. 
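// --- Illustrative sketch (not part of the patch): addColumnsStructureToArguments() above
// pads the positional azureBlobStorage() arguments with "auto" placeholders so a structure
// hint always lands in the slot the parser expects. A deliberately simplified model that
// only covers the connection_string/container/blobpath form and assumes the fourth argument,
// when present, is the format; the real code also distinguishes the account_name/account_key
// variants and manipulates AST literals, not strings:
#include <string>
#include <vector>

void addStructureArgument(std::vector<std::string> & args, const std::string & structure)
{
    // Expected order: connection_string, container, blobpath, format, compression, structure.
    if (args.size() == 3)
    {
        args.push_back("auto");        // format
        args.push_back("auto");        // compression
        args.push_back(structure);
    }
    else if (args.size() == 4)
    {
        args.push_back("auto");        // compression
        args.push_back(structure);
    }
    else if (args.size() == 5)
    {
        args.push_back(structure);
    }
    else
    {
        args.back() = structure;       // structure already present: overwrite it
    }
}
// ------------------------------------------------------------------------------------------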
*/ class TableFunctionAzureBlobStorage : public ITableFunction { public: static constexpr auto name = "azureBlobStorage"; - static constexpr auto signature = "- connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]\n"; + + static constexpr auto signature = " - connection_string, container_name, blobpath\n" + " - connection_string, container_name, blobpath, structure \n" + " - connection_string, container_name, blobpath, format \n" + " - connection_string, container_name, blobpath, format, compression \n" + " - connection_string, container_name, blobpath, format, compression, structure \n" + " - storage_account_url, container_name, blobpath, account_name, account_key\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; static size_t getMaxNumberOfArguments() { return 8; } @@ -46,7 +56,9 @@ public: return {"_path", "_file"}; } - static StorageAzureBlob::Configuration parseArgumentsImpl(ASTs & args, const ContextPtr & context, bool get_format_from_file = true); + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); + + static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); protected: @@ -54,11 +66,12 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Azure"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; mutable StorageAzureBlob::Configuration configuration; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp new file mode 100644 index 00000000000..eee585967c2 --- /dev/null +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -0,0 +1,85 @@ +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include + +#include "registerTableFunctions.h" + +#include + + +namespace DB +{ + +StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( + const ASTPtr & /*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const +{ + StoragePtr storage; + ColumnsDescription columns; + bool structure_argument_was_provided = configuration.structure != "auto"; + + if (structure_argument_was_provided) + { + columns = parseColumnsListFromString(configuration.structure, context); + } + else if (!structure_hint.empty()) + { + columns = structure_hint; + } + + auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); + auto settings = StorageAzureBlob::createSettings(context); + + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On worker node this filename won't contains globs + storage = std::make_shared( + 
configuration, + std::make_unique(table_name, std::move(client), std::move(settings)), + context, + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + /* comment */String{}, + /* format_settings */std::nullopt, /// No format_settings + /* distributed_processing */ true, + /*partition_by_=*/nullptr); + } + else + { + storage = std::make_shared( + cluster_name, + configuration, + std::make_unique(table_name, std::move(client), std::move(settings)), + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + context, + structure_argument_was_provided); + } + + storage->startup(); + + return storage; +} + + +void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory) +{ + factory.registerFunction( + {.documentation + = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", + .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .allow_readonly = false} + ); +} + + +} + +#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h new file mode 100644 index 00000000000..58f79328f63 --- /dev/null +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h @@ -0,0 +1,55 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include +#include + + +namespace DB +{ + +class Context; + +/** + * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) + * A table function, which allows to process many files from Azure Blob Storage on a specific cluster + * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks + * in Azure Blob Storage file path and dispatch each file dynamically. + * On worker node it asks initiator about next task to process, processes it. + * This is repeated until the tasks are finished. 
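// --- Illustrative sketch (not part of the patch): executeImpl() in
// TableFunctionAzureBlobStorageCluster.cpp above picks one of two storages depending on
// where the query runs. Workers (secondary queries) get the plain single-node storage with
// distributed_processing enabled; the initiator gets the *Cluster storage that schedules the
// globbed paths across all nodes. A toy restatement of that dispatch; the types here are
// stand-ins, not the real storages:
#include <memory>
#include <string>

enum class QueryKind { Initial, Secondary };
struct Storage { std::string role; };
using StoragePtr = std::shared_ptr<Storage>;

StoragePtr makeAzureStorage(QueryKind query_kind)
{
    if (query_kind == QueryKind::Secondary)
        return std::make_shared<Storage>(Storage{"worker: StorageAzureBlob, distributed_processing = true"});
    return std::make_shared<Storage>(Storage{"initiator: StorageAzureBlobCluster over the whole cluster"});
}
// ------------------------------------------------------------------------------------------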
+ */ +class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster +{ +public: + static constexpr auto name = "azureBlobStorageCluster"; + static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; + + String getName() const override + { + return name; + } + + String getSignature() const override + { + return signature; + } + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; } +}; + +} + +#endif diff --git a/src/TableFunctions/TableFunctionDictionary.cpp b/src/TableFunctions/TableFunctionDictionary.cpp index 90db9550a72..f0060acb411 100644 --- a/src/TableFunctions/TableFunctionDictionary.cpp +++ b/src/TableFunctions/TableFunctionDictionary.cpp @@ -43,7 +43,7 @@ void TableFunctionDictionary::parseArguments(const ASTPtr & ast_function, Contex dictionary_name = checkAndGetLiteralArgument(args[0], "dictionary_name"); } -ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { const ExternalDictionariesLoader & external_loader = context->getExternalDictionariesLoader(); std::string resolved_name = external_loader.resolveDictionaryName(dictionary_name, context->getCurrentDatabase()); @@ -76,10 +76,10 @@ ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr c } StoragePtr TableFunctionDictionary::executeImpl( - const ASTPtr &, ContextPtr context, const std::string & table_name, ColumnsDescription) const + const ASTPtr &, ContextPtr context, const std::string & table_name, ColumnsDescription, bool is_insert_query) const { StorageID dict_id(getDatabaseName(), table_name); - auto dictionary_table_structure = getActualTableStructure(context); + auto dictionary_table_structure = getActualTableStructure(context, is_insert_query); auto result = std::make_shared( dict_id, dictionary_name, std::move(dictionary_table_structure), String{}, StorageDictionary::Location::Custom, context); diff --git a/src/TableFunctions/TableFunctionDictionary.h b/src/TableFunctions/TableFunctionDictionary.h index cc184a32a17..d0beb292fe1 100644 --- a/src/TableFunctions/TableFunctionDictionary.h +++ b/src/TableFunctions/TableFunctionDictionary.h @@ -18,9 +18,9 @@ public: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Dictionary"; } diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 66e7ce7592c..5a64a988156 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -120,12 +120,12 @@ void 
TableFunctionExecutable::parseArguments(const ASTPtr & ast_function, Contex } } -ColumnsDescription TableFunctionExecutable::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionExecutable::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { auto storage_id = StorageID(getDatabaseName(), table_name); auto global_context = context->getGlobalContext(); @@ -135,7 +135,7 @@ StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, if (settings_query != nullptr) settings.applyChanges(settings_query->as()->changes); - auto storage = std::make_shared(storage_id, format, settings, input_queries, getActualTableStructure(context), ConstraintsDescription{}); + auto storage = std::make_shared(storage_id, format, settings, input_queries, getActualTableStructure(context, is_insert_query), ConstraintsDescription{}); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionExecutable.h b/src/TableFunctions/TableFunctionExecutable.h index 2d9f86e14e6..aa595312fe4 100644 --- a/src/TableFunctions/TableFunctionExecutable.h +++ b/src/TableFunctions/TableFunctionExecutable.h @@ -24,11 +24,11 @@ public: bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Executable"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index aae93c8b596..f127979d92a 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -91,7 +91,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt query = std::move(explain_query); } -ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { Block sample_block = getInterpreter(context).getSampleBlock(query->as()->getKind()); ColumnsDescription columns_description; @@ -123,7 +123,7 @@ static Block executeMonoBlock(QueryPipeline & pipeline) } StoragePtr TableFunctionExplain::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { 
/// To support settings inside explain subquery. auto mutable_context = Context::createCopy(context); @@ -132,7 +132,7 @@ StoragePtr TableFunctionExplain::executeImpl( Block block = executeMonoBlock(blockio.pipeline); StorageID storage_id(getDatabaseName(), table_name); - auto storage = std::make_shared(storage_id, getActualTableStructure(context), std::move(block)); + auto storage = std::make_shared(storage_id, getActualTableStructure(context, is_insert_query), std::move(block)); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionExplain.h b/src/TableFunctions/TableFunctionExplain.h index 99d3e52ee68..2eb7e35d0b5 100644 --- a/src/TableFunctions/TableFunctionExplain.h +++ b/src/TableFunctions/TableFunctionExplain.h @@ -17,7 +17,7 @@ public: std::string getName() const override { return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Explain"; } @@ -25,7 +25,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; InterpreterExplainQuery getInterpreter(ContextPtr context) const; diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index 76108f1cdd4..ce3daff0785 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -41,7 +41,7 @@ TableFunctionPtr TableFunctionFactory::get( { auto hints = getHints(table_function->name); if (!hints.empty()) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}. Maybe you meant: {}", table_function->name , toString(hints)); + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}. 
Maybe you meant: {}", table_function->name, toString(hints)); else throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}", table_function->name); } diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 0e49f26db40..56a6839ddbb 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -25,6 +25,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr if (context->getApplicationType() != Context::ApplicationType::LOCAL) { ITableFunctionFileLike::parseFirstArguments(arg, context); + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); return; } @@ -39,6 +40,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr fd = STDOUT_FILENO; else if (filename == "stderr") fd = STDERR_FILENO; + else + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); } else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { @@ -76,22 +79,31 @@ StoragePtr TableFunctionFile::getStorage(const String & source, ConstraintsDescription{}, String{}, global_context->getSettingsRef().rename_files_after_processing, + path_to_archive, }; + if (fd >= 0) return std::make_shared(fd, args); return std::make_shared(source, global_context->getUserFilesPath(), args); } -ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { if (fd >= 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Schema inference is not supported for table function '{}' with file descriptor", getName()); size_t total_bytes_to_read = 0; - Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); - return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); + + Strings paths; + Strings paths_to_archives; + if (path_to_archive.empty()) + paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); + else + paths_to_archives = StorageFile::getPathsList(path_to_archive, context->getUserFilesPath(), context, total_bytes_to_read); + + return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context, paths_to_archives); } diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index 797948cad03..439ae87b4ae 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -20,7 +20,7 @@ public: return name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index f5aff4bd098..eb7437a3943 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -51,7 +52,7 @@ void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr structure = checkAndGetLiteralArgument(args[1], "structure"); } -ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context) const 
+ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { @@ -72,7 +73,17 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont auto read_buf = std::make_unique(data); auto input_format = context->getInputFormat(format, *read_buf, block, context->getSettingsRef().max_block_size); - auto pipeline = std::make_unique(input_format); + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); + if (columns.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns, *input_format, context); + }); + } + + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto reader = std::make_unique(*pipeline); std::vector blocks; @@ -87,9 +98,9 @@ Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr cont return concatenateBlocks(blocks); } -StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); Block res_block = parseData(columns, context); auto res = std::make_shared(StorageID(getDatabaseName(), table_name), columns, res_block); res->startup(); diff --git a/src/TableFunctions/TableFunctionFormat.h b/src/TableFunctions/TableFunctionFormat.h index d64ab14cb64..e20e8b6ea4b 100644 --- a/src/TableFunctions/TableFunctionFormat.h +++ b/src/TableFunctions/TableFunctionFormat.h @@ -18,10 +18,10 @@ public: bool hasStaticStructure() const override { return false; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Values"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; Block parseData(ColumnsDescription columns, ContextPtr context) const; diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index 08059796660..c6a9154cc66 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -97,7 +97,7 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co } } -ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { @@ -113,9 +113,9 @@ ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextP return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & 
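In TableFunctionFormat, the parsed data is no longer fed straight into a pipeline: the diff builds a QueryPipelineBuilder and inserts an AddingDefaultsTransform only when the declared columns carry DEFAULT expressions, so fields missing from the input are filled from their defaults. The analogy below uses plain standard-library types rather than ClickHouse's pipeline classes, but mirrors that conditional-transform shape.

```cpp
// Self-contained analogy (not ClickHouse's QueryPipelineBuilder): a transform stage is
// appended only when the column description actually has defaults, mirroring
// `if (columns.hasDefaults())` in the diff.
#include <functional>
#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

using Row = std::map<std::string, std::string>;
using Transform = std::function<Row(Row)>;

struct PipelineSketch
{
    std::vector<Transform> stages;
    void addSimpleTransform(Transform t) { stages.push_back(std::move(t)); }
    Row run(Row row) const
    {
        for (const auto & stage : stages)
            row = stage(std::move(row));
        return row;
    }
};

int main()
{
    // "Column description": name -> optional default value.
    std::map<std::string, std::optional<std::string>> columns =
        {{"id", std::nullopt}, {"status", std::string("unknown")}};

    bool has_defaults = false;
    for (const auto & [name, def] : columns)
        has_defaults = has_defaults || def.has_value();

    PipelineSketch pipeline;
    if (has_defaults)
    {
        pipeline.addSimpleTransform([&](Row row)
        {
            for (const auto & [name, def] : columns)
                if (def && !row.count(name))
                    row[name] = *def;   // fill the missing field from its default
            return row;
        });
    }

    Row parsed = {{"id", "1"}};           // a row the input format produced without `status`
    Row complete = pipeline.run(parsed);
    std::cout << "status=" << complete["status"] << '\n';   // prints "unknown"
}
```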
/*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionGenerateRandom::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - ColumnsDescription columns = getActualTableStructure(context); + ColumnsDescription columns = getActualTableStructure(context, is_insert_query); auto res = std::make_shared( StorageID(getDatabaseName(), table_name), columns, String{}, max_array_length, max_string_length, random_seed); res->startup(); diff --git a/src/TableFunctions/TableFunctionGenerateRandom.h b/src/TableFunctions/TableFunctionGenerateRandom.h index 584d65311f4..a5d11ce0af6 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.h +++ b/src/TableFunctions/TableFunctionGenerateRandom.h @@ -19,10 +19,10 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "GenerateRandom"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String structure = "auto"; diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp index 7aab55b48c9..a8e2108fda8 100644 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ b/src/TableFunctions/TableFunctionHDFS.cpp @@ -28,7 +28,7 @@ StoragePtr TableFunctionHDFS::getStorage( compression_method_); } -ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index eec0a05fe8d..a7eb5daa440 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -34,7 +34,7 @@ public: return signature; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override { diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index 4941241acae..658a55c6fc4 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -43,7 +43,7 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context), "structure"); } -ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionInput::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { @@ -58,9 +58,9 @@ ColumnsDescription 
TableFunctionInput::getActualTableStructure(ContextPtr contex return parseColumnsListFromString(structure, context); } -StoragePtr TableFunctionInput::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionInput::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), getActualTableStructure(context)); + auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), getActualTableStructure(context, is_insert_query)); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionInput.h b/src/TableFunctions/TableFunctionInput.h index 8e7b34cb829..3164ce43eef 100644 --- a/src/TableFunctions/TableFunctionInput.h +++ b/src/TableFunctions/TableFunctionInput.h @@ -20,10 +20,10 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Input"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String structure; diff --git a/src/TableFunctions/TableFunctionMeiliSearch.cpp b/src/TableFunctions/TableFunctionMeiliSearch.cpp index 41ae5bb9ee2..01840a80262 100644 --- a/src/TableFunctions/TableFunctionMeiliSearch.cpp +++ b/src/TableFunctions/TableFunctionMeiliSearch.cpp @@ -8,13 +8,13 @@ namespace DB { StoragePtr TableFunctionMeiliSearch::executeImpl( - const ASTPtr & /* ast_function */, ContextPtr /*context*/, const String & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /* ast_function */, ContextPtr /*context*/, const String & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { return std::make_shared( StorageID(getDatabaseName(), table_name), configuration.value(), ColumnsDescription{}, ConstraintsDescription{}, String{}); } -ColumnsDescription TableFunctionMeiliSearch::getActualTableStructure(ContextPtr /* context */) const +ColumnsDescription TableFunctionMeiliSearch::getActualTableStructure(ContextPtr /* context */, bool /*is_insert_query*/) const { return StorageMeiliSearch::getTableStructureFromData(configuration.value()); } diff --git a/src/TableFunctions/TableFunctionMeiliSearch.h b/src/TableFunctions/TableFunctionMeiliSearch.h index 86be944ab12..a127809a9c5 100644 --- a/src/TableFunctions/TableFunctionMeiliSearch.h +++ b/src/TableFunctions/TableFunctionMeiliSearch.h @@ -13,11 +13,11 @@ public: private: StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const 
override { return "meilisearch"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; std::optional configuration; diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index 586cee54085..599953a1add 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -118,7 +118,7 @@ const TableFunctionMerge::DBToTableSetMap & TableFunctionMerge::getSourceDatabas return *source_databases_and_tables; } -ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { for (const auto & db_with_tables : getSourceDatabasesAndTables(context)) { @@ -134,11 +134,11 @@ ColumnsDescription TableFunctionMerge::getActualTableStructure(ContextPtr contex } -StoragePtr TableFunctionMerge::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionMerge::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { auto res = std::make_shared( StorageID(getDatabaseName(), table_name), - getActualTableStructure(context), + getActualTableStructure(context, is_insert_query), String{}, source_database_name_or_regexp, database_is_regexp, diff --git a/src/TableFunctions/TableFunctionMerge.h b/src/TableFunctions/TableFunctionMerge.h index 3439056deda..8cc5119978a 100644 --- a/src/TableFunctions/TableFunctionMerge.h +++ b/src/TableFunctions/TableFunctionMerge.h @@ -17,13 +17,13 @@ public: std::string getName() const override { return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Merge"; } using TableSet = std::set; using DBToTableSetMap = std::map; const DBToTableSetMap & getSourceDatabasesAndTables(ContextPtr context) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; static TableSet getMatchedTablesWithAccess(const String & database_name, const String & table_regexp, const ContextPtr & context); diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index 31dd64f8254..5c7c1d98cdf 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -27,9 +27,9 @@ namespace ErrorCodes StoragePtr TableFunctionMongoDB::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const String & 
table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto storage = std::make_shared( StorageID(configuration->database, table_name), configuration->host, @@ -46,7 +46,7 @@ StoragePtr TableFunctionMongoDB::executeImpl(const ASTPtr & /*ast_function*/, return storage; } -ColumnsDescription TableFunctionMongoDB::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMongoDB::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionMongoDB.h b/src/TableFunctions/TableFunctionMongoDB.h index b5033b2d654..c2c15cabe5a 100644 --- a/src/TableFunctions/TableFunctionMongoDB.h +++ b/src/TableFunctions/TableFunctionMongoDB.h @@ -17,11 +17,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "MongoDB"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; std::optional configuration; diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 1705dfcbfa5..03bd2264551 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -57,7 +57,7 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr pool.emplace(createMySQLPoolWithFailover(*configuration, mysql_settings)); } -ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return StorageMySQL::getTableStructureFromData(*pool, configuration->database, configuration->table, context); } @@ -66,7 +66,8 @@ StoragePtr TableFunctionMySQL::executeImpl( const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/) const + ColumnsDescription /*cached_columns*/, + bool /*is_insert_query*/) const { auto res = std::make_shared( StorageID(getDatabaseName(), table_name), diff --git a/src/TableFunctions/TableFunctionMySQL.h b/src/TableFunctions/TableFunctionMySQL.h index 5a230530bc4..04f619f5f4b 100644 --- a/src/TableFunctions/TableFunctionMySQL.h +++ b/src/TableFunctions/TableFunctionMySQL.h @@ -23,10 +23,10 @@ public: return name; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "MySQL"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & 
ast_function, ContextPtr context) override; mutable std::optional pool; diff --git a/src/TableFunctions/TableFunctionNull.cpp b/src/TableFunctions/TableFunctionNull.cpp index d25b9e15aa7..57911e16d4b 100644 --- a/src/TableFunctions/TableFunctionNull.cpp +++ b/src/TableFunctions/TableFunctionNull.cpp @@ -32,14 +32,14 @@ void TableFunctionNull::parseArguments(const ASTPtr & ast_function, ContextPtr c structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(arguments[0], context), "structure"); } -ColumnsDescription TableFunctionNull::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionNull::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure != "auto") return parseColumnsListFromString(structure, context); return default_structure; } -StoragePtr TableFunctionNull::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionNull::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { ColumnsDescription columns; if (structure != "auto") diff --git a/src/TableFunctions/TableFunctionNull.h b/src/TableFunctions/TableFunctionNull.h index 4fece9e6da9..e80552d4cff 100644 --- a/src/TableFunctions/TableFunctionNull.h +++ b/src/TableFunctions/TableFunctionNull.h @@ -23,11 +23,11 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Null"; } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; String structure = "auto"; ColumnsDescription structure_hint; diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index ba7a4dc4b36..d6cf50bc7d6 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -23,14 +23,14 @@ namespace ErrorCodes template -ColumnsDescription TableFunctionNumbers::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionNumbers::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { /// NOTE: https://bugs.llvm.org/show_bug.cgi?id=47418 return ColumnsDescription{{{"number", std::make_shared()}}}; } template -StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { if (const auto * function = ast_function->as()) { diff --git a/src/TableFunctions/TableFunctionNumbers.h b/src/TableFunctions/TableFunctionNumbers.h index 0a2f3eb863e..e380f40f7b2 
100644 --- a/src/TableFunctions/TableFunctionNumbers.h +++ b/src/TableFunctions/TableFunctionNumbers.h @@ -19,12 +19,12 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SystemNumbers"; } UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; }; diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 2b17a1b2c2b..322e0df7c15 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes StoragePtr TableFunctionPostgreSQL::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { auto result = std::make_shared( StorageID(getDatabaseName(), table_name), @@ -38,7 +38,7 @@ StoragePtr TableFunctionPostgreSQL::executeImpl(const ASTPtr & /*ast_function*/, } -ColumnsDescription TableFunctionPostgreSQL::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionPostgreSQL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return StoragePostgreSQL::getTableStructureFromData(connection_pool, configuration->table, configuration->schema, context); } diff --git a/src/TableFunctions/TableFunctionPostgreSQL.h b/src/TableFunctions/TableFunctionPostgreSQL.h index 9f10e1c180e..f7d77567dd4 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.h +++ b/src/TableFunctions/TableFunctionPostgreSQL.h @@ -20,11 +20,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "PostgreSQL"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; postgres::PoolWithFailoverPtr connection_pool; diff --git a/src/TableFunctions/TableFunctionRedis.cpp b/src/TableFunctions/TableFunctionRedis.cpp index ec659ae61e0..0b7433845b4 100644 --- a/src/TableFunctions/TableFunctionRedis.cpp +++ b/src/TableFunctions/TableFunctionRedis.cpp @@ -25,9 +25,9 @@ namespace ErrorCodes } StoragePtr TableFunctionRedis::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool 
is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); StorageInMemoryMetadata metadata; metadata.setColumns(columns); @@ -39,7 +39,7 @@ StoragePtr TableFunctionRedis::executeImpl( return storage; } -ColumnsDescription TableFunctionRedis::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionRedis::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionRedis.h b/src/TableFunctions/TableFunctionRedis.h index b985a89e3d7..a7fc0df0a15 100644 --- a/src/TableFunctions/TableFunctionRedis.h +++ b/src/TableFunctions/TableFunctionRedis.h @@ -19,11 +19,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const String & table_name, ColumnsDescription cached_columns) const override; + const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Redis"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; RedisConfiguration configuration; diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 4143014a7b3..1f42ce4ba30 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -37,7 +37,10 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr String cluster_name; String cluster_description; - String database, table, username = "default", password; + String database = "system"; + String table = "one"; /// The table containing one row is used by default for queries without explicit table specification. 
+ String username = "default"; + String password; if (args_func.size() != 1) throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -86,7 +89,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr else { /// Supported signatures: - /// + /// remote('addresses_expr') /// remote('addresses_expr', db.table) /// remote('addresses_expr', 'db', 'table') /// remote('addresses_expr', db.table, 'user') @@ -102,6 +105,8 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr /// /// remoteSecure() - same as remote() /// + /// cluster() + /// cluster('cluster_name') /// cluster('cluster_name', db.table) /// cluster('cluster_name', 'db', 'table') /// cluster('cluster_name', db.table, sharding_key) @@ -109,7 +114,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr /// /// clusterAllReplicas() - same as cluster() - if (args.size() < 2 || args.size() > max_args) + if ((!is_cluster_function && args.empty()) || args.size() > max_args) throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); size_t arg_num = 0; @@ -128,8 +133,15 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (is_cluster_function) { - args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); - cluster_name = checkAndGetLiteralArgument(args[arg_num], "cluster_name"); + if (!args.empty()) + { + args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); + cluster_name = checkAndGetLiteralArgument(args[arg_num], "cluster_name"); + } + else + { + cluster_name = "default"; + } } else { @@ -141,44 +153,49 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr } ++arg_num; - const auto * function = args[arg_num]->as(); - if (function && TableFunctionFactory::instance().isTableFunctionName(function->name)) - { - remote_table_function_ptr = args[arg_num]; - ++arg_num; - } - else - { - args[arg_num] = evaluateConstantExpressionForDatabaseName(args[arg_num], context); - database = checkAndGetLiteralArgument(args[arg_num], "database"); - ++arg_num; - - auto qualified_name = QualifiedTableName::parseFromString(database); - if (qualified_name.database.empty()) + /// Names of database and table is not necessary. + if (arg_num < args.size()) + { + const auto * function = args[arg_num]->as(); + if (function && TableFunctionFactory::instance().isTableFunctionName(function->name)) { - if (arg_num >= args.size()) + remote_table_function_ptr = args[arg_num]; + ++arg_num; + } + else + { + args[arg_num] = evaluateConstantExpressionForDatabaseName(args[arg_num], context); + database = checkAndGetLiteralArgument(args[arg_num], "database"); + + ++arg_num; + + auto qualified_name = QualifiedTableName::parseFromString(database); + if (qualified_name.database.empty()) { - throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arg_num >= args.size()) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table name was not found in function arguments. 
{}", static_cast(help_message)); + } + else + { + std::swap(qualified_name.database, qualified_name.table); + args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); + qualified_name.table = checkAndGetLiteralArgument(args[arg_num], "table"); + ++arg_num; + } } - else + + database = std::move(qualified_name.database); + table = std::move(qualified_name.table); + + /// Cluster function may have sharding key for insert + if (is_cluster_function && arg_num < args.size()) { - std::swap(qualified_name.database, qualified_name.table); - args[arg_num] = evaluateConstantExpressionOrIdentifierAsLiteral(args[arg_num], context); - qualified_name.table = checkAndGetLiteralArgument(args[arg_num], "table"); + sharding_key = args[arg_num]; ++arg_num; } } - - database = std::move(qualified_name.database); - table = std::move(qualified_name.table); - - /// Cluster function may have sharding key for insert - if (is_cluster_function && arg_num < args.size()) - { - sharding_key = args[arg_num]; - ++arg_num; - } } /// Username and password parameters are prohibited in cluster version of the function @@ -203,15 +220,19 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr ++arg_num; } - if (arg_num < args.size() && !sharding_key) + if (arg_num < args.size()) { + if (sharding_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arguments `user` and `password` should be string literals (in single quotes)"); sharding_key = args[arg_num]; ++arg_num; } } if (arg_num < args.size()) + { throw Exception(help_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } } if (!cluster_name.empty()) @@ -264,7 +285,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr secure, /* priority= */ Priority{1}, /* cluster_name= */ "", - /* password= */ "" + /* cluster_secret= */ "" }; cluster = std::make_shared(context->getSettingsRef(), names, params); } @@ -276,12 +297,12 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr remote_table_id.table_name = table; } -StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const +StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const { /// StorageDistributed supports mismatching structure of remote table, so we can use outdated structure for CREATE ... AS remote(...) 
/// without additional conversion in StorageTableFunctionProxy if (cached_columns.empty()) - cached_columns = getActualTableStructure(context); + cached_columns = getActualTableStructure(context, is_insert_query); assert(cluster); StoragePtr res = remote_table_function_ptr @@ -318,7 +339,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & /*ast_function*/, Con return res; } -ColumnsDescription TableFunctionRemote::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionRemote::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { assert(cluster); return getStructureOfRemoteTable(*cluster, remote_table_id, context, remote_table_function_ptr); @@ -329,11 +350,13 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_ { is_cluster_function = (name == "cluster" || name == "clusterAllReplicas"); help_message = PreformattedMessage::create( - "Table function '{}' requires from 2 to {} parameters: " - ", , {}", + "Table function '{}' requires from {} to {} parameters: " + "{}", name, + is_cluster_function ? 0 : 1, is_cluster_function ? 4 : 6, - is_cluster_function ? " [, sharding_key]" : " [, username[, password], sharding_key]"); + is_cluster_function ? "[, , ] [, sharding_key]" + : " [, , ] [, username[, password], sharding_key]"); } void registerTableFunctionRemote(TableFunctionFactory & factory) diff --git a/src/TableFunctions/TableFunctionRemote.h b/src/TableFunctions/TableFunctionRemote.h index 32039d1e6a8..0f75bf2b854 100644 --- a/src/TableFunctions/TableFunctionRemote.h +++ b/src/TableFunctions/TableFunctionRemote.h @@ -22,13 +22,13 @@ public: std::string getName() const override { return name; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; bool needStructureConversion() const override { return false; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Distributed"; } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c8cc0cddd30..0ae3a19dc7f 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -18,6 +18,8 @@ #include #include #include "registerTableFunctions.h" +#include +#include #include @@ -32,6 +34,24 @@ namespace ErrorCodes } +std::vector TableFunctionS3::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +{ + auto & table_function_node = query_node_table_function->as(); + auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); + size_t table_function_arguments_size = table_function_arguments_nodes.size(); + + std::vector result; + + for (size_t i = 0; i < table_function_arguments_size; ++i) + { + auto * function_node = table_function_arguments_nodes[i]->as(); + if (function_node && function_node->getFunctionName() == "headers") + result.push_back(i); + } + + return result; +} + /// This is needed to avoid copy-pase. 
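The TableFunctionRemote hunks above relax argument parsing: for `cluster()`/`clusterAllReplicas()` the cluster name may now be omitted (falling back to "default"), the database and table may be omitted (falling back to `system.one`), and a bare database name without a following table argument is rejected with a dedicated error. Below is a toy parser that mirrors only that branching order; all ClickHouse specifics (qualified-name evaluation, users, passwords, sharding keys) are deliberately left out.

```cpp
// Hedged sketch of the relaxed cluster()-style argument handling shown in the diff.
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct ParsedTarget
{
    std::string cluster = "default";
    std::string database = "system";
    std::string table = "one";   // the one-row table used when no table is given
};

static ParsedTarget parseClusterArgsSketch(const std::vector<std::string> & args)
{
    ParsedTarget result;
    size_t arg_num = 0;

    if (arg_num < args.size())
        result.cluster = args[arg_num++];          // cluster('name', ...)

    if (arg_num < args.size())
    {
        const std::string & db_or_qualified = args[arg_num++];
        auto dot = db_or_qualified.find('.');
        if (dot != std::string::npos)
        {
            // cluster('name', 'db.table')
            result.database = db_or_qualified.substr(0, dot);
            result.table = db_or_qualified.substr(dot + 1);
        }
        else if (arg_num < args.size())
        {
            // cluster('name', 'db', 'table')
            result.database = db_or_qualified;
            result.table = args[arg_num++];
        }
        else
        {
            // A lone unqualified name with no table argument is rejected in the real code.
            throw std::invalid_argument("Table name was not found in function arguments");
        }
    }

    return result;
}

int main()
{
    for (const auto & args : std::vector<std::vector<std::string>>{
             {}, {"my_cluster"}, {"my_cluster", "db.events"}, {"my_cluster", "db", "events"}})
    {
        auto t = parseClusterArgsSketch(args);
        std::cout << t.cluster << " -> " << t.database << '.' << t.table << '\n';
    }
}
```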
Because s3Cluster arguments only differ in additional argument (first) - cluster name void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context) { @@ -41,13 +61,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context } else { - if (args.empty() || args.size() > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context); if (header_it != args.end()) args.erase(header_it); + if (args.empty() || args.size() > 6) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); + for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); @@ -160,7 +181,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context configuration.keys = {configuration.url.key}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.uri.getPath(), true); + configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(configuration.url.uri.getPath()).getPath(), true); } void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) @@ -292,7 +313,7 @@ void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & } } -ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (configuration.structure == "auto") { @@ -309,7 +330,7 @@ bool TableFunctionS3::supportsReadingSubsetOfColumns() return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format); } -StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { S3::URI s3_uri (configuration.url); diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index c983bec9bf4..ac932260792 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -64,15 +64,20 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "S3"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; mutable StorageS3::Configuration configuration; ColumnsDescription structure_hint; + +private: + + std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; }; } diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 1d93132c411..ce96f7f580b 100644 --- 
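Two smaller TableFunctionS3 adjustments appear above: `StorageURL::collectHeaders` now runs before the positional-argument count check, so a `headers(...)` argument no longer counts toward the 1–6 limit (and the new `skipAnalysisForArguments` tells the analyzer to leave those nodes alone), and the format is now guessed from the key after re-parsing it through `Poco::URI`. The sketch below imitates both ideas with plain strings; the query-string strip is only an analogy for that URI normalization, not its exact behavior.

```cpp
// Illustrative sketch of the two S3 argument-handling tweaks, using plain strings
// instead of ClickHouse ASTs and Poco::URI.
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

static void validatePositionalArgsSketch(std::vector<std::string> & args)
{
    // Remove the headers(...) entry first, so it never counts toward the 1..6 limit.
    args.erase(std::remove_if(args.begin(), args.end(),
                              [](const std::string & a) { return a.rfind("headers(", 0) == 0; }),
               args.end());

    if (args.empty() || args.size() > 6)
        throw std::invalid_argument("s3 expects between 1 and 6 positional arguments");
}

static std::string formatFromFileNameSketch(const std::string & url_path)
{
    // Normalize the raw key (here: drop a query suffix) before guessing the format
    // from the extension, so "data.csv?X-Amz-Signature=..." is still detected as csv.
    std::string path = url_path.substr(0, url_path.find('?'));
    auto dot = path.rfind('.');
    return dot == std::string::npos ? "auto" : path.substr(dot + 1);
}

int main()
{
    std::vector<std::string> args =
        {"'https://bucket.s3.amazonaws.com/data.csv'", "headers('X-My-Header'='1')", "'CSV'"};
    validatePositionalArgsSketch(args);
    std::cout << "positional args after filtering: " << args.size() << '\n';           // 2
    std::cout << "format: " << formatFromFileNameSketch("data.csv?X-Amz-Signature=abc") << '\n';   // csv
}
```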
a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -17,7 +17,7 @@ namespace DB StoragePtr TableFunctionS3Cluster::executeImpl( const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { StoragePtr storage; ColumnsDescription columns; diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 459ff144f02..4fe25079cf4 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -52,7 +52,8 @@ protected: const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, - ColumnsDescription cached_columns) const override; + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "S3Cluster"; } }; diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp index a9831363bd9..27e6fcf1fd1 100644 --- a/src/TableFunctions/TableFunctionSQLite.cpp +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -29,7 +29,7 @@ namespace ErrorCodes StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/) const + ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { auto storage = std::make_shared(StorageID(getDatabaseName(), table_name), sqlite_db, @@ -42,7 +42,7 @@ StoragePtr TableFunctionSQLite::executeImpl(const ASTPtr & /*ast_function*/, } -ColumnsDescription TableFunctionSQLite::getActualTableStructure(ContextPtr /* context */) const +ColumnsDescription TableFunctionSQLite::getActualTableStructure(ContextPtr /* context */, bool /*is_insert_query*/) const { return StorageSQLite::getTableStructureFromData(sqlite_db, remote_table_name); } diff --git a/src/TableFunctions/TableFunctionSQLite.h b/src/TableFunctions/TableFunctionSQLite.h index fded5646b39..74318f058a9 100644 --- a/src/TableFunctions/TableFunctionSQLite.h +++ b/src/TableFunctions/TableFunctionSQLite.h @@ -18,11 +18,11 @@ public: private: StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns) const override; + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SQLite"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; String database_path, remote_table_name; diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 4ed204a2af3..8d5a023fc3b 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -118,7 +118,7 @@ StoragePtr TableFunctionURL::getStorage( configuration.http_method); } -ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionURL::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { if (structure == "auto") { diff --git 
a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index 021eb71df53..5e58a36dde9 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -32,7 +32,7 @@ public: return signature; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context); diff --git a/src/TableFunctions/TableFunctionURLCluster.cpp b/src/TableFunctions/TableFunctionURLCluster.cpp index c94943db758..a2949278155 100644 --- a/src/TableFunctions/TableFunctionURLCluster.cpp +++ b/src/TableFunctions/TableFunctionURLCluster.cpp @@ -38,7 +38,7 @@ StoragePtr TableFunctionURLCluster::getStorage( format, compression_method, StorageID(getDatabaseName(), table_name), - getActualTableStructure(context), + getActualTableStructure(context, /* is_insert_query */ true), ConstraintsDescription{}, configuration, structure != "auto"); diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index cf0e20c624c..42a19874704 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -119,14 +119,14 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr structure = ColumnsDescription(names_and_types); } -ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { return structure; } -StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); Block sample_block; for (const auto & name_type : columns.getOrdinary()) diff --git a/src/TableFunctions/TableFunctionValues.h b/src/TableFunctions/TableFunctionValues.h index 61ce5158086..7c87bff835e 100644 --- a/src/TableFunctions/TableFunctionValues.h +++ b/src/TableFunctions/TableFunctionValues.h @@ -14,10 +14,10 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "Values"; } - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; static DataTypes getTypesFromArgument(const ASTPtr & arg, ContextPtr context); diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index 6b50e7e0611..2a50fb2d006 100644 
--- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -41,7 +41,7 @@ void TableFunctionView::parseArguments(const ASTPtr & ast_function, ContextPtr / throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires a query argument.", getName()); } -ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { assert(create.select); assert(create.children.size() == 1); @@ -58,9 +58,9 @@ ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context } StoragePtr TableFunctionView::executeImpl( - const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const + const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const { - auto columns = getActualTableStructure(context); + auto columns = getActualTableStructure(context, is_insert_query); auto res = std::make_shared(StorageID(getDatabaseName(), table_name), create, columns, ""); res->startup(); return res; diff --git a/src/TableFunctions/TableFunctionView.h b/src/TableFunctions/TableFunctionView.h index bbf072655ed..c679a1f315d 100644 --- a/src/TableFunctions/TableFunctionView.h +++ b/src/TableFunctions/TableFunctionView.h @@ -21,7 +21,7 @@ public: const ASTSelectWithUnionQuery & getSelectQuery() const; private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "View"; } @@ -29,7 +29,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; ASTCreateQuery create; }; diff --git a/src/TableFunctions/TableFunctionViewIfPermitted.cpp b/src/TableFunctions/TableFunctionViewIfPermitted.cpp index 12762e24f7e..d7944df1b28 100644 --- a/src/TableFunctions/TableFunctionViewIfPermitted.cpp +++ b/src/TableFunctions/TableFunctionViewIfPermitted.cpp @@ -55,16 +55,16 @@ void TableFunctionViewIfPermitted::parseArguments(const ASTPtr & ast_function, C else_table_function = TableFunctionFactory::instance().get(else_ast, context); } -ColumnsDescription TableFunctionViewIfPermitted::getActualTableStructure(ContextPtr context) const +ColumnsDescription TableFunctionViewIfPermitted::getActualTableStructure(ContextPtr context, bool is_insert_query) const { - return else_table_function->getActualTableStructure(context); + return else_table_function->getActualTableStructure(context, is_insert_query); } StoragePtr TableFunctionViewIfPermitted::executeImpl( - const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, ColumnsDescription /* cached_columns */) const + const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, ColumnsDescription /* cached_columns */, bool is_insert_query) const { StoragePtr storage; - auto columns = getActualTableStructure(context); + auto columns = 
getActualTableStructure(context, is_insert_query); if (isPermitted(context, columns)) { diff --git a/src/TableFunctions/TableFunctionViewIfPermitted.h b/src/TableFunctions/TableFunctionViewIfPermitted.h index 9fdb34f30ab..bee4e15bfa5 100644 --- a/src/TableFunctions/TableFunctionViewIfPermitted.h +++ b/src/TableFunctions/TableFunctionViewIfPermitted.h @@ -20,7 +20,7 @@ public: const ASTSelectWithUnionQuery & getSelectQuery() const; private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "ViewIfPermitted"; } @@ -28,7 +28,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; bool isPermitted(const ContextPtr & context, const ColumnsDescription & else_columns) const; diff --git a/src/TableFunctions/TableFunctionZeros.cpp b/src/TableFunctions/TableFunctionZeros.cpp index 3c487362e1f..eb93626590e 100644 --- a/src/TableFunctions/TableFunctionZeros.cpp +++ b/src/TableFunctions/TableFunctionZeros.cpp @@ -20,14 +20,14 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; template -ColumnsDescription TableFunctionZeros::getActualTableStructure(ContextPtr /*context*/) const +ColumnsDescription TableFunctionZeros::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const { /// NOTE: https://bugs.llvm.org/show_bug.cgi?id=47418 return ColumnsDescription{{{"zero", std::make_shared()}}}; } template -StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const +StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const { if (const auto * function = ast_function->as()) { diff --git a/src/TableFunctions/TableFunctionZeros.h b/src/TableFunctions/TableFunctionZeros.h index eef1577673e..07d523ee37c 100644 --- a/src/TableFunctions/TableFunctionZeros.h +++ b/src/TableFunctions/TableFunctionZeros.h @@ -19,12 +19,12 @@ public: std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SystemZeros"; } UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; - ColumnsDescription getActualTableStructure(ContextPtr context) const override; + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; }; diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index eb6e0372223..6dd29551f48 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ 
b/src/TableFunctions/registerTableFunctions.cpp @@ -75,6 +75,7 @@ void registerTableFunctions() #if USE_AZURE_BLOB_STORAGE registerTableFunctionAzureBlobStorage(factory); + registerTableFunctionAzureBlobStorageCluster(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 758e193e88f..db212698e65 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -72,6 +72,7 @@ void registerTableFunctionExplain(TableFunctionFactory & factory); #if USE_AZURE_BLOB_STORAGE void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory); +void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory); #endif void registerTableFunctions(); diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c11a19b36ea..9f038f10fcd 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -25,6 +25,9 @@ endif() if (TARGET ch_rust::skim) set(USE_SKIM 1) endif() +if (TARGET ch_rust::prql) + set(USE_PRQL 1) +endif() if (TARGET OpenSSL::SSL) set(USE_SSL 1) endif() @@ -159,3 +162,8 @@ endif () if (TARGET ch_contrib::fiu) set(FIU_ENABLE 1) endif() +if (TARGET ch_contrib::libarchive) + set(USE_LIBARCHIVE 1) +endif() + +set(SOURCE_DIR ${CMAKE_SOURCE_DIR}) diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt new file mode 100644 index 00000000000..20ea31efa70 --- /dev/null +++ b/tests/analyzer_integration_broken_tests.txt @@ -0,0 +1,97 @@ +test_access_for_functions/test.py::test_access_rights_for_function +test_concurrent_backups_s3/test.py::test_concurrent_backups +test_distributed_ddl/test.py::test_default_database[configs] +test_distributed_ddl/test.py::test_default_database[configs_secure] +test_distributed_ddl/test.py::test_on_server_fail[configs] +test_distributed_ddl/test.py::test_on_server_fail[configs_secure] +test_distributed_insert_backward_compatibility/test.py::test_distributed_in_tuple +test_distributed_load_balancing/test.py::test_distributed_replica_max_ignored_errors +test_distributed_load_balancing/test.py::test_load_balancing_default +test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority] +test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority_negative] +test_distributed_load_balancing/test.py::test_load_balancing_round_robin +test_mask_sensitive_info/test.py::test_encryption_functions +test_merge_table_over_distributed/test.py::test_global_in +test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed +test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task +test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster +test_row_policy/test.py::test_change_of_users_xml_changes_row_policies +test_row_policy/test.py::test_change_of_users_xml_changes_row_policies +test_row_policy/test.py::test_dcl_introspection +test_row_policy/test.py::test_dcl_introspection +test_row_policy/test.py::test_dcl_management +test_row_policy/test.py::test_dcl_management +test_row_policy/test.py::test_dcl_users_with_policies_from_users_xml +test_row_policy/test.py::test_dcl_users_with_policies_from_users_xml +test_row_policy/test.py::test_grant_create_row_policy +test_row_policy/test.py::test_grant_create_row_policy +test_row_policy/test.py::test_introspection +test_row_policy/test.py::test_introspection 
+test_row_policy/test.py::test_join +test_row_policy/test.py::test_join +test_row_policy/test.py::test_miscellaneous_engines +test_row_policy/test.py::test_miscellaneous_engines +test_row_policy/test.py::test_policy_from_users_xml_affects_only_user_assigned +test_row_policy/test.py::test_policy_from_users_xml_affects_only_user_assigned +test_row_policy/test.py::test_policy_on_distributed_table_via_role +test_row_policy/test.py::test_policy_on_distributed_table_via_role +test_row_policy/test.py::test_reload_users_xml_by_timer +test_row_policy/test.py::test_reload_users_xml_by_timer +test_row_policy/test.py::test_row_policy_filter_with_subquery +test_row_policy/test.py::test_row_policy_filter_with_subquery +test_row_policy/test.py::test_smoke +test_row_policy/test.py::test_smoke +test_row_policy/test.py::test_some_users_without_policies +test_row_policy/test.py::test_some_users_without_policies +test_row_policy/test.py::test_tags_with_db_and_table_names +test_row_policy/test.py::test_tags_with_db_and_table_names +test_row_policy/test.py::test_throwif_error_in_prewhere_with_same_condition_as_filter +test_row_policy/test.py::test_throwif_error_in_prewhere_with_same_condition_as_filter +test_row_policy/test.py::test_throwif_error_in_where_with_same_condition_as_filter +test_row_policy/test.py::test_throwif_error_in_where_with_same_condition_as_filter +test_row_policy/test.py::test_throwif_in_prewhere_doesnt_expose_restricted_data +test_row_policy/test.py::test_throwif_in_prewhere_doesnt_expose_restricted_data +test_row_policy/test.py::test_throwif_in_where_doesnt_expose_restricted_data +test_row_policy/test.py::test_throwif_in_where_doesnt_expose_restricted_data +test_row_policy/test.py::test_users_xml_is_readonly +test_row_policy/test.py::test_users_xml_is_readonly +test_row_policy/test.py::test_with_prewhere +test_row_policy/test.py::test_with_prewhere +test_settings_constraints_distributed/test.py::test_select_clamps_settings +test_compression_codec_read/test.py::test_default_codec_read +test_dictionaries_update_and_reload/test.py::test_reload_after_fail_in_cache_dictionary +test_distributed_type_object/test.py::test_distributed_type_object +test_materialized_mysql_database/test.py::test_select_without_columns_5_7 +test_materialized_mysql_database/test.py::test_select_without_columns_8_0 +test_shard_level_const_function/test.py::test_remote +test_storage_postgresql/test.py::test_postgres_select_insert +test_storage_rabbitmq/test.py::test_rabbitmq_materialized_view +test_system_merges/test.py::test_mutation_simple[] +test_system_merges/test.py::test_mutation_simple[replicated] +test_disk_over_web_server/test.py::test_cache[node2] +test_disk_over_web_server/test.py::test_incorrect_usage +test_disk_over_web_server/test.py::test_replicated_database +test_disk_over_web_server/test.py::test_unavailable_server +test_disk_over_web_server/test.py::test_usage[node2] +test_distributed_backward_compatability/test.py::test_distributed_in_tuple +test_executable_table_function/test.py::test_executable_function_input_python +test_settings_profile/test.py::test_show_profiles +test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster +test_postgresql_protocol/test.py::test_python_client +test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database +test_profile_events_s3/test.py::test_profile_events +test_user_defined_object_persistence/test.py::test_persistence +test_settings_profile/test.py::test_show_profiles 
+test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster +test_select_access_rights/test_main.py::test_alias_columns +test_select_access_rights/test_main.py::test_select_count +test_select_access_rights/test_main.py::test_select_join +test_postgresql_protocol/test.py::test_python_client +test_replicating_constants/test.py::test_different_versions +test_merge_tree_s3/test.py::test_heavy_insert_select_check_memory[node] +test_wrong_db_or_table_name/test.py::test_wrong_table_name +test_drop_is_lock_free/test.py::test_query_is_lock_free[detach table] +test_odbc_interaction/test.py::test_postgres_insert +test_zookeeper_config/test.py::test_chroot_with_different_root +test_zookeeper_config/test.py::test_chroot_with_same_root +test_merge_tree_azure_blob_storage/test.py::test_table_manipulations diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index e0f259306aa..15d46403da9 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -2,16 +2,13 @@ 00562_in_subquery_merge_tree 00593_union_all_assert_columns_removed 00673_subquery_prepared_set_performance -00700_decimal_compare 00717_merge_and_distributed 00725_memory_tracking 00754_distributed_optimize_skip_select_on_unused_shards 00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere -00838_unique_index 00927_asof_joins 00940_order_by_read_in_order_query_plan 00945_bloom_filter_index -00979_set_index_not 00981_in_subquery_with_tuple 01049_join_low_card_bug_long 01062_pm_all_join_with_block_continuation @@ -72,7 +69,6 @@ 01925_test_storage_merge_aliases 01930_optimize_skip_unused_shards_rewrite_in 01947_mv_subquery -01951_distributed_push_down_limit 01952_optimize_distributed_group_by_sharding_key 02000_join_on_const 02001_shard_num_shard_count @@ -82,7 +78,6 @@ 02242_join_rocksdb 02267_join_dup_columns_issue36199 02302_s3_file_pruning -02317_distinct_in_order_optimization_explain 02341_global_join_cte 02345_implicit_transaction 02352_grouby_shadows_arg @@ -92,9 +87,7 @@ 02382_join_and_filtering_set 02402_merge_engine_with_view 02404_memory_bound_merging -02421_decimal_in_precision_issue_41125 02426_orc_bug -02428_decimal_in_floating_point_literal 02428_parameterized_view 02458_use_structure_from_insertion_table 02479_race_condition_between_insert_and_droppin_mv @@ -130,3 +123,7 @@ 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long 00992_system_parts_race_condition_zookeeper_long +02818_parameterized_view_with_cte_multiple_usage +02790_optimize_skip_unused_shards_join +01940_custom_tld_sharding_key +02815_range_dict_no_direct_join diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 514aaf7e2ac..a8c7f2c3a9c 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -8,7 +8,11 @@ import sys from github import Github from build_download_helper import get_build_name_for_check, read_build_urls -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from clickhouse_helper import ( + ClickHouseHelper, + prepare_tests_results_for_clickhouse, + get_instance_type, +) from commit_status_helper import ( RerunHelper, format_description, @@ -17,7 +21,6 @@ from commit_status_helper import ( ) from docker_pull_helper import get_image_with_version from env_helper import ( - GITHUB_RUN_URL, REPORTS_PATH, TEMP_PATH, ) @@ -26,19 +29,37 @@ from pr_info import PRInfo from report import TestResult from s3_helper import S3Helper from 
stopwatch import Stopwatch +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/fuzzer" -def get_run_command(pr_number, sha, download_url, workspace_path, image): +def get_run_command( + check_start_time, check_name, pr_number, sha, download_url, workspace_path, image +): + instance_type = get_instance_type() + + envs = [ + "-e CLICKHOUSE_CI_LOGS_HOST", + "-e CLICKHOUSE_CI_LOGS_PASSWORD", + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + f"-e PR_TO_TEST={pr_number}", + f"-e SHA_TO_TEST={sha}", + f"-e BINARY_URL_TO_DOWNLOAD='{download_url}'", + ] + + env_str = " ".join(envs) + return ( f"docker run " # For sysctl "--privileged " "--network=host " f"--volume={workspace_path}:/workspace " + f"{env_str} " "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE " - f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" ' f"{image}" ) @@ -79,7 +100,7 @@ def main(): build_url = url break else: - raise Exception("Cannot binary clickhouse among build results") + raise Exception("Cannot find the clickhouse binary among build results") logging.info("Got build url %s", build_url) @@ -88,11 +109,19 @@ def main(): os.makedirs(workspace_path) run_command = get_run_command( - pr_info.number, pr_info.sha, build_url, workspace_path, docker_image + stopwatch.start_time_str, + check_name, + pr_info.number, + pr_info.sha, + build_url, + workspace_path, + docker_image, ) logging.info("Going to run %s", run_command) run_log_path = os.path.join(temp_path, "run.log") + main_log_path = os.path.join(workspace_path, "main.log") + with open(run_log_path, "w", encoding="utf-8") as log: with subprocess.Popen( run_command, shell=True, stderr=log, stdout=log @@ -105,20 +134,47 @@ def main(): subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + # Cleanup run log from the credentials of CI logs database. + # Note: a malicious user can still print them by splitting the value into parts. + # But we will be warned when a malicious user modifies CI script. + # Although they can also print them from inside tests. + # Nevertheless, the credentials of the CI logs have limited scope + # and does not provide access to sensitive info. 
+ + ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST") + ci_logs_password = os.getenv( + "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" + ) + + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): + subprocess.check_call( + f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}' '{main_log_path}'", + shell=True, + ) + check_name_lower = ( check_name.lower().replace("(", "").replace(")", "").replace(" ", "") ) s3_prefix = f"{pr_info.number}/{pr_info.sha}/fuzzer_{check_name_lower}/" paths = { "run.log": run_log_path, - "main.log": os.path.join(workspace_path, "main.log"), - "server.log.zst": os.path.join(workspace_path, "server.log.zst"), + "main.log": main_log_path, "fuzzer.log": os.path.join(workspace_path, "fuzzer.log"), "report.html": os.path.join(workspace_path, "report.html"), "core.zst": os.path.join(workspace_path, "core.zst"), "dmesg.log": os.path.join(workspace_path, "dmesg.log"), } + compressed_server_log_path = os.path.join(workspace_path, "server.log.zst") + if os.path.exists(compressed_server_log_path): + paths["server.log.zst"] = compressed_server_log_path + + # The script can fail before the invocation of `zstd`, but we are still interested in its log: + + not_compressed_server_log_path = os.path.join(workspace_path, "server.log") + if os.path.exists(not_compressed_server_log_path): + paths["server.log"] = not_compressed_server_log_path + s3_helper = S3Helper() for f in paths: try: @@ -127,10 +183,6 @@ def main(): logging.info("Exception uploading file %s text %s", f, ex) paths[f] = "" - report_url = GITHUB_RUN_URL - if paths["report.html"]: - report_url = paths["report.html"] - # Try to get status message saved by the fuzzer try: with open( @@ -152,6 +204,19 @@ def main(): if "fail" in status: test_result.status = "FAIL" + if paths["report.html"]: + report_url = paths["report.html"] + else: + report_url = upload_results( + s3_helper, + pr_info.number, + pr_info.sha, + [test_result], + [], + check_name, + [url for url in paths.values() if url], + ) + ch_helper = ClickHouseHelper() prepared_events = prepare_tests_results_for_clickhouse( diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 35b98a7c3bb..3e853c0ce41 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -1,20 +1,16 @@ #!/usr/bin/env python3 +from pathlib import Path +from typing import List, Tuple import subprocess import logging import json import os import sys import time -from typing import List, Tuple from ci_config import CI_CONFIG, BuildConfig -from commit_status_helper import ( - NotSet, - get_commit_filtered_statuses, - get_commit, - post_commit_status, -) +from ccache_utils import CargoCache from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_JOB, @@ -24,28 +20,32 @@ from env_helper import ( S3_DOWNLOAD, TEMP_PATH, ) -from get_robot_token import get_best_robot_token -from github_helper import GitHub +from git_helper import Git, git_runner from pr_info import PRInfo from s3_helper import S3Helper from tee_popen import TeePopen from version_helper import ( ClickHouseVersion, - Git, get_version_from_repo, update_version_local, ) +from clickhouse_helper import ( + ClickHouseHelper, + prepare_tests_results_for_clickhouse, + get_instance_type, +) +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/binary-builder" BUILD_LOG_NAME = "build_log.log" def _can_export_binaries(build_config: BuildConfig) -> bool: - if 
build_config["package_type"] != "deb": + if build_config.package_type != "deb": return False - if build_config["sanitizer"] != "": + if build_config.sanitizer != "": return True - if build_config["build_type"] != "": + if build_config.debug_build: return True return False @@ -53,34 +53,37 @@ def _can_export_binaries(build_config: BuildConfig) -> bool: def get_packager_cmd( build_config: BuildConfig, packager_path: str, - output_path: str, + output_path: Path, + cargo_cache_dir: Path, build_version: str, image_version: str, official: bool, ) -> str: - package_type = build_config["package_type"] - comp = build_config["compiler"] + package_type = build_config.package_type + comp = build_config.compiler cmake_flags = "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=1" cmd = ( - f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager --output-dir={output_path} " - f"--package-type={package_type} --compiler={comp}" + f"cd {packager_path} && CMAKE_FLAGS='{cmake_flags}' ./packager " + f"--output-dir={output_path} --package-type={package_type} --compiler={comp}" ) - if build_config["build_type"]: - cmd += f" --build-type={build_config['build_type']}" - if build_config["sanitizer"]: - cmd += f" --sanitizer={build_config['sanitizer']}" - if build_config["tidy"] == "enable": + if build_config.debug_build: + cmd += " --debug-build" + if build_config.sanitizer: + cmd += f" --sanitizer={build_config.sanitizer}" + if build_config.tidy: cmd += " --clang-tidy" cmd += " --cache=sccache" cmd += " --s3-rw-access" cmd += f" --s3-bucket={S3_BUILDS_BUCKET}" + cmd += f" --cargo-cache-dir={cargo_cache_dir}" - if "additional_pkgs" in build_config and build_config["additional_pkgs"]: + if build_config.additional_pkgs: cmd += " --additional-pkgs" cmd += f" --docker-image-version={image_version}" + cmd += " --with-profiler" cmd += f" --version={build_version}" if _can_export_binaries(build_config): @@ -93,13 +96,13 @@ def get_packager_cmd( def build_clickhouse( - packager_cmd: str, logs_path: str, build_output_path: str -) -> Tuple[str, bool]: - build_log_path = os.path.join(logs_path, BUILD_LOG_NAME) + packager_cmd: str, logs_path: Path, build_output_path: Path +) -> Tuple[Path, bool]: + build_log_path = logs_path / BUILD_LOG_NAME success = False with TeePopen(packager_cmd, build_log_path) as process: retcode = process.wait() - if os.path.exists(build_output_path): + if build_output_path.exists(): build_results = os.listdir(build_output_path) else: build_results = [] @@ -183,7 +186,7 @@ def create_json_artifact( result = { "log_url": log_url, "build_urls": build_urls, - "build_config": build_config, + "build_config": build_config.__dict__, "elapsed_seconds": elapsed, "status": success, "job_name": GITHUB_JOB, @@ -220,10 +223,10 @@ def upload_master_static_binaries( pr_info: PRInfo, build_config: BuildConfig, s3_helper: S3Helper, - build_output_path: str, + build_output_path: Path, ) -> None: """Upload binary artifacts to a static S3 links""" - static_binary_name = build_config.get("static_binary_name", False) + static_binary_name = build_config.static_binary_name if pr_info.number != 0: return elif not static_binary_name: @@ -232,48 +235,21 @@ def upload_master_static_binaries( return s3_path = "/".join((pr_info.base_ref, static_binary_name, "clickhouse")) - binary = os.path.join(build_output_path, "clickhouse") + binary = build_output_path / "clickhouse" url = s3_helper.upload_build_file_to_s3(binary, s3_path) print(f"::notice ::Binary static URL: {url}") -def mark_failed_reports_pending(build_name: str, pr_info: PRInfo) 
-> None: - try: - gh = GitHub(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - statuses = get_commit_filtered_statuses(commit) - report_status = [ - name - for name, builds in CI_CONFIG["builds_report_config"].items() - if build_name in builds - ][0] - for status in statuses: - if status.context == report_status and status.state in ["failure", "error"]: - logging.info( - "Commit already have failed status for '%s', setting it to 'pending'", - report_status, - ) - post_commit_status( - commit, - "pending", - status.target_url or NotSet, - "Set to pending on rerun", - report_status, - pr_info, - ) - except: # we do not care about any exception here - logging.info("Failed to get or mark the reports status as pending, continue") - - def main(): logging.basicConfig(level=logging.INFO) + stopwatch = Stopwatch() build_name = sys.argv[1] - build_config = CI_CONFIG["build_config"][build_name] + build_config = CI_CONFIG.build_config[build_name] - if not os.path.exists(TEMP_PATH): - os.makedirs(TEMP_PATH) + temp_path = Path(TEMP_PATH) + os.makedirs(temp_path, exist_ok=True) pr_info = PRInfo() @@ -294,17 +270,12 @@ def main(): # put them as github actions artifact (result) check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config) - # If it's a latter running, we need to mark possible failed status - mark_failed_reports_pending(build_name, pr_info) - docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME) image_version = docker_image.version logging.info("Got version from repo %s", version.string) official_flag = pr_info.number == 0 - if "official" in build_config: - official_flag = build_config["official"] version_type = "testing" if "release" in pr_info.labels or "release-lts" in pr_info.labels: @@ -317,14 +288,18 @@ def main(): logging.info("Build short name %s", build_name) - build_output_path = os.path.join(TEMP_PATH, build_name) - if not os.path.exists(build_output_path): - os.makedirs(build_output_path) + build_output_path = temp_path / build_name + os.makedirs(build_output_path, exist_ok=True) + cargo_cache = CargoCache( + temp_path / "cargo_cache" / "registry", temp_path, s3_helper + ) + cargo_cache.download() packager_cmd = get_packager_cmd( build_config, os.path.join(REPO_COPY, "docker/packager"), build_output_path, + cargo_cache.directory, version.string, image_version, official_flag, @@ -332,9 +307,8 @@ def main(): logging.info("Going to run packager with %s", packager_cmd) - logs_path = os.path.join(TEMP_PATH, "build_log") - if not os.path.exists(logs_path): - os.makedirs(logs_path) + logs_path = temp_path / "build_log" + os.makedirs(logs_path, exist_ok=True) start = time.time() log_path, success = build_clickhouse(packager_cmd, logs_path, build_output_path) @@ -343,6 +317,9 @@ def main(): f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True ) logging.info("Build finished with %s, log path %s", success, log_path) + if success: + cargo_cache.upload() + if not success: # We check if docker works, because if it's down, it's infrastructure try: @@ -355,8 +332,8 @@ def main(): # FIXME performance performance_urls = [] - performance_path = os.path.join(build_output_path, "performance.tar.zst") - if os.path.exists(performance_path): + performance_path = build_output_path / "performance.tar.zst" + if performance_path.exists(): performance_urls.append( s3_helper.upload_build_file_to_s3(performance_path, s3_performance_path) ) @@ -379,9 +356,9 @@ def main(): print("::notice ::Build URLs: {}".format("\n".join(build_urls))) - if 
os.path.exists(log_path): + if log_path.exists(): log_url = s3_helper.upload_build_file_to_s3( - log_path, s3_path_prefix + "/" + os.path.basename(log_path) + log_path, s3_path_prefix + "/" + log_path.name ) logging.info("Log url %s", log_url) else: @@ -394,7 +371,92 @@ def main(): ) upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path) - # Fail build job if not successeded + + # Upload profile data + ch_helper = ClickHouseHelper() + + clickhouse_ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "") + if clickhouse_ci_logs_host: + instance_type = get_instance_type() + query = f"""INSERT INTO build_time_trace +( + pull_request_number, + commit_sha, + check_start_time, + check_name, + instance_type, + file, + library, + time, + pid, + tid, + ph, + ts, + dur, + cat, + name, + detail, + count, + avgMs, + args_name +) +SELECT {pr_info.number}, '{pr_info.sha}', '{stopwatch.start_time_str}', '{build_name}', '{instance_type}', * +FROM input(' + file String, + library String, + time DateTime64(6), + pid UInt32, + tid UInt32, + ph String, + ts UInt64, + dur UInt64, + cat String, + name String, + detail String, + count UInt64, + avgMs UInt64, + args_name String') +FORMAT JSONCompactEachRow""" + + auth = { + "X-ClickHouse-User": "ci", + "X-ClickHouse-Key": os.getenv("CLICKHOUSE_CI_LOGS_PASSWORD", ""), + } + url = f"https://{clickhouse_ci_logs_host}/" + profiles_dir = temp_path / "profiles_source" + os.makedirs(profiles_dir, exist_ok=True) + logging.info("Processing profile JSON files from {GIT_REPO_ROOT}/build_docker") + git_runner( + "./utils/prepare-time-trace/prepare-time-trace.sh " + f"build_docker {profiles_dir.absolute()}" + ) + profile_data_file = temp_path / "profile.json" + with open(profile_data_file, "wb") as profile_fd: + for profile_sourse in os.listdir(profiles_dir): + with open(profiles_dir / profile_sourse, "rb") as ps_fd: + profile_fd.write(ps_fd.read()) + + logging.info( + "::notice ::Log Uploading profile data, path: %s, size: %s, query: %s", + profile_data_file, + profile_data_file.stat().st_size, + query, + ) + ch_helper.insert_file(url, auth, query, profile_data_file) + + # Upload statistics to CI database + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + [], + "success" if success else "failure", + stopwatch.duration_seconds, + stopwatch.start_time_str, + log_url, + f"Build ({build_name})", + ) + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + # Fail the build job if it didn't succeed if not success: sys.exit(1) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 47c11ee0911..a6fda749494 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -16,6 +16,10 @@ from ci_config import CI_CONFIG DOWNLOAD_RETRIES_COUNT = 5 +class DownloadException(Exception): + pass + + def get_with_retries( url: str, retries: int = DOWNLOAD_RETRIES_COUNT, @@ -91,7 +95,7 @@ def get_gh_api( def get_build_name_for_check(check_name: str) -> str: - return CI_CONFIG["tests_config"][check_name]["required_build"] # type: ignore + return CI_CONFIG.test_configs[check_name].required_build def read_build_urls(build_name: str, reports_path: str) -> List[str]: @@ -149,7 +153,9 @@ def download_build_with_progress(url: str, path: Path) -> None: if os.path.exists(path): os.remove(path) else: - raise Exception(f"Cannot download dataset from {url}, all retries exceeded") + raise DownloadException( + f"Cannot download dataset from {url}, all retries exceeded" + 
) if sys.stdout.isatty(): sys.stdout.write("\n") @@ -174,7 +180,7 @@ def download_builds_filter( print(urls) if not urls: - raise Exception("No build URLs found") + raise DownloadException("No build URLs found") download_builds(result_path, urls, filter_fn) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1362f3c8934..a134cb19346 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -70,7 +70,7 @@ def get_failed_report( message = f"{job_name} failed" build_result = BuildResult( compiler="unknown", - build_type="unknown", + debug_build=False, sanitizer="unknown", status=message, elapsed_seconds=0, @@ -85,7 +85,7 @@ def process_report( build_config = build_report["build_config"] build_result = BuildResult( compiler=build_config["compiler"], - build_type=build_config["build_type"], + debug_build=build_config["debug_build"], sanitizer=build_config["sanitizer"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], @@ -149,7 +149,7 @@ def main(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - builds_for_check = CI_CONFIG["builds_report_config"][build_check_name] + builds_for_check = CI_CONFIG.builds_report_config[build_check_name] required_builds = required_builds or len(builds_for_check) # Collect reports from json artifacts diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index 250655ddeb2..8282fb7768a 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -323,7 +323,9 @@ def main(event): if action == "edited": print("PR is edited, check if the body is correct") - error, category = check_pr_description(pull_request["body"]) + error, _ = check_pr_description( + pull_request["body"], pull_request["base"]["repo"]["full_name"] + ) if error: print( f"The PR's body is wrong, is going to comment it. 
The error is: {error}" diff --git a/tests/ci/ccache_utils.py b/tests/ci/ccache_utils.py index e8522127132..e726bf80b92 100644 --- a/tests/ci/ccache_utils.py +++ b/tests/ci/ccache_utils.py @@ -1,71 +1,31 @@ #!/usr/bin/env python3 import logging -import time -import sys import os import shutil +from hashlib import md5 from pathlib import Path import requests # type: ignore +from build_download_helper import download_build_with_progress, DownloadException from compress_files import decompress_fast, compress_fast from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET +from git_helper import git_runner from s3_helper import S3Helper DOWNLOAD_RETRIES_COUNT = 5 -def dowload_file_with_progress(url, path): - logging.info("Downloading from %s to temp path %s", url, path) - for i in range(DOWNLOAD_RETRIES_COUNT): - try: - with open(path, "wb") as f: - response = requests.get(url, stream=True) - response.raise_for_status() - total_length = response.headers.get("content-length") - if total_length is None or int(total_length) == 0: - logging.info( - "No content-length, will download file without progress" - ) - f.write(response.content) - else: - dl = 0 - total_length = int(total_length) - logging.info("Content length is %ld bytes", total_length) - for data in response.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - if sys.stdout.isatty(): - done = int(50 * dl / total_length) - percent = int(100 * float(dl) / total_length) - eq_str = "=" * done - space_str = " " * (50 - done) - sys.stdout.write(f"\r[{eq_str}{space_str}] {percent}%") - sys.stdout.flush() - break - except Exception as ex: - sys.stdout.write("\n") - time.sleep(3) - logging.info("Exception while downloading %s, retry %s", ex, i + 1) - if os.path.exists(path): - os.remove(path) - else: - raise Exception(f"Cannot download dataset from {url}, all retries exceeded") - - sys.stdout.write("\n") - logging.info("Downloading finished") - - def get_ccache_if_not_exists( - path_to_ccache_dir: str, + path_to_ccache_dir: Path, s3_helper: S3Helper, current_pr_number: int, - temp_path: str, + temp_path: Path, release_pr: int, ) -> int: """returns: number of PR for downloaded PR. 
-1 if ccache not found"""
-    ccache_name = os.path.basename(path_to_ccache_dir)
+    ccache_name = path_to_ccache_dir.name
     cache_found = False
     prs_to_check = [current_pr_number]
     # Release PR is either 0 or defined
@@ -94,11 +54,11 @@ def get_ccache_if_not_exists(
             logging.info("Found ccache on path %s", obj)
             url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{obj}"
-            compressed_cache = os.path.join(temp_path, os.path.basename(obj))
-            dowload_file_with_progress(url, compressed_cache)
+            compressed_cache = temp_path / os.path.basename(obj)
+            download_build_with_progress(url, compressed_cache)
-            path_to_decompress = str(Path(path_to_ccache_dir).parent)
-            if not os.path.exists(path_to_decompress):
+            path_to_decompress = path_to_ccache_dir.parent
+            if not path_to_decompress.exists():
                 os.makedirs(path_to_decompress)
             if os.path.exists(path_to_ccache_dir):
@@ -122,15 +82,77 @@ def get_ccache_if_not_exists(
     return ccache_pr
-def upload_ccache(path_to_ccache_dir, s3_helper, current_pr_number, temp_path):
+def upload_ccache(
+    path_to_ccache_dir: Path,
+    s3_helper: S3Helper,
+    current_pr_number: int,
+    temp_path: Path,
+) -> None:
     logging.info("Uploading cache %s for pr %s", path_to_ccache_dir, current_pr_number)
-    ccache_name = os.path.basename(path_to_ccache_dir)
-    compressed_cache_path = os.path.join(temp_path, ccache_name + ".tar.zst")
+    ccache_name = path_to_ccache_dir.name
+    compressed_cache_path = temp_path / f"{ccache_name}.tar.zst"
     compress_fast(path_to_ccache_dir, compressed_cache_path)
-    s3_path = (
-        str(current_pr_number) + "/ccaches/" + os.path.basename(compressed_cache_path)
-    )
+    s3_path = f"{current_pr_number}/ccaches/{compressed_cache_path.name}"
     logging.info("Will upload %s to path %s", compressed_cache_path, s3_path)
     s3_helper.upload_build_file_to_s3(compressed_cache_path, s3_path)
     logging.info("Upload finished")
+
+
+class CargoCache:
+    PREFIX = "ccache/cargo_cache"
+
+    def __init__(
+        self,
+        directory: Path,
+        temp_path: Path,
+        s3_helper: S3Helper,
+    ):
+        self._cargo_lock_file = Path(git_runner.cwd) / "rust" / "Cargo.lock"
+        self.lock_hash = md5(self._cargo_lock_file.read_bytes()).hexdigest()
+        self.directory = directory
+        self.archive_name = f"Cargo_cache_{self.lock_hash}.tar.zst"
+        self.temp_path = temp_path
+        self.s3_helper = s3_helper
+        self._url = (
+            f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{self.PREFIX}/{self.archive_name}"
+        )
+        self._force_upload_cache = False
+
+    def download(self):
+        logging.info("Searching rust cache for Cargo.lock md5 %s", self.lock_hash)
+        compressed_cache = self.temp_path / self.archive_name
+        try:
+            download_build_with_progress(self._url, compressed_cache)
+        except DownloadException:
+            logging.warning("Unable to download cargo cache, creating empty directory")
+            self.directory.mkdir(parents=True, exist_ok=True)
+            return
+
+        # decompress the cache and check if the necessary directory is there
+        self.directory.parent.mkdir(parents=True, exist_ok=True)
+        decompress_fast(compressed_cache, self.directory.parent)
+        if not self.directory.exists():
+            logging.warning(
+                "The cargo cache archive was successfully downloaded and "
+                "decompressed, but %s does not exist. 
Creating empty one", + self.directory, + ) + logging.info("Cache for Cargo.lock md5 %s will be uploaded", self.lock_hash) + self.directory.mkdir(parents=True, exist_ok=True) + + def upload(self): + if not self._force_upload_cache: + cache_response = requests.head(self._url) + if cache_response.status_code == 200: + logging.info( + "Remote cargo cache %s already exist, won't reupload", self._url + ) + return + + logging.info("Compressing cargo cache") + archive_path = self.directory.parent / self.archive_name + compress_fast(self.directory, archive_path) + s3_path = f"{self.PREFIX}/{self.archive_name}" + logging.info("Uploading %s to S3 path %s", archive_path, s3_path) + self.s3_helper.upload_build_file_to_s3(archive_path, s3_path) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index ea7d112c73e..a2f3a3ab401 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,193 +1,191 @@ #!/usr/bin/env python3 +import logging + from dataclasses import dataclass -from typing import Callable, Dict, TypeVar +from typing import Callable, Dict, List, Literal -ConfValue = TypeVar("ConfValue", str, bool) -BuildConfig = Dict[str, ConfValue] -CI_CONFIG = { - "build_config": { - "package_release": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "deb", - "static_binary_name": "amd64", - "additional_pkgs": True, - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "coverity": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "coverity", - "tidy": "disable", - "with_coverage": False, - "official": False, - "comment": "A special build for coverity", - }, - "package_aarch64": { - "compiler": "clang-16-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "deb", - "static_binary_name": "aarch64", - "additional_pkgs": True, - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_asan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "address", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_ubsan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "undefined", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_tsan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "thread", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_msan": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "memory", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "package_debug": { - "compiler": "clang-16", - "build_type": "debug", - "sanitizer": "", - "package_type": "deb", - "tidy": "disable", - "with_coverage": False, - "comment": "Note: sparse checkout was used", - }, - "binary_release": { - "compiler": "clang-16", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_tidy": { - "compiler": "clang-16", - "build_type": "debug", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "debug-amd64", - "tidy": "enable", - "with_coverage": False, - "comment": "clang-tidy is used for static analysis", - }, - "binary_darwin": { - "compiler": "clang-16-darwin", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "macos", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - 
"binary_aarch64": { - "compiler": "clang-16-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_aarch64_v80compat": { - "compiler": "clang-16-aarch64-v80compat", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "aarch64v80compat", - "tidy": "disable", - "with_coverage": False, - "comment": "For ARMv8.1 and older", - }, - "binary_freebsd": { - "compiler": "clang-16-freebsd", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "freebsd", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_darwin_aarch64": { - "compiler": "clang-16-darwin-aarch64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "macos-aarch64", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_ppc64le": { - "compiler": "clang-16-ppc64le", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "powerpc64le", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, - "binary_amd64_compat": { - "compiler": "clang-16-amd64-compat", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "amd64compat", - "tidy": "disable", - "with_coverage": False, - "comment": "SSE2-only build", - }, - "binary_riscv64": { - "compiler": "clang-16-riscv64", - "build_type": "", - "sanitizer": "", - "package_type": "binary", - "static_binary_name": "riscv64", - "tidy": "disable", - "with_coverage": False, - "comment": "", - }, +@dataclass +class BuildConfig: + compiler: str + package_type: Literal["deb", "binary"] + additional_pkgs: bool = False + debug_build: bool = False + sanitizer: str = "" + tidy: bool = False + comment: str = "" + static_binary_name: str = "" + + +@dataclass +class TestConfig: + required_build: str + force_tests: bool = False + + +BuildConfigs = Dict[str, BuildConfig] +BuildsReportConfig = Dict[str, List[str]] +TestConfigs = Dict[str, TestConfig] + + +@dataclass +class CiConfig: + build_config: BuildConfigs + builds_report_config: BuildsReportConfig + test_configs: TestConfigs + + def validate(self) -> None: + errors = [] + # All build configs must belong to build_report_config + for build_name in self.build_config.keys(): + build_in_reports = False + for report_config in self.builds_report_config.values(): + if build_name in report_config: + build_in_reports = True + break + if not build_in_reports: + logging.error( + "Build name %s does not belong to build reports", build_name + ) + errors.append( + f"Build name {build_name} does not belong to build reports" + ) + # And otherwise + for build_report_name, build_names in self.builds_report_config.items(): + missed_names = [ + name for name in build_names if name not in self.build_config.keys() + ] + if missed_names: + logging.error( + "The following names of the build report '%s' " + "are missed in build_config: %s", + build_report_name, + missed_names, + ) + errors.append( + f"The following names of the build report '{build_report_name}' " + f"are missed in build_config: {missed_names}", + ) + # And finally, all of tests' requirements must be in the builds + for test_name, test_config in self.test_configs.items(): + if test_config.required_build not in self.build_config.keys(): + logging.error( + "The requierment '%s' for '%s' is not found in builds", + test_config, + test_name, + ) + errors.append( + f"The 
requierment '{test_config}' for " + f"'{test_name}' is not found in builds" + ) + + if errors: + raise KeyError("config contains errors", errors) + + +CI_CONFIG = CiConfig( + build_config={ + "package_release": BuildConfig( + compiler="clang-16", + package_type="deb", + static_binary_name="amd64", + additional_pkgs=True, + ), + "package_aarch64": BuildConfig( + compiler="clang-16-aarch64", + package_type="deb", + static_binary_name="aarch64", + additional_pkgs=True, + ), + "package_asan": BuildConfig( + compiler="clang-16", + sanitizer="address", + package_type="deb", + ), + "package_ubsan": BuildConfig( + compiler="clang-16", + sanitizer="undefined", + package_type="deb", + ), + "package_tsan": BuildConfig( + compiler="clang-16", + sanitizer="thread", + package_type="deb", + ), + "package_msan": BuildConfig( + compiler="clang-16", + sanitizer="memory", + package_type="deb", + ), + "package_debug": BuildConfig( + compiler="clang-16", + debug_build=True, + package_type="deb", + comment="Note: sparse checkout was used", + ), + "binary_release": BuildConfig( + compiler="clang-16", + package_type="binary", + ), + "binary_tidy": BuildConfig( + compiler="clang-16", + debug_build=True, + package_type="binary", + static_binary_name="debug-amd64", + tidy=True, + comment="clang-tidy is used for static analysis", + ), + "binary_darwin": BuildConfig( + compiler="clang-16-darwin", + package_type="binary", + static_binary_name="macos", + ), + "binary_aarch64": BuildConfig( + compiler="clang-16-aarch64", + package_type="binary", + ), + "binary_aarch64_v80compat": BuildConfig( + compiler="clang-16-aarch64-v80compat", + package_type="binary", + static_binary_name="aarch64v80compat", + comment="For ARMv8.1 and older", + ), + "binary_freebsd": BuildConfig( + compiler="clang-16-freebsd", + package_type="binary", + static_binary_name="freebsd", + ), + "binary_darwin_aarch64": BuildConfig( + compiler="clang-16-darwin-aarch64", + package_type="binary", + static_binary_name="macos-aarch64", + ), + "binary_ppc64le": BuildConfig( + compiler="clang-16-ppc64le", + package_type="binary", + static_binary_name="powerpc64le", + ), + "binary_amd64_compat": BuildConfig( + compiler="clang-16-amd64-compat", + package_type="binary", + static_binary_name="amd64compat", + comment="SSE2-only build", + ), + "binary_riscv64": BuildConfig( + compiler="clang-16-riscv64", + package_type="binary", + static_binary_name="riscv64", + ), + "binary_s390x": BuildConfig( + compiler="clang-16-s390x", + package_type="binary", + static_binary_name="s390x", + ), }, - "builds_report_config": { + builds_report_config={ "ClickHouse build check": [ "package_release", - "coverity", "package_aarch64", "package_asan", "package_ubsan", @@ -205,213 +203,84 @@ CI_CONFIG = { "binary_darwin_aarch64", "binary_ppc64le", "binary_riscv64", + "binary_s390x", "binary_amd64_compat", ], }, - "tests_config": { - # required_build - build name for artifacts - # force_tests - force success status for tests - "Install packages (amd64)": { - "required_build": "package_release", - }, - "Install packages (arm64)": { - "required_build": "package_aarch64", - }, - "Stateful tests (asan)": { - "required_build": "package_asan", - }, - "Stateful tests (tsan)": { - "required_build": "package_tsan", - }, - "Stateful tests (msan)": { - "required_build": "package_msan", - }, - "Stateful tests (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateful tests (debug)": { - "required_build": "package_debug", - }, - "Stateful tests (release)": { - "required_build": 
"package_release", - }, - "Stateful tests (aarch64)": { - "required_build": "package_aarch64", - }, - "Stateful tests (release, DatabaseOrdinary)": { - "required_build": "package_release", - }, - "Stateful tests (release, DatabaseReplicated)": { - "required_build": "package_release", - }, + test_configs={ + "Install packages (amd64)": TestConfig("package_release"), + "Install packages (arm64)": TestConfig("package_aarch64"), + "Stateful tests (asan)": TestConfig("package_asan"), + "Stateful tests (tsan)": TestConfig("package_tsan"), + "Stateful tests (msan)": TestConfig("package_msan"), + "Stateful tests (ubsan)": TestConfig("package_ubsan"), + "Stateful tests (debug)": TestConfig("package_debug"), + "Stateful tests (release)": TestConfig("package_release"), + "Stateful tests (aarch64)": TestConfig("package_aarch64"), + "Stateful tests (release, DatabaseOrdinary)": TestConfig("package_release"), + "Stateful tests (release, DatabaseReplicated)": TestConfig("package_release"), # Stateful tests for parallel replicas - "Stateful tests (release, ParallelReplicas)": { - "required_build": "package_release", - }, - "Stateful tests (debug, ParallelReplicas)": { - "required_build": "package_debug", - }, - "Stateful tests (asan, ParallelReplicas)": { - "required_build": "package_asan", - }, - "Stateful tests (msan, ParallelReplicas)": { - "required_build": "package_msan", - }, - "Stateful tests (ubsan, ParallelReplicas)": { - "required_build": "package_ubsan", - }, - "Stateful tests (tsan, ParallelReplicas)": { - "required_build": "package_tsan", - }, + "Stateful tests (release, ParallelReplicas)": TestConfig("package_release"), + "Stateful tests (debug, ParallelReplicas)": TestConfig("package_debug"), + "Stateful tests (asan, ParallelReplicas)": TestConfig("package_asan"), + "Stateful tests (msan, ParallelReplicas)": TestConfig("package_msan"), + "Stateful tests (ubsan, ParallelReplicas)": TestConfig("package_ubsan"), + "Stateful tests (tsan, ParallelReplicas)": TestConfig("package_tsan"), # End stateful tests for parallel replicas - "Stateless tests (asan)": { - "required_build": "package_asan", - }, - "Stateless tests (tsan)": { - "required_build": "package_tsan", - }, - "Stateless tests (msan)": { - "required_build": "package_msan", - }, - "Stateless tests (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateless tests (debug)": { - "required_build": "package_debug", - }, - "Stateless tests (release)": { - "required_build": "package_release", - }, - "Stateless tests (aarch64)": { - "required_build": "package_aarch64", - }, - "Stateless tests (release, wide parts enabled)": { - "required_build": "package_release", - }, - "Stateless tests (release, analyzer)": { - "required_build": "package_release", - }, - "Stateless tests (release, DatabaseOrdinary)": { - "required_build": "package_release", - }, - "Stateless tests (release, DatabaseReplicated)": { - "required_build": "package_release", - }, - "Stateless tests (release, s3 storage)": { - "required_build": "package_release", - }, - "Stateless tests (debug, s3 storage)": { - "required_build": "package_debug", - }, - "Stateless tests (tsan, s3 storage)": { - "required_build": "package_tsan", - }, - "Stress test (asan)": { - "required_build": "package_asan", - }, - "Stress test (tsan)": { - "required_build": "package_tsan", - }, - "Stress test (ubsan)": { - "required_build": "package_ubsan", - }, - "Stress test (msan)": { - "required_build": "package_msan", - }, - "Stress test (debug)": { - "required_build": "package_debug", - }, - "Upgrade 
check (asan)": { - "required_build": "package_asan", - }, - "Upgrade check (tsan)": { - "required_build": "package_tsan", - }, - "Upgrade check (msan)": { - "required_build": "package_msan", - }, - "Upgrade check (debug)": { - "required_build": "package_debug", - }, - "Integration tests (asan)": { - "required_build": "package_asan", - }, - "Integration tests (tsan)": { - "required_build": "package_tsan", - }, - "Integration tests (release)": { - "required_build": "package_release", - }, - "Integration tests (msan)": { - "required_build": "package_msan", - }, - "Integration tests flaky check (asan)": { - "required_build": "package_asan", - }, - "Compatibility check (amd64)": { - "required_build": "package_release", - }, - "Compatibility check (aarch64)": { - "required_build": "package_aarch64", - }, - "Unit tests (release-clang)": { - "required_build": "binary_release", - }, - "Unit tests (asan)": { - "required_build": "package_asan", - }, - "Unit tests (msan)": { - "required_build": "package_msan", - }, - "Unit tests (tsan)": { - "required_build": "package_tsan", - }, - "Unit tests (ubsan)": { - "required_build": "package_ubsan", - }, - "AST fuzzer (debug)": { - "required_build": "package_debug", - }, - "AST fuzzer (asan)": { - "required_build": "package_asan", - }, - "AST fuzzer (msan)": { - "required_build": "package_msan", - }, - "AST fuzzer (tsan)": { - "required_build": "package_tsan", - }, - "AST fuzzer (ubsan)": { - "required_build": "package_ubsan", - }, - "Stateless tests flaky check (asan)": { - "required_build": "package_asan", - }, - "ClickHouse Keeper Jepsen": { - "required_build": "binary_release", - }, - "ClickHouse Server Jepsen": { - "required_build": "binary_release", - }, - "Performance Comparison": { - "required_build": "package_release", - "test_grep_exclude_filter": "", - }, - "Performance Comparison Aarch64": { - "required_build": "package_aarch64", - "test_grep_exclude_filter": "", - }, - "SQLancer (release)": { - "required_build": "package_release", - }, - "SQLancer (debug)": { - "required_build": "package_debug", - }, - "Sqllogic test (release)": { - "required_build": "package_release", - }, + "Stateless tests (asan)": TestConfig("package_asan"), + "Stateless tests (tsan)": TestConfig("package_tsan"), + "Stateless tests (msan)": TestConfig("package_msan"), + "Stateless tests (ubsan)": TestConfig("package_ubsan"), + "Stateless tests (debug)": TestConfig("package_debug"), + "Stateless tests (release)": TestConfig("package_release"), + "Stateless tests (aarch64)": TestConfig("package_aarch64"), + "Stateless tests (release, wide parts enabled)": TestConfig("package_release"), + "Stateless tests (release, analyzer)": TestConfig("package_release"), + "Stateless tests (release, DatabaseOrdinary)": TestConfig("package_release"), + "Stateless tests (release, DatabaseReplicated)": TestConfig("package_release"), + "Stateless tests (release, s3 storage)": TestConfig("package_release"), + "Stateless tests (debug, s3 storage)": TestConfig("package_debug"), + "Stateless tests (tsan, s3 storage)": TestConfig("package_tsan"), + "Stress test (asan)": TestConfig("package_asan"), + "Stress test (tsan)": TestConfig("package_tsan"), + "Stress test (ubsan)": TestConfig("package_ubsan"), + "Stress test (msan)": TestConfig("package_msan"), + "Stress test (debug)": TestConfig("package_debug"), + "Upgrade check (asan)": TestConfig("package_asan"), + "Upgrade check (tsan)": TestConfig("package_tsan"), + "Upgrade check (msan)": TestConfig("package_msan"), + "Upgrade check (debug)": 
TestConfig("package_debug"), + "Integration tests (asan)": TestConfig("package_asan"), + "Integration tests (asan, analyzer)": TestConfig("package_asan"), + "Integration tests (tsan)": TestConfig("package_tsan"), + "Integration tests (release)": TestConfig("package_release"), + "Integration tests (msan)": TestConfig("package_msan"), + "Integration tests flaky check (asan)": TestConfig("package_asan"), + "Compatibility check (amd64)": TestConfig("package_release"), + "Compatibility check (aarch64)": TestConfig("package_aarch64"), + "Unit tests (release)": TestConfig("binary_release"), + "Unit tests (asan)": TestConfig("package_asan"), + "Unit tests (msan)": TestConfig("package_msan"), + "Unit tests (tsan)": TestConfig("package_tsan"), + "Unit tests (ubsan)": TestConfig("package_ubsan"), + "AST fuzzer (debug)": TestConfig("package_debug"), + "AST fuzzer (asan)": TestConfig("package_asan"), + "AST fuzzer (msan)": TestConfig("package_msan"), + "AST fuzzer (tsan)": TestConfig("package_tsan"), + "AST fuzzer (ubsan)": TestConfig("package_ubsan"), + "Stateless tests flaky check (asan)": TestConfig("package_asan"), + "ClickHouse Keeper Jepsen": TestConfig("binary_release"), + "ClickHouse Server Jepsen": TestConfig("binary_release"), + "Performance Comparison": TestConfig("package_release"), + "Performance Comparison Aarch64": TestConfig("package_aarch64"), + "SQLancer (release)": TestConfig("package_release"), + "SQLancer (debug)": TestConfig("package_debug"), + "Sqllogic test (release)": TestConfig("package_release"), + "SQLTest": TestConfig("package_release"), }, -} # type: dict +) +CI_CONFIG.validate() + # checks required by Mergeable Check REQUIRED_CHECKS = [ @@ -421,103 +290,10 @@ REQUIRED_CHECKS = [ "Fast test", "Stateful tests (release)", "Stateless tests (release)", - "Stateless tests (debug) [1/5]", - "Stateless tests (debug) [2/5]", - "Stateless tests (debug) [3/5]", - "Stateless tests (debug) [4/5]", - "Stateless tests (debug) [5/5]", - "AST fuzzer (asan)", - "AST fuzzer (msan)", - "AST fuzzer (tsan)", - "AST fuzzer (ubsan)", - "AST fuzzer (debug)", - "Compatibility check (aarch64)", - "Compatibility check (amd64)", - "Install packages (amd64)", - "Install packages (arm64)", - "Integration tests (asan) [1/6]", - "Integration tests (asan) [2/6]", - "Integration tests (asan) [3/6]", - "Integration tests (asan) [4/6]", - "Integration tests (asan) [5/6]", - "Integration tests (asan) [6/6]", - "Integration tests (release) [1/4]", - "Integration tests (release) [2/4]", - "Integration tests (release) [3/4]", - "Integration tests (release) [4/4]", - "Integration tests (tsan) [1/6]", - "Integration tests (tsan) [2/6]", - "Integration tests (tsan) [3/6]", - "Integration tests (tsan) [4/6]", - "Integration tests (tsan) [5/6]", - "Integration tests (tsan) [6/6]", - "Integration tests flaky check (asan)", - "Stateful tests (aarch64)", - "Stateful tests (asan)", - "Stateful tests (asan, ParallelReplicas)", - "Stateful tests (debug)", - "Stateful tests (debug, ParallelReplicas)", - "Stateful tests (msan)", - "Stateful tests (msan, ParallelReplicas)", - "Stateful tests (release, ParallelReplicas)", - "Stateful tests (tsan)", - "Stateful tests (tsan, ParallelReplicas)", - "Stateful tests (ubsan)", - "Stateful tests (ubsan, ParallelReplicas)", - "Stateless tests (aarch64)", - "Stateless tests (asan) [1/4]", - "Stateless tests (asan) [2/4]", - "Stateless tests (asan) [3/4]", - "Stateless tests (asan) [4/4]", - "Stateless tests (debug) [1/5]", - "Stateless tests (debug) [2/5]", - "Stateless tests (debug) 
[3/5]", - "Stateless tests (debug) [4/5]", - "Stateless tests (debug) [5/5]", - "Stateless tests (debug, s3 storage) [1/6]", - "Stateless tests (debug, s3 storage) [2/6]", - "Stateless tests (debug, s3 storage) [3/6]", - "Stateless tests (debug, s3 storage) [4/6]", - "Stateless tests (debug, s3 storage) [5/6]", - "Stateless tests (debug, s3 storage) [6/6]", - "Stateless tests (msan) [1/6]", - "Stateless tests (msan) [2/6]", - "Stateless tests (msan) [3/6]", - "Stateless tests (msan) [4/6]", - "Stateless tests (msan) [5/6]", - "Stateless tests (msan) [6/6]", - "Stateless tests (release, DatabaseReplicated) [1/4]", - "Stateless tests (release, DatabaseReplicated) [2/4]", - "Stateless tests (release, DatabaseReplicated) [3/4]", - "Stateless tests (release, DatabaseReplicated) [4/4]", - "Stateless tests (release, s3 storage) [1/2]", - "Stateless tests (release, s3 storage) [2/2]", - "Stateless tests (release, wide parts enabled)", - "Stateless tests (tsan) [1/5]", - "Stateless tests (tsan) [2/5]", - "Stateless tests (tsan) [3/5]", - "Stateless tests (tsan) [4/5]", - "Stateless tests (tsan) [5/5]", - "Stateless tests (tsan, s3 storage) [1/5]", - "Stateless tests (tsan, s3 storage) [2/5]", - "Stateless tests (tsan, s3 storage) [3/5]", - "Stateless tests (tsan, s3 storage) [4/5]", - "Stateless tests (tsan, s3 storage) [5/5]", - "Stateless tests (ubsan) [1/2]", - "Stateless tests (ubsan) [2/2]", - "Stress test (asan)", - "Stress test (debug)", - "Stress test (msan)", - "Stress test (tsan)", - "Stress test (ubsan)", - "Upgrade check (asan)", - "Upgrade check (debug)", - "Upgrade check (msan)", - "Upgrade check (tsan)", "Style Check", "Unit tests (asan)", "Unit tests (msan)", - "Unit tests (release-clang)", + "Unit tests (release)", "Unit tests (tsan)", "Unit tests (ubsan)", ] diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 64b64896f66..39132549bac 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -from typing import List +from pathlib import Path +from typing import Dict, List, Optional import json import logging import time @@ -16,30 +17,60 @@ class InsertException(Exception): class ClickHouseHelper: - def __init__(self, url=None): + def __init__( + self, url: Optional[str] = None, auth: Optional[Dict[str, str]] = None + ): if url is None: url = get_parameter_from_ssm("clickhouse-test-stat-url") self.url = url - self.auth = { + self.auth = auth or { "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"), "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password"), } @staticmethod - def _insert_json_str_info_impl(url, auth, db, table, json_str): + def insert_file( + url: str, + auth: Optional[Dict[str, str]], + query: str, + file: Path, + additional_options: Optional[Dict[str, str]] = None, + ) -> None: + params = { + "query": query, + "date_time_input_format": "best_effort", + "send_logs_level": "warning", + } + if additional_options: + for k, v in additional_options.items(): + params[k] = v + + with open(file, "rb") as data_fd: + ClickHouseHelper._insert_post( + url, params=params, data=data_fd, headers=auth + ) + + @staticmethod + def insert_json_str(url, auth, db, table, json_str): params = { "database": db, "query": f"INSERT INTO {table} FORMAT JSONEachRow", "date_time_input_format": "best_effort", "send_logs_level": "warning", } + ClickHouseHelper._insert_post(url, params=params, data=json_str, headers=auth) + + @staticmethod + def _insert_post(*args, 
**kwargs): + url = "" + if args: + url = args[0] + url = kwargs.get("url", url) for i in range(5): try: - response = requests.post( - url, params=params, data=json_str, headers=auth - ) + response = requests.post(*args, **kwargs) except Exception as e: error = f"Received exception while sending data to {url} on {i} attempt: {e}" logging.warning(error) @@ -51,17 +82,12 @@ class ClickHouseHelper: break error = ( - "Cannot insert data into clickhouse at try " - + str(i) - + ": HTTP code " - + str(response.status_code) - + ": '" - + str(response.text) - + "'" + f"Cannot insert data into clickhouse at try {i}: HTTP code " + f"{response.status_code}: '{response.text}'" ) if response.status_code >= 500: - # A retriable error + # A retryable error time.sleep(1) continue @@ -76,7 +102,7 @@ class ClickHouseHelper: raise InsertException(error) def _insert_json_str_info(self, db, table, json_str): - self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) + self.insert_json_str(self.url, self.auth, db, table, json_str) def insert_event_into(self, db, table, event, safe=True): event_str = json.dumps(event) @@ -132,6 +158,23 @@ class ClickHouseHelper: return result +# Obtain the machine type from IMDS: +def get_instance_type(): + url = "http://169.254.169.254/latest/meta-data/instance-type" + for i in range(5): + try: + response = requests.get(url, timeout=1) + if response.status_code == 200: + return response.text + except Exception as e: + error = ( + f"Received exception while sending data to {url} on {i} attempt: {e}" + ) + logging.warning(error) + continue + return "" + + def prepare_tests_results_for_clickhouse( pr_info: PRInfo, test_results: TestResults, @@ -168,6 +211,7 @@ def prepare_tests_results_for_clickhouse( head_ref=head_ref, head_repo=head_repo, task_url=pr_info.task_url, + instance_type=get_instance_type(), ) # Always publish a total record for all checks. 
For checks with individual @@ -190,27 +234,3 @@ def prepare_tests_results_for_clickhouse( result.append(current_row) return result - - -def mark_flaky_tests( - clickhouse_helper: ClickHouseHelper, check_name: str, test_results: TestResults -) -> None: - try: - query = f"""SELECT DISTINCT test_name -FROM checks -WHERE - check_start_time BETWEEN now() - INTERVAL 3 DAY AND now() - AND check_name = '{check_name}' - AND (test_status = 'FAIL' OR test_status = 'FLAKY') - AND pull_request_number = 0 -""" - - tests_data = clickhouse_helper.select_json_each_row("default", query) - master_failed_tests = {row["test_name"] for row in tests_data} - logging.info("Found flaky tests: %s", ", ".join(master_failed_tests)) - - for test_result in test_results: - if test_result.status == "FAIL" and test_result.name in master_failed_tests: - test_result.status = "FLAKY" - except Exception as ex: - logging.error("Exception happened during flaky tests fetch %s", ex) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index efe149b0aa4..945bcfe05ed 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -51,7 +51,8 @@ class RerunHelper: def override_status(status: str, check_name: str, invert: bool = False) -> str: - if CI_CONFIG["tests_config"].get(check_name, {}).get("force_tests", False): + test_config = CI_CONFIG.test_configs.get(check_name) + if test_config and test_config.force_tests: return "success" if invert: diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 04203617dca..97de7fed2d5 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -13,7 +13,6 @@ from github import Github from build_download_helper import download_builds_filter from clickhouse_helper import ( ClickHouseHelper, - mark_flaky_tests, prepare_tests_results_for_clickhouse, ) from commit_status_helper import RerunHelper, get_commit, post_commit_status @@ -231,7 +230,6 @@ def main(): ) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, args.check_name, test_results) report_url = upload_results( s3_helper, diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py index 8d52d030b84..d8e691ce3b4 100644 --- a/tests/ci/compress_files.py +++ b/tests/ci/compress_files.py @@ -1,24 +1,31 @@ #!/usr/bin/env python3 import subprocess import logging -import os + +from pathlib import Path +from typing import Optional -def compress_file_fast(path, archive_path): - if archive_path.endswith(".zst"): +PIGZ = Path("/usr/bin/pigz") + + +def compress_file_fast(path: Path, archive_path: Path) -> None: + if archive_path.suffix == ".zst": subprocess.check_call(f"zstd < {path} > {archive_path}", shell=True) - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): subprocess.check_call(f"pigz < {path} > {archive_path}", shell=True) else: subprocess.check_call(f"gzip < {path} > {archive_path}", shell=True) -def compress_fast(path, archive_path, exclude=None): +def compress_fast( + path: Path, archive_path: Path, exclude: Optional[Path] = None +) -> None: program_part = "" - if archive_path.endswith(".zst"): + if archive_path.suffix == ".zst": logging.info("zstd will be used for compression") program_part = "--use-compress-program='zstd --threads=0'" - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): logging.info("pigz found, will compress and decompress faster") program_part = "--use-compress-program='pigz'" else: @@ -32,27 +39,25 @@ def compress_fast(path, archive_path, exclude=None): else: exclude_part = 
f"--exclude {exclude}" - fname = os.path.basename(path) - if os.path.isfile(path): - path = os.path.dirname(path) - else: - path += "/.." + fname = path.name - cmd = f"tar {program_part} {exclude_part} -cf {archive_path} -C {path} {fname}" + cmd = ( + f"tar {program_part} {exclude_part} -cf {archive_path} -C {path.parent} {fname}" + ) logging.debug("compress_fast cmd: %s", cmd) subprocess.check_call(cmd, shell=True) -def decompress_fast(archive_path, result_path=None): +def decompress_fast(archive_path: Path, result_path: Optional[Path] = None) -> None: program_part = "" - if archive_path.endswith(".zst"): + if archive_path.suffix == ".zst": logging.info( "zstd will be used for decompression ('%s' -> '%s')", archive_path, result_path, ) program_part = "--use-compress-program='zstd --threads=0'" - elif os.path.exists("/usr/bin/pigz"): + elif PIGZ.exists(): logging.info( "pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index a434d3cc841..89bd7b7755b 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -238,7 +238,7 @@ def build_and_push_image( result = [] # type: TestResults if os != "ubuntu": tag += f"-{os}" - init_args = ["docker", "buildx", "build", "--build-arg BUILDKIT_INLINE_CACHE=1"] + init_args = ["docker", "buildx", "build"] if push: init_args.append("--push") init_args.append("--output=type=image,push-by-digest=true") diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index d5d27f73694..14db348ef36 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -18,7 +18,7 @@ import docker_server as ds class TestDockerImageCheck(unittest.TestCase): docker_images_path = os.path.join( - os.path.dirname(__file__), "tests/docker_images.json" + os.path.dirname(__file__), "tests/docker_images_for_tests.json" ) def test_get_changed_docker_images(self): @@ -40,6 +40,12 @@ class TestDockerImageCheck(unittest.TestCase): [ di.DockerImage("docker/test/base", "clickhouse/test-base", False), di.DockerImage("docker/docs/builder", "clickhouse/docs-builder", True), + di.DockerImage( + "docker/test/sqltest", + "clickhouse/sqltest", + False, + "clickhouse/test-base", # type: ignore + ), di.DockerImage( "docker/test/stateless", "clickhouse/stateless-test", diff --git a/tests/ci/download_binary.py b/tests/ci/download_binary.py index c57780daa36..034e65f204d 100755 --- a/tests/ci/download_binary.py +++ b/tests/ci/download_binary.py @@ -9,7 +9,7 @@ import os from pathlib import Path from build_download_helper import download_build_with_progress -from ci_config import CI_CONFIG, BuildConfig +from ci_config import CI_CONFIG from env_helper import RUNNER_TEMP, S3_ARTIFACT_DOWNLOAD_TEMPLATE from git_helper import Git, commit from version_helper import get_version_from_repo, version_arg @@ -62,9 +62,9 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) for build in args.build_names: # check if it's in CI_CONFIG - config = CI_CONFIG["build_config"][build] # type: BuildConfig - if args.rename: - path = temp_path / f"clickhouse-{config['static_binary_name']}" + config = CI_CONFIG.build_config[build] + if args.rename and config.static_binary_name: + path = temp_path / f"clickhouse-{config.static_binary_name}" else: path = temp_path / "clickhouse" diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index d5198e5c3d7..460e17acd37 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -14,7 +14,6 @@ from github import Github from 
build_check import get_release_or_pr from clickhouse_helper import ( ClickHouseHelper, - mark_flaky_tests, prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( @@ -190,7 +189,6 @@ def main(): state, description, test_results, additional_logs = process_results(output_path) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, NAME, test_results) s3_path_prefix = os.path.join( get_release_or_pr(pr_info, get_version_from_repo())[0], pr_info.sha, diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index aa8a0cf9553..74392947b82 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -8,6 +8,7 @@ from commit_status_helper import ( get_commit, get_commit_filtered_statuses, post_commit_status, + update_mergeable_check, ) from get_robot_token import get_best_robot_token from pr_info import PRInfo @@ -18,6 +19,8 @@ def main(): pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) + # Update the Mergeable Check at the final step + update_mergeable_check(gh, pr_info, CI_STATUS_NAME) commit = get_commit(gh, pr_info.sha) statuses = [ @@ -27,7 +30,8 @@ def main(): ] if not statuses: return - status = statuses[0] + # Take the latest status + status = statuses[-1] if status.state == "pending": post_commit_status( commit, diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 9279b19b187..2bab330bd66 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -16,8 +16,8 @@ from github import Github from build_download_helper import download_all_deb_packages from clickhouse_helper import ( ClickHouseHelper, - mark_flaky_tests, prepare_tests_results_for_clickhouse, + get_instance_type, ) from commit_status_helper import ( NotSet, @@ -74,9 +74,11 @@ def get_image_name(check_name): def get_run_command( + pr_info, + check_start_time, check_name, builds_path, - repo_tests_path, + repo_path, result_path, server_log_path, kill_timeout, @@ -106,16 +108,28 @@ def get_run_command( envs += [f"-e {e}" for e in additional_envs] + instance_type = get_instance_type() + + envs += [ + "-e CLICKHOUSE_CI_LOGS_HOST", + "-e CLICKHOUSE_CI_LOGS_PASSWORD", + f"-e PULL_REQUEST_NUMBER='{pr_info.number}'", + f"-e COMMIT_SHA='{pr_info.sha}'", + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + ] + env_str = " ".join(envs) volume_with_broken_test = ( - f"--volume={repo_tests_path}/analyzer_tech_debt.txt:/analyzer_tech_debt.txt" + f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt" if "analyzer" in check_name else "" ) return ( f"docker run --volume={builds_path}:/package_folder " - f"--volume={repo_tests_path}:/usr/share/clickhouse-test " + f"--volume={repo_path}/tests:/usr/share/clickhouse-test " f"{volume_with_broken_test} " f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" @@ -307,8 +321,6 @@ def main(): image_name = get_image_name(check_name) docker_image = get_image_with_version(reports_path, image_name) - repo_tests_path = os.path.join(repo_path, "tests") - packages_path = os.path.join(temp_path, "packages") if not os.path.exists(packages_path): os.makedirs(packages_path) @@ -335,9 +347,11 @@ def main(): additional_envs.append("GLOBAL_TAGS=no-random-settings") run_command = get_run_command( + pr_info, + stopwatch.start_time_str, check_name, packages_path, - repo_tests_path, + 
repo_path, result_path, server_log_path, kill_timeout, @@ -368,7 +382,23 @@ def main(): state = override_status(state, check_name, invert=validate_bugfix_check) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, check_name, test_results) + + # Cleanup run log from the credentials of CI logs database. + # Note: a malicious user can still print them by splitting the value into parts. + # But we will be warned when a malicious user modifies CI script. + # Although they can also print them from inside tests. + # Nevertheless, the credentials of the CI logs have limited scope + # and does not provide access to sensitive info. + + ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST") + ci_logs_password = os.getenv( + "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" + ) + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): + subprocess.check_call( + f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'", + shell=True, + ) report_url = upload_results( s3_helper, diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index d619ce96cee..a5788e2af3f 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -15,7 +15,6 @@ from github import Github from build_download_helper import download_builds_filter from clickhouse_helper import ( ClickHouseHelper, - mark_flaky_tests, prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( @@ -51,8 +50,19 @@ def prepare_test_scripts(): server_test = r"""#!/bin/bash set -e trap "bash -ex /packages/preserve_logs.sh" ERR +test_env='TEST_THE_DEFAULT_PARAMETER=15' +echo "$test_env" >> /etc/default/clickhouse systemctl start clickhouse-server -clickhouse-client -q 'SELECT version()'""" +clickhouse-client -q 'SELECT version()' +grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" + initd_test = r"""#!/bin/bash +set -e +trap "bash -ex /packages/preserve_logs.sh" ERR +test_env='TEST_THE_DEFAULT_PARAMETER=15' +echo "$test_env" >> /etc/default/clickhouse +/etc/init.d/clickhouse-server start +clickhouse-client -q 'SELECT version()' +grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" keeper_test = r"""#!/bin/bash set -e trap "bash -ex /packages/preserve_logs.sh" ERR @@ -103,6 +113,7 @@ chmod a+rw -R /tests_logs exit 1 """ (TEMP_PATH / "server_test.sh").write_text(server_test, encoding="utf-8") + (TEMP_PATH / "initd_test.sh").write_text(initd_test, encoding="utf-8") (TEMP_PATH / "keeper_test.sh").write_text(keeper_test, encoding="utf-8") (TEMP_PATH / "binary_test.sh").write_text(binary_test, encoding="utf-8") (TEMP_PATH / "preserve_logs.sh").write_text(preserve_logs, encoding="utf-8") @@ -113,6 +124,9 @@ def test_install_deb(image: DockerImage) -> TestResults: "Install server deb": r"""#!/bin/bash -ex apt-get install /packages/clickhouse-{server,client,common}*deb bash -ex /packages/server_test.sh""", + "Run server init.d": r"""#!/bin/bash -ex +apt-get install /packages/clickhouse-{server,client,common}*deb +bash -ex /packages/initd_test.sh""", "Install keeper deb": r"""#!/bin/bash -ex apt-get install /packages/clickhouse-keeper*deb bash -ex /packages/keeper_test.sh""", @@ -192,18 +206,18 @@ def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults: retcode = process.wait() if retcode == 0: status = OK + subprocess.check_call( + f"docker kill -s 9 {container_id}", shell=True + ) break status = FAIL copy2(log_file, 
LOGS_PATH) archive_path = TEMP_PATH / f"{container_name}-{retry}.tar.gz" - compress_fast( - LOGS_PATH.as_posix(), - archive_path.as_posix(), - ) + compress_fast(LOGS_PATH, archive_path) logs.append(archive_path) + subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True) - subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True) test_results.append(TestResult(name, status, stopwatch.duration_seconds, logs)) return test_results @@ -280,7 +294,7 @@ def main(): sys.exit(0) docker_images = { - name: get_image_with_version(REPORTS_PATH, name) + name: get_image_with_version(REPORTS_PATH, name, args.download) for name in (RPM_IMAGE, DEB_IMAGE) } prepare_test_scripts() @@ -297,6 +311,8 @@ def main(): is_match = is_match or path.endswith(".rpm") if args.tgz: is_match = is_match or path.endswith(".tgz") + # We don't need debug packages, so let's filter them out + is_match = is_match and "-dbg" not in path return is_match download_builds_filter( @@ -345,7 +361,6 @@ def main(): return ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, args.check_name, test_results) description = format_description(description) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 8ef6244a1c5..d5b70720ee9 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -15,7 +15,6 @@ from github import Github from build_download_helper import download_all_deb_packages from clickhouse_helper import ( ClickHouseHelper, - mark_flaky_tests, prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( @@ -38,19 +37,20 @@ from upload_result_helper import upload_results # When update, update -# integration/ci-runner.py:ClickhouseIntegrationTestsRunner.get_images_names too +# tests/integration/ci-runner.py:ClickhouseIntegrationTestsRunner.get_images_names too IMAGES = [ + "clickhouse/dotnet-client", + "clickhouse/integration-helper", + "clickhouse/integration-test", "clickhouse/integration-tests-runner", + "clickhouse/kerberized-hadoop", + "clickhouse/kerberos-kdc", "clickhouse/mysql-golang-client", "clickhouse/mysql-java-client", "clickhouse/mysql-js-client", "clickhouse/mysql-php-client", + "clickhouse/nginx-dav", "clickhouse/postgresql-java-client", - "clickhouse/integration-test", - "clickhouse/kerberos-kdc", - "clickhouse/kerberized-hadoop", - "clickhouse/integration-helper", - "clickhouse/dotnet-client", ] @@ -71,7 +71,7 @@ def get_json_params_dict( } -def get_env_for_runner(build_path, repo_path, result_path, work_path): +def get_env_for_runner(check_name, build_path, repo_path, result_path, work_path): binary_path = os.path.join(build_path, "clickhouse") odbc_bridge_path = os.path.join(build_path, "clickhouse-odbc-bridge") library_bridge_path = os.path.join(build_path, "clickhouse-library-bridge") @@ -88,6 +88,9 @@ def get_env_for_runner(build_path, repo_path, result_path, work_path): my_env["CLICKHOUSE_TESTS_JSON_PARAMS_PATH"] = os.path.join(work_path, "params.json") my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = "0" + if "analyzer" in check_name.lower(): + my_env["CLICKHOUSE_USE_NEW_ANALYZER"] = "1" + return my_env @@ -225,7 +228,9 @@ def main(): else: download_all_deb_packages(check_name, reports_path, build_path) - my_env = get_env_for_runner(build_path, repo_path, result_path, work_path) + my_env = get_env_for_runner( + check_name, build_path, repo_path, result_path, work_path + ) json_path = os.path.join(work_path, "params.json") with open(json_path, "w", encoding="utf-8") as json_params: @@ -271,7 
+276,6 @@ def main(): state = override_status(state, check_name, invert=validate_bugfix_check) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, check_name, test_results) s3_helper = S3Helper() report_url = upload_results( diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index c21fafa2605..54eb70536a8 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 -import time +import argparse import logging import os import sys +import time -import argparse +from pathlib import Path import boto3 # type: ignore import requests # type: ignore @@ -268,8 +269,8 @@ if __name__ == "__main__": description = "Found invalid analysis (ノಥ益ಥ)ノ ┻━┻" compress_fast( - os.path.join(result_path, "store"), - os.path.join(result_path, "jepsen_store.tar.zst"), + Path(result_path) / "store", + Path(result_path) / "jepsen_store.tar.zst", ) additional_data.append(os.path.join(result_path, "jepsen_store.tar.zst")) except Exception as ex: diff --git a/tests/ci/lambda_shared_package/lambda_shared/__init__.py b/tests/ci/lambda_shared_package/lambda_shared/__init__.py index c56994cc86a..aa88342fcc3 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/__init__.py +++ b/tests/ci/lambda_shared_package/lambda_shared/__init__.py @@ -219,3 +219,12 @@ def list_runners(access_token: str) -> RunnerDescriptions: result.append(desc) return result + + +def cached_value_is_valid(updated_at: float, ttl: float) -> bool: + "a common function to identify if cachable value is still valid" + if updated_at == 0: + return False + if time.time() - ttl < updated_at: + return True + return False diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index ef47eacc082..82fc53b9356 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -101,7 +101,7 @@ LABELS = { CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} -def check_pr_description(pr_body: str) -> Tuple[str, str]: +def check_pr_description(pr_body: str, repo_name: str) -> Tuple[str, str]: """The function checks the body to being properly formatted according to .github/PULL_REQUEST_TEMPLATE.md, if the first returned string is not empty, then there is an error.""" @@ -109,11 +109,7 @@ def check_pr_description(pr_body: str) -> Tuple[str, str]: lines = [re.sub(r"\s+", " ", line) for line in lines] # Check if body contains "Reverts ClickHouse/ClickHouse#36337" - if [ - True - for line in lines - if re.match(r"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line) - ]: + if [True for line in lines if re.match(rf"\AReverts {repo_name}#[\d]+\Z", line)]: return "", LABELS["pr-not-for-changelog"][0] category = "" diff --git a/tests/ci/lambda_shared_package/lambda_shared/token.py b/tests/ci/lambda_shared_package/lambda_shared/token.py index d3bf15ab259..6d5653f6a58 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/token.py +++ b/tests/ci/lambda_shared_package/lambda_shared/token.py @@ -8,6 +8,8 @@ import boto3 # type: ignore import jwt import requests # type: ignore +from . 
import cached_value_is_valid + def get_key_and_app_from_aws() -> Tuple[str, int]: secret_name = "clickhouse_github_secret_key" @@ -68,7 +70,7 @@ def get_access_token_by_key_app(private_key: str, app_id: int) -> str: @dataclass class CachedToken: - time: int + time: float value: str updating: bool = False @@ -81,12 +83,9 @@ def get_cached_access_token() -> str: return _cached_token.value # Indicate that the value is updating now, so the cached value can be # used. The first setting and close-to-ttl are not counted as update - if _cached_token.time != 0 or time.time() - 590 < _cached_token.time: - _cached_token.updating = True - else: - _cached_token.updating = False + _cached_token.updating = cached_value_is_valid(_cached_token.time, 590) private_key, app_id = get_key_and_app_from_aws() - _cached_token.time = int(time.time()) + _cached_token.time = time.time() _cached_token.value = get_access_token_by_key_app(private_key, app_id) _cached_token.updating = False return _cached_token.value diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 14844ed9b25..35b0614b01f 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -246,6 +246,12 @@ def main(): if args.check_running_workflows: workflows = get_workflows_for_head(repo, pr.head.sha) + logging.info( + "The PR #%s has following workflows:\n%s", + pr.number, + "\n".join(f"{wf.html_url}: status is {wf.status}" for wf in workflows), + ) + workflows_in_progress = [wf for wf in workflows if wf.status != "completed"] # At most one workflow in progress is fine. We check that there no # cases like, e.g. PullRequestCI and DocksCheck in progress at once diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 41ace95c350..27a67e2ae0e 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import os import logging import sys @@ -20,11 +19,15 @@ from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from s3_helper import S3Helper from tee_popen import TeePopen +from clickhouse_helper import get_instance_type +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/performance-comparison" def get_run_command( + check_start_time, + check_name, workspace, result_path, repo_tests_path, @@ -33,12 +36,24 @@ def get_run_command( additional_env, image, ): + instance_type = get_instance_type() + + envs = [ + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + f"-e PR_TO_TEST={pr_to_test}", + f"-e SHA_TO_TEST={sha_to_test}", + ] + + env_str = " ".join(envs) + return ( f"docker run --privileged --volume={workspace}:/workspace " f"--volume={result_path}:/output " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio " - f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} " + f"{env_str} {additional_env} " f"{image}" ) @@ -62,6 +77,9 @@ class RamDrive: if __name__ == "__main__": logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) repo_tests_path = os.path.join(repo_path, "tests") @@ -71,7 +89,7 @@ if __name__ == "__main__": reports_path = os.getenv("REPORTS_PATH", "./reports") check_name = sys.argv[1] - required_build = CI_CONFIG["tests_config"][check_name]["required_build"] + 
required_build = CI_CONFIG.test_configs[check_name].required_build if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -121,15 +139,6 @@ if __name__ == "__main__": ) sys.exit(0) - test_grep_exclude_filter = CI_CONFIG["tests_config"][check_name][ - "test_grep_exclude_filter" - ] - if test_grep_exclude_filter: - docker_env += f" -e CHPC_TEST_GREP_EXCLUDE={test_grep_exclude_filter}" - logging.info( - "Fill fliter our performance tests by grep -v %s", test_grep_exclude_filter - ) - rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") @@ -166,6 +175,8 @@ if __name__ == "__main__": docker_env += "".join([f" -e {name}" for name in env_extra]) run_command = get_run_command( + stopwatch.start_time_str, + check_name, result_path, result_path, repo_tests_path, @@ -177,6 +188,7 @@ if __name__ == "__main__": logging.info("Going to run command %s", run_command) run_log_path = os.path.join(temp_path, "run.log") + compare_log_path = os.path.join(result_path, "compare.log") popen_env = os.environ.copy() popen_env.update(env_extra) @@ -190,7 +202,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) paths = { - "compare.log": os.path.join(result_path, "compare.log"), + "compare.log": compare_log_path, "output.7z": os.path.join(result_path, "output.7z"), "report.html": os.path.join(result_path, "report.html"), "all-queries.html": os.path.join(result_path, "all-queries.html"), diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 86d4985c6b2..dee71b726df 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -279,7 +279,7 @@ class PRInfo: "user_orgs": self.user_orgs, } - def has_changes_in_documentation(self): + def has_changes_in_documentation(self) -> bool: # If the list wasn't built yet the best we can do is to # assume that there were changes. 
if self.changed_files is None or not self.changed_files: @@ -287,10 +287,9 @@ class PRInfo: for f in self.changed_files: _, ext = os.path.splitext(f) - path_in_docs = "docs" in f - path_in_website = "website" in f + path_in_docs = f.startswith("docs/") if ( - ext in DIFF_IN_DOCUMENTATION_EXT and (path_in_docs or path_in_website) + ext in DIFF_IN_DOCUMENTATION_EXT and path_in_docs ) or "docker/docs" in f: return True return False diff --git a/tests/ci/report.py b/tests/ci/report.py index a9014acec12..30c99719b05 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -239,7 +239,7 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes @dataclass class BuildResult: compiler: str - build_type: str + debug_build: bool sanitizer: str status: str elapsed_seconds: int @@ -349,7 +349,7 @@ def create_test_html_report( has_log_urls = True row = "" - has_error = test_result.status in ("FAIL", "FLAKY", "NOT_FAILED") + has_error = test_result.status in ("FAIL", "NOT_FAILED") if has_error and test_result.raw_logs is not None: row = '' row += "" + test_result.name + "" @@ -484,8 +484,8 @@ def create_build_html_report( ): row = "" row += f"{build_result.compiler}" - if build_result.build_type: - row += f"{build_result.build_type}" + if build_result.debug_build: + row += "debug" else: row += "relwithdebuginfo" if build_result.sanitizer: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 330a1309016..db98a2c1ab5 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -108,7 +108,7 @@ def main(): gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - description_error, category = check_pr_description(pr_info.body) + description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY) pr_labels_to_add = [] pr_labels_to_remove = [] if ( @@ -137,17 +137,20 @@ def main(): if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) - if FEATURE_LABEL in pr_info.labels: - print(f"The '{FEATURE_LABEL}' in the labels, expect the 'Docs Check' status") + if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation(): + print( + f"The '{FEATURE_LABEL}' in the labels, " + "but there's no changed documentation" + ) post_commit_status( # do not pass pr_info here intentionally commit, - "pending", + "failure", NotSet, f"expect adding docs for {FEATURE_LABEL}", DOCS_NAME, + pr_info, ) - elif not description_error: - set_mergeable_check(commit, "skipped") + sys.exit(1) if description_error: print( @@ -173,6 +176,7 @@ def main(): ) sys.exit(1) + set_mergeable_check(commit, "skipped") ci_report_url = create_ci_report(pr_info, []) if not can_run: print("::notice ::Cannot run") diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 9ee0a431294..2bfe639739b 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -92,7 +92,7 @@ class S3Helper: file_path, ) else: - logging.info("No content type provied for %s", file_path) + logging.info("No content type provided for %s", file_path) else: if re.search(r"\.(txt|log|err|out)$", s3_path) or re.search( r"\.log\..*(? 
str: if CI: diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py new file mode 100644 index 00000000000..be22a1c9312 --- /dev/null +++ b/tests/ci/sqltest.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +import logging +import subprocess +import os +import sys + +from github import Github + +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import ( + RerunHelper, + get_commit, + post_commit_status, +) +from docker_pull_helper import get_image_with_version +from env_helper import ( + GITHUB_RUN_URL, + REPORTS_PATH, + TEMP_PATH, +) +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResult +from s3_helper import S3Helper +from stopwatch import Stopwatch + +IMAGE_NAME = "clickhouse/sqltest" + + +def get_run_command(pr_number, sha, download_url, workspace_path, image): + return ( + f"docker run " + # For sysctl + "--privileged " + "--network=host " + f"--volume={workspace_path}:/workspace " + "--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE " + f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" ' + f"{image}" + ) + + +def main(): + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = TEMP_PATH + reports_path = REPORTS_PATH + + check_name = sys.argv[1] + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + pr_info = PRInfo() + + gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) + + rerun_helper = RerunHelper(commit, check_name) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + + docker_image = get_image_with_version(reports_path, IMAGE_NAME) + + build_name = get_build_name_for_check(check_name) + print(build_name) + urls = read_build_urls(build_name, reports_path) + if not urls: + raise Exception("No build URLs found") + + for url in urls: + if url.endswith("/clickhouse"): + build_url = url + break + else: + raise Exception("Cannot find the clickhouse binary among build results") + + logging.info("Got build url %s", build_url) + + workspace_path = os.path.join(temp_path, "workspace") + if not os.path.exists(workspace_path): + os.makedirs(workspace_path) + + run_command = get_run_command( + pr_info.number, pr_info.sha, build_url, workspace_path, docker_image + ) + logging.info("Going to run %s", run_command) + + run_log_path = os.path.join(temp_path, "run.log") + with open(run_log_path, "w", encoding="utf-8") as log: + with subprocess.Popen( + run_command, shell=True, stderr=log, stdout=log + ) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + check_name_lower = ( + check_name.lower().replace("(", "").replace(")", "").replace(" ", "") + ) + s3_prefix = f"{pr_info.number}/{pr_info.sha}/sqltest_{check_name_lower}/" + paths = { + "run.log": run_log_path, + "server.log.zst": os.path.join(workspace_path, "server.log.zst"), + "server.err.log.zst": os.path.join(workspace_path, "server.err.log.zst"), + "report.html": os.path.join(workspace_path, "report.html"), + "test.log": os.path.join(workspace_path, "test.log"), + } + + s3_helper = S3Helper() + for f in paths: + try: + paths[f] = s3_helper.upload_test_report_to_s3(paths[f], 
s3_prefix + f) + except Exception as ex: + logging.info("Exception uploading file %s text %s", f, ex) + paths[f] = "" + + report_url = GITHUB_RUN_URL + if paths["report.html"]: + report_url = paths["report.html"] + + status = "success" + description = "See the report" + test_result = TestResult(description, "OK") + + ch_helper = ClickHouseHelper() + + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + [test_result], + status, + stopwatch.duration_seconds, + stopwatch.start_time_str, + report_url, + check_name, + ) + + ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) + + logging.info("Result: '%s', '%s', '%s'", status, description, report_url) + print(f"::notice ::Report url: {report_url}") + post_commit_status(commit, status, report_url, description, check_name, pr_info) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 6d17384c63f..2c566144f2c 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -125,6 +125,8 @@ def prepare_for_hung_check(drop_databases): # However, it obstruct checking for hung queries. logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") + # Sometimes there is a message `Child process was stopped by signal 19` in logs after stopping gdb + call_with_retry("kill -CONT $(lsof -ti:9000)") # ThreadFuzzer significantly slows down server and causes false-positive hung check failures call_with_retry("clickhouse client -q 'SYSTEM STOP THREAD FUZZER'") @@ -200,13 +202,14 @@ def prepare_for_hung_check(drop_databases): call( make_query_command( """ - select sleepEachRow(( - select maxOrDefault(300 - elapsed) + 1 - from system.processes - where query not like '%from system.processes%' and elapsed < 300 + SELECT sleepEachRow(( + SELECT maxOrDefault(300 - elapsed) + 1 + FROM system.processes + WHERE query NOT LIKE '%FROM system.processes%' AND elapsed < 300 ) / 300) - from numbers(300) - format Null + FROM numbers(300) + FORMAT Null + SETTINGS function_sleep_max_microseconds_per_block = 0 """ ), shell=True, @@ -299,7 +302,7 @@ if __name__ == "__main__": have_long_running_queries = prepare_for_hung_check(args.drop_databases) except Exception as ex: have_long_running_queries = True - logging.error("Failed to prepare for hung check %s", str(ex)) + logging.error("Failed to prepare for hung check: %s", str(ex)) logging.info("Checking if some queries hung") cmd = " ".join( [ diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index ac280916a2f..21c3178faab 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -13,8 +13,8 @@ from github import Github from build_download_helper import download_all_deb_packages from clickhouse_helper import ( ClickHouseHelper, - mark_flaky_tests, prepare_tests_results_for_clickhouse, + get_instance_type, ) from commit_status_helper import RerunHelper, get_commit, post_commit_status from docker_pull_helper import get_image_with_version @@ -29,12 +29,34 @@ from upload_result_helper import upload_results def get_run_command( - build_path, result_folder, repo_tests_path, server_log_folder, image + pr_info, + check_start_time, + check_name, + build_path, + result_folder, + repo_tests_path, + server_log_folder, + image, ): + instance_type = get_instance_type() + + envs = [ + # a static link, don't use S3_URL or S3_DOWNLOAD + "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets'", + "-e CLICKHOUSE_CI_LOGS_HOST", + "-e CLICKHOUSE_CI_LOGS_PASSWORD", + f"-e 
PULL_REQUEST_NUMBER='{pr_info.number}'", + f"-e COMMIT_SHA='{pr_info.sha}'", + f"-e CHECK_START_TIME='{check_start_time}'", + f"-e CHECK_NAME='{check_name}'", + f"-e INSTANCE_TYPE='{instance_type}'", + ] + + env_str = " ".join(envs) + cmd = ( "docker run --cap-add=SYS_PTRACE " - # a static link, don't use S3_URL or S3_DOWNLOAD - "-e S3_URL='https://s3.amazonaws.com/clickhouse-datasets' " + f"{env_str} " # For dmesg and sysctl "--privileged " f"--volume={build_path}:/package_folder " @@ -150,9 +172,16 @@ def run_stress_test(docker_image_name): run_log_path = os.path.join(temp_path, "run.log") run_command = get_run_command( - packages_path, result_path, repo_tests_path, server_log_path, docker_image + pr_info, + stopwatch.start_time_str, + check_name, + packages_path, + result_path, + repo_tests_path, + server_log_path, + docker_image, ) - logging.info("Going to run func tests: %s", run_command) + logging.info("Going to run stress test: %s", run_command) with TeePopen(run_command, run_log_path, timeout=60 * 150) as process: retcode = process.wait() @@ -168,7 +197,23 @@ def run_stress_test(docker_image_name): result_path, server_log_path, run_log_path ) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, check_name, test_results) + + # Cleanup run log from the credentials of CI logs database. + # Note: a malicious user can still print them by splitting the value into parts. + # But we will be warned when a malicious user modifies CI script. + # Although they can also print them from inside tests. + # Nevertheless, the credentials of the CI logs have limited scope + # and does not provide access to sensitive info. + + ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST") + ci_logs_password = os.getenv( + "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" + ) + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): + subprocess.check_call( + f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'", + shell=True, + ) report_url = upload_results( s3_helper, diff --git a/tests/ci/stress_tests.lib b/tests/ci/stress_tests.lib index 190f3f39f9e..85b376ac39d 100644 --- a/tests/ci/stress_tests.lib +++ b/tests/ci/stress_tests.lib @@ -9,8 +9,6 @@ FAIL="\tFAIL\t\\N\t" FAILURE_CONTEXT_LINES=100 FAILURE_CONTEXT_MAX_LINE_WIDTH=300 -source attach_gdb.lib - function escaped() { # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. 
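The two sed invocations above (in functional_test_check.py and stress_check.py) scrub the CI-logs credentials out of run.log before it is uploaded. A rough pure-Python equivalent of that masking step, shown only as an illustrative sketch and assuming the same environment-variable defaults as the patch:

    import os
    from pathlib import Path

    def mask_ci_logs_credentials(run_log_path: Path) -> None:
        # The getenv defaults mirror the trick used above: if the secret was never
        # exported, the "host" equals the placeholder string and masking is skipped.
        host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST")
        password = os.getenv("CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD")
        if host in ("CLICKHOUSE_CI_LOGS_HOST", ""):
            return
        text = run_log_path.read_text(errors="replace")
        # Replace the real values with their variable names, as the sed command does.
        text = text.replace(host, "CLICKHOUSE_CI_LOGS_HOST")
        text = text.replace(password, "CLICKHOUSE_CI_LOGS_PASSWORD")
        run_log_path.write_text(text)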
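Relatedly, the cached_value_is_valid(updated_at, ttl) helper added to lambda_shared above is reused by the token cache and, further below, by the terminate-runner lambda. A minimal sketch of the TTL-caching pattern it supports; everything except cached_value_is_valid itself is a hypothetical illustration:

    import time
    from dataclasses import dataclass

    def cached_value_is_valid(updated_at: float, ttl: float) -> bool:
        # Same logic as in lambda_shared/__init__.py: a value set less than
        # `ttl` seconds ago is still valid; a never-set value (0) is not.
        if updated_at == 0:
            return False
        return time.time() - ttl < updated_at

    @dataclass
    class Cached:
        time: float = 0.0
        value: str = ""
        updating: bool = False

    _cache = Cached()

    def get_cached(fetch, ttl: float = 300.0) -> str:
        # Serve the cached value while it is fresh, or while another caller refreshes it.
        if cached_value_is_valid(_cache.time, ttl) or _cache.updating:
            return _cache.value
        _cache.updating = True
        _cache.value = fetch()  # `fetch` is a hypothetical zero-argument callable
        _cache.time = time.time()
        _cache.updating = False
        return _cache.value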
diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 33a5cd21f39..0871dd7ec6a 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -12,7 +12,6 @@ from typing import List, Tuple from clickhouse_helper import ( ClickHouseHelper, - mark_flaky_tests, prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( @@ -189,7 +188,6 @@ def main(): state, description, test_results, additional_files = process_result(temp_path) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, NAME, test_results) report_url = upload_results( s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME diff --git a/tests/ci/terminate_runner_lambda/app.py b/tests/ci/terminate_runner_lambda/app.py index ed198d855b9..010f7dd6734 100644 --- a/tests/ci/terminate_runner_lambda/app.py +++ b/tests/ci/terminate_runner_lambda/app.py @@ -9,13 +9,13 @@ from typing import Any, Dict, List import boto3 # type: ignore -from lambda_shared import RunnerDescriptions, list_runners +from lambda_shared import RunnerDescriptions, list_runners, cached_value_is_valid from lambda_shared.token import get_access_token_by_key_app, get_cached_access_token @dataclass class CachedInstances: - time: int + time: float value: dict updating: bool = False @@ -27,17 +27,12 @@ def get_cached_instances() -> dict: """return cached instances description with updating it once per five minutes""" if time.time() - 250 < cached_instances.time or cached_instances.updating: return cached_instances.value - # Indicate that the value is updating now, so the cached value can be - # used. The first setting and close-to-ttl are not counted as update - if cached_instances.time != 0 or time.time() - 300 < cached_instances.time: - cached_instances.updating = True - else: - cached_instances.updating = False + cached_instances.updating = cached_value_is_valid(cached_instances.time, 300) ec2_client = boto3.client("ec2") instances_response = ec2_client.describe_instances( Filters=[{"Name": "instance-state-name", "Values": ["running"]}] ) - cached_instances.time = int(time.time()) + cached_instances.time = time.time() cached_instances.value = { instance["InstanceId"]: instance for reservation in instances_response["Reservations"] @@ -47,6 +42,28 @@ def get_cached_instances() -> dict: return cached_instances.value +@dataclass +class CachedRunners: + time: float + value: RunnerDescriptions + updating: bool = False + + +cached_runners = CachedRunners(0, []) + + +def get_cached_runners(access_token: str) -> RunnerDescriptions: + """From time to time request to GH api costs up to 3 seconds, and + it's a disaster from the termination lambda perspective""" + if time.time() - 5 < cached_runners.time or cached_instances.updating: + return cached_runners.value + cached_runners.updating = cached_value_is_valid(cached_runners.time, 15) + cached_runners.value = list_runners(access_token) + cached_runners.time = time.time() + cached_runners.updating = False + return cached_runners.value + + def how_many_instances_to_kill(event_data: dict) -> Dict[str, int]: data_array = event_data["CapacityToTerminate"] to_kill_by_zone = {} # type: Dict[str, int] @@ -104,7 +121,7 @@ def main(access_token: str, event: dict) -> Dict[str, List[str]]: ) print("Time spent on the requests to AWS: ", time.time() - start) - runners = list_runners(access_token) + runners = get_cached_runners(access_token) runner_ids = set(runner.name for runner in runners) # We used to delete potential hosts to terminate from GitHub runners pool, # but the 
documentation states: diff --git a/tests/ci/tests/docker_images.json b/tests/ci/tests/docker_images_for_tests.json similarity index 96% rename from tests/ci/tests/docker_images.json rename to tests/ci/tests/docker_images_for_tests.json index 0d40d43c33f..70db8760561 100644 --- a/tests/ci/tests/docker_images.json +++ b/tests/ci/tests/docker_images_for_tests.json @@ -119,7 +119,8 @@ "docker/test/stateless", "docker/test/integration/base", "docker/test/fuzzer", - "docker/test/keeper-jepsen" + "docker/test/keeper-jepsen", + "docker/test/sqltest" ] }, "docker/test/integration/kerberized_hadoop": { @@ -153,5 +154,9 @@ "docker/test/sqllogic": { "name": "clickhouse/sqllogic-test", "dependent": [] + }, + "docker/test/sqltest": { + "name": "clickhouse/sqltest", + "dependent": [] } } diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 5279ccde492..1c3ee303b27 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -12,7 +12,6 @@ from github import Github from build_download_helper import download_unit_tests from clickhouse_helper import ( ClickHouseHelper, - mark_flaky_tests, prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( @@ -159,7 +158,6 @@ def main(): state, description, test_results, additional_logs = process_results(test_output) ch_helper = ClickHouseHelper() - mark_flaky_tests(ch_helper, check_name, test_results) report_url = upload_results( s3_helper, diff --git a/tests/ci/utils.lib b/tests/ci/utils.lib index b5ce4ae0d78..c90b7ebe6f6 100644 --- a/tests/ci/utils.lib +++ b/tests/ci/utils.lib @@ -2,6 +2,11 @@ function run_with_retry() { + if [[ $- =~ e ]]; then + set_e=true + else + set_e=false + fi set +e local total_retries="$1" @@ -12,7 +17,9 @@ function run_with_retry() until [ "$retry" -ge "$total_retries" ] do if "$@"; then - set -e + if $set_e; then + set -e + fi return else retry=$((retry + 1)) @@ -26,4 +33,4 @@ function run_with_retry() function fn_exists() { declare -F "$1" > /dev/null; -} \ No newline at end of file +} diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 171dd743f90..d452c985407 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -12,7 +12,8 @@ echo "Running init script" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner -export RUNNER_URL="https://github.com/ClickHouse" +export RUNNER_ORG="ClickHouse" +export RUNNER_URL="https://github.com/${RUNNER_ORG}" # Funny fact, but metadata service has fixed IP INSTANCE_ID=$(ec2metadata --instance-id) export INSTANCE_ID @@ -102,7 +103,8 @@ check_proceed_spot_termination() { runner_pid=$(pgrep Runner.Listener) if [ -n "$runner_pid" ]; then # Kill the runner to not allow it cancelling the job - kill -9 "$runner_pid" + # shellcheck disable=SC2046 + kill -9 $(list_children "$runner_pid") fi sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" terminate_and_exit @@ -171,6 +173,7 @@ set -uo pipefail echo "Runner's public DNS: $(ec2metadata --public-hostname)" echo "Runner's labels: ${LABELS}" +echo "Runner's instance type: $(ec2metadata --instance-type)" EOF # Create a post-run script that will restart docker daemon before the job started @@ -234,6 +237,19 @@ is_job_assigned() { || return 1 } +list_children () { + local children + children=$(ps --ppid "$1" -o pid=) + if [ -z "$children" ]; then + return + fi + + for pid in $children; do + list_children "$pid" + done + echo "$children" +} + while true; do runner_pid=$(pgrep Runner.Listener) echo "Got 
runner pid '$runner_pid'" @@ -268,17 +284,11 @@ while true; do RUNNER_AGE=$(( $(date +%s) - $(stat -c +%Y /proc/"$runner_pid" 2>/dev/null || date +%s) )) echo "The runner is launched $RUNNER_AGE seconds ago and still has hot received the job" if (( 60 < RUNNER_AGE )); then - echo "Check if the instance should tear down" - if ! no_terminating_metadata; then - # Another check if the worker still didn't start - if is_job_assigned; then - echo "During the metadata check the job was assigned, continue" - continue - fi - kill -9 "$runner_pid" - sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" - terminate_on_event - fi + echo "Attempt to delete the runner for a graceful shutdown" + sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ + || continue + echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" + terminate_and_exit fi fi sleep 5 diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 5e2331ece3c..e511d773577 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -64,6 +64,7 @@ NEED_RERUN_WORKFLOWS = { "DocsCheck", "MasterCI", "NightlyBuilds", + "PublishedReleaseCI", "PullRequestCI", "ReleaseBranchCI", } diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 95470f77987..ad7fd8632f2 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -383,19 +383,19 @@ def get_stacktraces_from_clickhouse(args): ) replicated_msg = ( f"{args.client} {settings_str} --query " - '"SELECT materialize((hostName(), tcpPort())) as host, thread_id, ' + '"SELECT materialize((hostName(), tcpPort())) as host, thread_name, thread_id, query_id, trace, ' "arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " "arrayMap(x -> addressToLine(x), trace), " - "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace " + "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace_str " "FROM clusterAllReplicas('test_cluster_database_replicated', 'system.stack_trace') " 'ORDER BY host, thread_id FORMAT Vertical"' ) msg = ( f"{args.client} {settings_str} --query " - "\"SELECT arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " + "\"SELECT thread_name, thread_id, query_id, trace, arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), " "arrayMap(x -> addressToLine(x), trace), " - "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace " + "arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace_str " 'FROM system.stack_trace FORMAT Vertical"' ) @@ -505,7 +505,6 @@ class FailureReason(enum.Enum): REPLICATED_DB = "replicated-database" S3_STORAGE = "s3-storage" BUILD = "not running for current build" - NO_UPGRADE_CHECK = "not running for upgrade check" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" # UNKNOWN reasons @@ -529,6 +528,12 @@ def threshold_generator(always_on_prob, always_off_prob, min_val, max_val): return gen +# To keep dependency list as short as possible, tzdata is not used here (to +# avoid try/except block for import) +def get_localzone(): + return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:])) + + class SettingsRandomizer: settings = { "max_insert_threads": lambda: 0 @@ -602,20 +607,36 @@ class SettingsRandomizer: "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint( 0, 1 ), + "min_count_to_compile_expression": lambda: random.choice([0, 3]), + "min_count_to_compile_aggregate_expression": 
lambda: random.choice([0, 3]), + "min_count_to_compile_sort_description": lambda: random.choice([0, 3]), + "session_timezone": lambda: random.choice( + [ + # special non-deterministic around 1970 timezone, see [1]. + # + # [1]: https://github.com/ClickHouse/ClickHouse/issues/42653 + "America/Mazatlan", + "America/Hermosillo", + "Mexico/BajaSur", + # server default that is randomized across all timezones + # NOTE: due to lots of trickery we cannot use empty timezone here, but this should be the same. + get_localzone(), + ] + ), } @staticmethod def get_random_settings(args): - random_settings = [] + random_settings = {} is_debug = BuildFlags.DEBUG in args.build_flags for setting, generator in SettingsRandomizer.settings.items(): if ( is_debug and setting == "allow_prefetched_read_pool_for_remote_filesystem" ): - random_settings.append(f"{setting}=0") + random_settings[setting] = 0 else: - random_settings.append(f"{setting}={generator()}") + random_settings[setting] = generator() return random_settings @@ -651,10 +672,10 @@ class MergeTreeSettingsRandomizer: @staticmethod def get_random_settings(args): - random_settings = [] + random_settings = {} for setting, generator in MergeTreeSettingsRandomizer.settings.items(): if setting not in args.changed_merge_tree_settings: - random_settings.append(f"{setting}={generator()}") + random_settings[setting] = generator() return random_settings @@ -766,7 +787,14 @@ class TestCase: @staticmethod def cli_format_settings(settings_list) -> str: - return " ".join([f"--{setting}" for setting in settings_list]) + out = [] + for k, v in settings_list.items(): + out.extend([f"--{k}", str(v)]) + return " ".join(out) + + @staticmethod + def http_format_settings(settings_list) -> str: + return urllib.parse.urlencode(settings_list) def has_show_create_table_in_test(self): return not subprocess.call(["grep", "-iq", "show create", self.case_file]) @@ -774,11 +802,12 @@ class TestCase: def add_random_settings(self, client_options): new_options = "" if self.randomize_settings: + http_params = self.http_format_settings(self.random_settings) if len(self.base_url_params) == 0: - os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings) + os.environ["CLICKHOUSE_URL_PARAMS"] = http_params else: os.environ["CLICKHOUSE_URL_PARAMS"] = ( - self.base_url_params + "&" + "&".join(self.random_settings) + self.base_url_params + "&" + http_params ) new_options += f" {self.cli_format_settings(self.random_settings)}" @@ -919,14 +948,18 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB - # TODO: remove checking "no-upgrade-check" after 23.1 - elif args.upgrade_check and ( - "no-upgrade-check" in tags or "no-upgrade-check" in tags - ): - return FailureReason.NO_UPGRADE_CHECK - elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE + elif ( + tags + and ("no-s3-storage-with-slow-build" in tags) + and args.s3_storage + and ( + BuildFlags.THREAD in args.build_flags + or BuildFlags.DEBUG in args.build_flags + ) + ): + return FailureReason.S3_STORAGE elif tags: for build_flag in args.build_flags: @@ -1163,8 +1196,18 @@ class TestCase: # This is for .sh tests os.environ["CLICKHOUSE_LOG_COMMENT"] = args.testcase_basename + query_params = "" + if "need-query-parameters" in self.tags: + query_params = ( + " --param_CLICKHOUSE_DATABASE=" + + database + + " --param_CLICKHOUSE_DATABASE_1=" + + database + + "_1" + ) + params = { - "client": client + " --database=" + 
database, + "client": client + " --database=" + database + query_params, "logs_level": server_logs_level, "options": client_options, "test": self.case_file, @@ -1212,7 +1255,29 @@ class TestCase: seconds_left = max( args.timeout - (datetime.now() - start_time).total_seconds(), 20 ) - drop_database_query = "DROP DATABASE IF EXISTS " + database + + # Check if the test does not cleanup its tables. + # Only for newly added tests. Please extend this check to the old tests as well. + if self.case_file >= "02800": + leftover_tables = ( + clickhouse_execute( + args, + f"SHOW TABLES FROM {database}", + timeout=seconds_left, + settings={ + "log_comment": args.testcase_basename, + }, + ) + .decode() + .replace("\n", ", ") + ) + + if len(leftover_tables) != 0: + raise Exception( + f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally." + ) + + drop_database_query = f"DROP DATABASE IF EXISTS {database}" if args.replicated_database: drop_database_query += " ON CLUSTER test_cluster_database_replicated" @@ -1402,13 +1467,13 @@ class TestSuite: else: raise Exception(f"Unknown file_extension: {filename}") - def parse_tags_from_line(line, comment_sign): + def parse_tags_from_line(line, comment_sign) -> set[str]: if not line.startswith(comment_sign): - return None + return set() tags_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 tags_prefix = "Tags:" if not tags_str.startswith(tags_prefix): - return None + return set() tags_str = tags_str[len(tags_prefix) :] # noqa: ignore E203 tags = tags_str.split(",") tags = {tag.strip() for tag in tags} @@ -1426,12 +1491,23 @@ class TestSuite: def load_tags_from_file(filepath): comment_sign = get_comment_sign(filepath) + need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: - line = find_tag_line(file) + tag_line = find_tag_line(file) except UnicodeDecodeError: return [] - return parse_tags_from_line(line, comment_sign) + try: + if filepath.endswith(".sql"): + for line in file: + if "{CLICKHOUSE_DATABASE" in line: + need_query_params = True + except UnicodeDecodeError: + pass + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + return parsed_tags all_tags = {} start_time = datetime.now() @@ -2097,7 +2173,7 @@ def reportLogStats(args): print("\n") query = """ - SELECT message_format_string, count(), substr(any(message), 1, 120) AS any_message + SELECT message_format_string, count(), any(message) AS any_message FROM system.text_log WHERE (now() - toIntervalMinute(240)) < event_time AND (message NOT LIKE (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') AS s)) diff --git a/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml b/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml new file mode 100644 index 00000000000..504841296a8 --- /dev/null +++ b/tests/config/config.d/enable_wait_for_shutdown_replicated_tables.xml @@ -0,0 +1,5 @@ + + + 3000 + + diff --git a/tests/config/config.d/forbidden_headers.xml b/tests/config/config.d/forbidden_headers.xml new file mode 100644 index 00000000000..0d48f650fe6 --- /dev/null +++ b/tests/config/config.d/forbidden_headers.xml @@ -0,0 +1,6 @@ + + +
exact_header
+ (?i)(case_insensitive_header) +
+
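For context: the clickhouse-test changes above switch the randomized settings from a list of preformatted "key=value" strings to a plain dict, which is then rendered once as clickhouse-client flags and once as HTTP URL parameters. A minimal sketch of that behaviour with illustrative setting names (the two helpers mirror cli_format_settings and http_format_settings from the patch):

import urllib.parse

random_settings = {"max_block_size": 10240, "group_by_use_nulls": 1}  # example values only

def cli_format_settings(settings: dict) -> str:
    # Render "--key value" pairs for clickhouse-client.
    out = []
    for k, v in settings.items():
        out.extend([f"--{k}", str(v)])
    return " ".join(out)

def http_format_settings(settings: dict) -> str:
    # Render "key=value&..." for CLICKHOUSE_URL_PARAMS.
    return urllib.parse.urlencode(settings)

print(cli_format_settings(random_settings))   # --max_block_size 10240 --group_by_use_nulls 1
print(http_format_settings(random_settings))  # max_block_size=10240&group_by_use_nulls=1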
diff --git a/tests/config/config.d/s3_storage_policy_by_default.xml b/tests/config/config.d/s3_storage_policy_by_default.xml index 9685512a12a..1a23e99b759 100644 --- a/tests/config/config.d/s3_storage_policy_by_default.xml +++ b/tests/config/config.d/s3_storage_policy_by_default.xml @@ -6,14 +6,18 @@ http://localhost:11111/test/test/ clickhouse clickhouse - 1 - 22548578304 + + cache + 1Gi + /var/lib/clickhouse/s3_cache/ + s3 +
-                <disk>s3</disk>
+                <disk>cached_s3</disk>
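The policy change above makes the default S3 storage policy write through a local filesystem cache (the new cached_s3 disk) rather than going to the s3 disk directly. A hypothetical way to confirm the active layout on a test server, not part of the patch (assumes a locally running server and clickhouse-client on PATH):

import subprocess

query = (
    "SELECT policy_name, volume_name, disks "
    "FROM system.storage_policies"
)
out = subprocess.run(
    ["clickhouse-client", "--query", query],
    capture_output=True, text=True, check=True,
).stdout
print(out)  # the S3-backed policy should now list cached_s3 in its disks column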
diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index deee71bd812..8533fef9fc9 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -1,56 +1,14 @@ - s3 s3_disk/ - http://localhost:11111/test/00170_test/ + http://localhost:11111/test/common/ clickhouse clickhouse 20000 - - s3 - s3_disk_2/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - - s3 - s3_disk_3/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - - s3 - s3_disk_4/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - - s3 - s3_disk_5/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - - s3 - s3_disk_6/ - http://localhost:11111/test/00170_test/ - clickhouse - clickhouse - 20000 - - cache s3_disk @@ -59,65 +17,6 @@ 1 100 - - cache - s3_disk_2 - s3_cache_2/ - 128Mi - 100Mi - 100 - - - cache - s3_disk_3 - s3_disk_3_cache/ - 128Mi - 22548578304 - 1 - 1 - 100 - - - cache - s3_disk_4 - s3_cache_4/ - 128Mi - 1 - 1 - 100 - - - cache - s3_disk_5 - s3_cache_5/ - 128Mi - 100 - - - cache - s3_disk_6 - s3_cache_6/ - 128Mi - 1 - 100 - 100 - - - cache - s3_disk_6 - s3_cache_small/ - 1000 - 100 - - - cache - s3_disk_6 - s3_cache_small_segment_size/ - 128Mi - 10Ki - 1 - 100 - local_blob_storage @@ -159,7 +58,7 @@ cache - s3_cache_5 + s3_cache s3_cache_multi/ 22548578304 100 @@ -180,34 +79,6 @@ - - -
- s3_cache_2 -
-
-
- - -
- s3_cache_3 -
-
-
- - -
- s3_cache_4 -
-
-
- - -
- s3_cache_6 -
-
-
@@ -215,13 +86,6 @@
- - -
- s3_cache_small -
-
-
@@ -243,13 +107,6 @@
- - -
- s3_cache_small_segment_size -
-
-
diff --git a/tests/config/config.d/text_log.xml b/tests/config/config.d/text_log.xml index dce4942d952..d3608c5439d 100644 --- a/tests/config/config.d/text_log.xml +++ b/tests/config/config.d/text_log.xml @@ -2,6 +2,6 @@ system text_log
-        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <flush_interval_milliseconds>4000</flush_interval_milliseconds>
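The flush interval for system.text_log is lowered above from 7500 ms to 4000 ms so log entries become queryable sooner during tests. Tests that cannot wait even that long usually force a flush explicitly; a hedged sketch, not part of the patch (assumes a locally running server and clickhouse-client on PATH):

import subprocess

def ch(query: str) -> str:
    return subprocess.run(
        ["clickhouse-client", "--query", query],
        capture_output=True, text=True, check=True,
    ).stdout

ch("SYSTEM FLUSH LOGS")  # flush buffered entries into the system.*_log tables immediately
print(ch("SELECT count() FROM system.text_log WHERE event_time > now() - INTERVAL 1 MINUTE"))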
diff --git a/tests/config/install.sh b/tests/config/install.sh index 50f2627d37c..503da780061 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -43,7 +43,7 @@ ln -sf $SRC_PATH/config.d/transactions.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/CORS.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/ -ln -sf $SRC_PATH/config.d/logger_test.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/logger_trace.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/named_collection.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/ssl_certs.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/filesystem_cache_log.xml $DEST_SERVER_PATH/config.d/ @@ -51,12 +51,14 @@ ln -sf $SRC_PATH/config.d/session_log.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/system_unfreeze.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_zero_copy_replication.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/forbidden_headers.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/custom_disks_base_path.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/reverse_dns_query_function.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/disable_s3_env_credentials.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/enable_wait_for_shutdown_replicated_tables.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/backups.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. diff --git a/tests/config/users.d/session_log_test.xml b/tests/config/users.d/session_log_test.xml index daddaa6e4b9..cc2c2c5fcde 100644 --- a/tests/config/users.d/session_log_test.xml +++ b/tests/config/users.d/session_log_test.xml @@ -17,7 +17,7 @@ - + ::1 127.0.0.1 diff --git a/tests/instructions/easy_tasks_sorted_ru.md b/tests/instructions/easy_tasks_sorted_ru.md index 09ea48d0bd9..17e9708eef5 100644 --- a/tests/instructions/easy_tasks_sorted_ru.md +++ b/tests/instructions/easy_tasks_sorted_ru.md @@ -129,7 +129,7 @@ position с конца строки. Атомарно удаляет таблицу перед созданием новой, если такая была. -## * Приведение типов для IN (subquery). +## + Приведение типов для IN (subquery). `SELECT 1 IN (SELECT -1 UNION ALL SELECT 1)` @@ -205,12 +205,12 @@ https://clickhouse.com/docs/en/operations/table_engines/external_data/ ## Возможность задавать параметры соединений для табличных функций, движков таблиц и для реплик из отдельных разделов конфигурации. -## Настройка rollup_use_nulls. +## + Настройка rollup_use_nulls. + +Upd: it is named "group_by_use_nulls". ## + Настройка cast_keep_nullable. -## Функция bitEquals для сравнения произвольных типов данных побитово. +## Функция bitEquals для сравнения произвольных типов данных побитово ## Функция serialize для implementation specific non portable non backwards compatible сериализации любого типа данных в набор байт. - -## Функция bitEquals и оператор <=>. 
diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index d6d17abe725..aa89ccf11b3 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -7,9 +7,11 @@ import json import logging import os import random +import re import shutil import subprocess import time +import shlex import zlib # for crc32 @@ -110,16 +112,36 @@ def get_counters(fname): if not (".py::" in line and " " in line): continue - line_arr = line.strip().split(" ") + line = line.strip() + # [gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client + # ^^^^^^^^^^^^^ + if line.strip().startswith("["): + line = re.sub("^\[[^\[\]]*\] \[[^\[\]]*\] ", "", line) + + line_arr = line.split(" ") if len(line_arr) < 2: logging.debug("Strange line %s", line) continue # Lines like: - # [gw0] [ 7%] ERROR test_mysql_protocol/test.py::test_golang_client - # [gw3] [ 40%] PASSED test_replicated_users/test.py::test_rename_replicated[QUOTA] - state = line_arr[-2] - test_name = line_arr[-1] + # + # ERROR test_mysql_protocol/test.py::test_golang_client + # PASSED test_replicated_users/test.py::test_rename_replicated[QUOTA] + # PASSED test_drop_is_lock_free/test.py::test_query_is_lock_free[detach part] + # + state = line_arr.pop(0) + test_name = " ".join(line_arr) + + # Normalize test names for lines like this: + # + # FAILED test_storage_s3/test.py::test_url_reconnect_in_the_middle - Exception + # FAILED test_distributed_ddl/test.py::test_default_database[configs] - AssertionError: assert ... + # + test_name = re.sub( + r"^(?P[^\[\] ]+)(?P\[[^\[\]]*\]|)(?P - .*|)$", + r"\g\g", + test_name, + ) if state in counters: counters[state].add(test_name) @@ -168,7 +190,7 @@ def clear_ip_tables_and_restart_daemons(): try: logging.info("Killing all alive docker containers") subprocess.check_output( - "timeout -s 9 10m docker ps --quiet | xargs --no-run-if-empty docker kill", + "timeout --signal=KILL 10m docker ps --quiet | xargs --no-run-if-empty docker kill", shell=True, ) except subprocess.CalledProcessError as err: @@ -177,7 +199,7 @@ def clear_ip_tables_and_restart_daemons(): try: logging.info("Removing all docker containers") subprocess.check_output( - "timeout -s 9 10m docker ps --all --quiet | xargs --no-run-if-empty docker rm --force", + "timeout --signal=KILL 10m docker ps --all --quiet | xargs --no-run-if-empty docker rm --force", shell=True, ) except subprocess.CalledProcessError as err: @@ -239,6 +261,8 @@ class ClickhouseIntegrationTestsRunner: self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) + self.use_analyzer = os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None + if "run_by_hash_total" in self.params: self.run_by_hash_total = self.params["run_by_hash_total"] self.run_by_hash_num = self.params["run_by_hash_num"] @@ -289,6 +313,7 @@ class ClickhouseIntegrationTestsRunner: "clickhouse/mysql-java-client", "clickhouse/mysql-js-client", "clickhouse/mysql-php-client", + "clickhouse/nginx-dav", "clickhouse/postgresql-java-client", ] @@ -297,7 +322,7 @@ class ClickhouseIntegrationTestsRunner: cmd = ( "cd {repo_path}/tests/integration && " - "timeout -s 9 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format( + "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} --pre-pull --command '{command}' ".format( repo_path=repo_path, runner_opts=self._get_runner_opts(), image_cmd=image_cmd, @@ -398,6 +423,9 @@ class ClickhouseIntegrationTestsRunner: result.append("--tmpfs") if self.disable_net_host: 
result.append("--disable-net-host") + if self.use_analyzer: + result.append("--analyzer") + return " ".join(result) def _get_all_tests(self, repo_path): @@ -406,9 +434,9 @@ class ClickhouseIntegrationTestsRunner: out_file_full = os.path.join(self.result_path, "runner_get_all_tests.log") cmd = ( "cd {repo_path}/tests/integration && " - "timeout -s 9 1h ./runner {runner_opts} {image_cmd} ' --setup-plan' " - "| tee {out_file_full} | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' " - "| grep -v 'SKIPPED' | sort -u > {out_file}".format( + "timeout --signal=KILL 1h ./runner {runner_opts} {image_cmd} -- --setup-plan " + "| tee '{out_file_full}' | grep -F '::' | sed -r 's/ \(fixtures used:.*//g; s/^ *//g; s/ *$//g' " + "| grep -v -F 'SKIPPED' | sort --unique > {out_file}".format( repo_path=repo_path, runner_opts=self._get_runner_opts(), image_cmd=image_cmd, @@ -480,34 +508,32 @@ class ClickhouseIntegrationTestsRunner: result[test_file].append(test) return result - def _update_counters(self, main_counters, current_counters): + def _update_counters(self, main_counters, current_counters, broken_tests): for test in current_counters["PASSED"]: - if ( - test not in main_counters["PASSED"] - and test not in main_counters["FLAKY"] - ): - is_flaky = False + if test not in main_counters["PASSED"]: if test in main_counters["FAILED"]: main_counters["FAILED"].remove(test) - is_flaky = True if test in main_counters["ERROR"]: main_counters["ERROR"].remove(test) - is_flaky = True - if is_flaky: - main_counters["FLAKY"].append(test) - else: + if test in main_counters["BROKEN"]: + main_counters["BROKEN"].remove(test) + + if test not in broken_tests: main_counters["PASSED"].append(test) + else: + main_counters["NOT_FAILED"].append(test) for state in ("ERROR", "FAILED"): for test in current_counters[state]: - if test in main_counters["FLAKY"]: - continue if test in main_counters["PASSED"]: main_counters["PASSED"].remove(test) - main_counters["FLAKY"].append(test) continue - if test not in main_counters[state]: - main_counters[state].append(test) + if test not in broken_tests: + if test not in main_counters[state]: + main_counters[state].append(test) + else: + if test not in main_counters["BROKEN"]: + main_counters["BROKEN"].append(test) for state in ("SKIPPED",): for test in current_counters[state]: @@ -565,11 +591,22 @@ class ClickhouseIntegrationTestsRunner: return res def try_run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers + self, + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ): try: return self.run_test_group( - repo_path, test_group, tests_in_group, num_tries, num_workers + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ) except Exception as e: logging.info("Failed to run {}:\n{}".format(str(test_group), str(e))) @@ -578,7 +615,6 @@ class ClickhouseIntegrationTestsRunner: "PASSED": [], "FAILED": [], "SKIPPED": [], - "FLAKY": [], } tests_times = defaultdict(float) for test in tests_in_group: @@ -587,14 +623,21 @@ class ClickhouseIntegrationTestsRunner: return counters, tests_times, [] def run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers + self, + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ): counters = { "ERROR": [], "PASSED": [], "FAILED": [], "SKIPPED": [], - "FLAKY": [], + "BROKEN": [], + "NOT_FAILED": [], } tests_times = defaultdict(float) @@ -626,7 +669,7 @@ class 
ClickhouseIntegrationTestsRunner: info_basename = test_group_str + "_" + str(i) + ".nfo" info_path = os.path.join(repo_path, "tests/integration", info_basename) - test_cmd = " ".join([test for test in sorted(test_names)]) + test_cmd = " ".join([shlex.quote(test) for test in sorted(test_names)]) parallel_cmd = ( " --parallel {} ".format(num_workers) if num_workers > 0 else "" ) @@ -635,7 +678,7 @@ class ClickhouseIntegrationTestsRunner: # -E -- (E)rror # -p -- (p)assed # -s -- (s)kipped - cmd = "cd {}/tests/integration && timeout -s 9 1h ./runner {} {} -t {} {} '-rfEps --run-id={} --color=no --durations=0 {}' | tee {}".format( + cmd = "cd {}/tests/integration && timeout --signal=KILL 1h ./runner {} {} -t {} {} -- -rfEps --run-id={} --color=no --durations=0 {} | tee {}".format( repo_path, self._get_runner_opts(), image_cmd, @@ -700,7 +743,7 @@ class ClickhouseIntegrationTestsRunner: ) times_lines = parse_test_times(info_path) new_tests_times = get_test_times(times_lines) - self._update_counters(counters, new_counters) + self._update_counters(counters, new_counters, broken_tests) for test_name, test_time in new_tests_times.items(): tests_times[test_name] = test_time @@ -722,11 +765,11 @@ class ClickhouseIntegrationTestsRunner: ) log_paths.append(extras_result_path) - if len(counters["PASSED"]) + len(counters["FLAKY"]) == len(tests_in_group): + if len(counters["PASSED"]) == len(tests_in_group): logging.info("All tests from group %s passed", test_group) break if ( - len(counters["PASSED"]) + len(counters["FLAKY"]) >= 0 + len(counters["PASSED"]) >= 0 and len(counters["FAILED"]) == 0 and len(counters["ERROR"]) == 0 ): @@ -746,6 +789,7 @@ class ClickhouseIntegrationTestsRunner: and test not in counters["ERROR"] and test not in counters["SKIPPED"] and test not in counters["FAILED"] + and test not in counters["BROKEN"] and "::" in test ): counters["ERROR"].append(test) @@ -773,7 +817,7 @@ class ClickhouseIntegrationTestsRunner: final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( - repo_path, "bugfix" if should_fail else "flaky", tests_to_run, 1, 1 + repo_path, "bugfix" if should_fail else "flaky", tests_to_run, 1, 1, [] ) logs += log_paths if counters["FAILED"]: @@ -790,7 +834,6 @@ class ClickhouseIntegrationTestsRunner: result_state = "failure" if not should_fail: break - assert len(counters["FLAKY"]) == 0 or should_fail logging.info("Try is OK, all tests passed, going to clear env") clear_ip_tables_and_restart_daemons() logging.info("And going to sleep for some time") @@ -800,7 +843,7 @@ class ClickhouseIntegrationTestsRunner: time.sleep(5) test_result = [] - for state in ("ERROR", "FAILED", "PASSED", "SKIPPED", "FLAKY"): + for state in ("ERROR", "FAILED", "PASSED", "SKIPPED"): if state == "PASSED": text_state = "OK" elif state == "FAILED": @@ -893,7 +936,8 @@ class ClickhouseIntegrationTestsRunner: "PASSED": [], "FAILED": [], "SKIPPED": [], - "FLAKY": [], + "BROKEN": [], + "NOT_FAILED": [], } tests_times = defaultdict(float) tests_log_paths = defaultdict(list) @@ -905,10 +949,16 @@ class ClickhouseIntegrationTestsRunner: logging.info("Shuffling test groups") random.shuffle(items_to_run) + broken_tests = list() + if self.use_analyzer: + with open(f"{repo_path}/tests/analyzer_integration_broken_tests.txt") as f: + broken_tests = f.read().splitlines() + logging.info(f"Broken tests in the list: {len(broken_tests)}") + for group, tests in items_to_run: logging.info("Running test group %s containing %s tests", group, len(tests)) 
group_counters, group_test_times, log_paths = self.try_run_test_group( - repo_path, group, tests, MAX_RETRY, NUM_WORKERS + repo_path, group, tests, MAX_RETRY, NUM_WORKERS, broken_tests ) total_tests = 0 for counter, value in group_counters.items(): @@ -940,7 +990,14 @@ class ClickhouseIntegrationTestsRunner: result_state = "success" test_result = [] - for state in ("ERROR", "FAILED", "PASSED", "SKIPPED", "FLAKY"): + for state in ( + "ERROR", + "FAILED", + "PASSED", + "SKIPPED", + "BROKEN", + "NOT_FAILED", + ): if state == "PASSED": text_state = "OK" elif state == "FAILED": @@ -953,15 +1010,12 @@ class ClickhouseIntegrationTestsRunner: ] failed_sum = len(counters["FAILED"]) + len(counters["ERROR"]) - status_text = "fail: {}, passed: {}, flaky: {}".format( - failed_sum, len(counters["PASSED"]), len(counters["FLAKY"]) - ) + status_text = "fail: {}, passed: {}".format(failed_sum, len(counters["PASSED"])) if self.soft_deadline_time < time.time(): status_text = "Timeout, " + status_text result_state = "failure" - counters["FLAKY"] = [] if not counters or sum(len(counter) for counter in counters.values()) == 0: status_text = "No tests found for some reason! It's a bug" result_state = "failure" @@ -969,16 +1023,6 @@ class ClickhouseIntegrationTestsRunner: if "(memory)" in self.params["context_name"]: result_state = "success" - for res in test_result: - # It's not easy to parse output of pytest - # Especially when test names may contain spaces - # Do not allow it to avoid obscure failures - if " " not in res[0]: - continue - logging.warning("Found invalid test name with space: %s", res[0]) - status_text = "Found test with invalid name, see main log" - result_state = "failure" - return result_state, status_text, test_result, [] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 5933883f7b0..6e76270c607 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -12,6 +12,22 @@ from helpers.network import _NetworkManager logging.raiseExceptions = False +@pytest.fixture(autouse=True, scope="session") +def tune_local_port_range(): + # Lots of services uses non privileged ports: + # - hdfs -- 50020/50070/... + # - minio + # + # NOTE: 5K is not enough, and sometimes leads to EADDRNOTAVAIL error. 
+ # NOTE: it is not inherited, so you may need to specify this in docker_compose_$SERVICE.yml + try: + run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True) + except Exception as ex: + logging.warning( + "Failed to run sysctl, tests may fail with EADDRINUSE %s", str(ex) + ) + + @pytest.fixture(autouse=True, scope="session") def cleanup_environment(): try: @@ -45,7 +61,7 @@ def cleanup_environment(): logging.debug("Pruning Docker networks") run_and_check( - ["docker network prune"], + ["docker network prune --force"], shell=True, nothrow=True, ) diff --git a/tests/integration/helpers/0_common_enable_analyzer.xml b/tests/integration/helpers/0_common_enable_analyzer.xml new file mode 100644 index 00000000000..aa374364ef0 --- /dev/null +++ b/tests/integration/helpers/0_common_enable_analyzer.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py index fdeedb9a80d..c5f0584f828 100644 --- a/tests/integration/helpers/client.py +++ b/tests/integration/helpers/client.py @@ -182,7 +182,8 @@ class CommandRequest: # we suppress stderror on client becase sometimes thread sanitizer # can print some debug information there env = {} - env["TSAN_OPTIONS"] = "verbosity=0" + env["ASAN_OPTIONS"] = "use_sigaltstack=0" + env["TSAN_OPTIONS"] = "use_sigaltstack=0 verbosity=0" self.process = sp.Popen( command, stdin=stdin_file, diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index cd6861c29a0..eff44de842a 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -64,6 +64,14 @@ DEFAULT_ENV_NAME = ".env" SANITIZER_SIGN = "==================" +CLICKHOUSE_START_COMMAND = ( + "clickhouse server --config-file=/etc/clickhouse-server/{main_config_file}" +) + +CLICKHOUSE_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.log" + +CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.log" + # to create docker-compose env file def _create_env_file(path, variables): @@ -478,6 +486,8 @@ class ClickHouseCluster: self.kafka_docker_id = None self.schema_registry_host = "schema-registry" self._schema_registry_port = 0 + self.schema_registry_auth_host = "schema-registry-auth" + self._schema_registry_auth_port = 0 self.kafka_docker_id = self.get_instance_docker_id(self.kafka_host) self.coredns_host = "coredns" @@ -649,6 +659,13 @@ class ClickHouseCluster: self._schema_registry_port = get_free_port() return self._schema_registry_port + @property + def schema_registry_auth_port(self): + if self._schema_registry_auth_port: + return self._schema_registry_auth_port + self._schema_registry_auth_port = get_free_port() + return self._schema_registry_auth_port + @property def kerberized_kafka_port(self): if self._kerberized_kafka_port: @@ -1155,8 +1172,11 @@ class ClickHouseCluster: self.with_kafka = True env_variables["KAFKA_HOST"] = self.kafka_host env_variables["KAFKA_EXTERNAL_PORT"] = str(self.kafka_port) + env_variables["SCHEMA_REGISTRY_DIR"] = instance.path + "/" env_variables["SCHEMA_REGISTRY_EXTERNAL_PORT"] = str(self.schema_registry_port) - env_variables["SCHEMA_REGISTRY_INTERNAL_PORT"] = "8081" + env_variables["SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT"] = str( + self.schema_registry_auth_port + ) self.base_cmd.extend( ["--file", p.join(docker_compose_yml_dir, "docker_compose_kafka.yml")] ) @@ -1490,6 +1510,7 @@ class ClickHouseCluster: with_kafka=False, with_kerberized_kafka=False, with_kerberos_kdc=False, + with_secrets=False, 
with_rabbitmq=False, with_nats=False, clickhouse_path_dir=None, @@ -1497,6 +1518,8 @@ class ClickHouseCluster: with_postgres=False, with_postgres_cluster=False, with_postgresql_java_client=False, + clickhouse_log_file=CLICKHOUSE_LOG_FILE, + clickhouse_error_log_file=CLICKHOUSE_ERROR_LOG_FILE, with_hdfs=False, with_kerberized_hdfs=False, with_mongo=False, @@ -1510,6 +1533,7 @@ class ClickHouseCluster: with_jdbc_bridge=False, with_hive=False, with_coredns=False, + allow_analyzer=True, hostname=None, env_variables=None, image="clickhouse/integration-test", @@ -1563,6 +1587,13 @@ class ClickHouseCluster: "LLVM_PROFILE_FILE" ] = "/var/lib/clickhouse/server_%h_%p_%m.profraw" + clickhouse_start_command = CLICKHOUSE_START_COMMAND + if clickhouse_log_file: + clickhouse_start_command += " --log-file=" + clickhouse_log_file + if clickhouse_error_log_file: + clickhouse_start_command += " --errorlog-file=" + clickhouse_error_log_file + logging.debug(f"clickhouse_start_command: {clickhouse_start_command}") + instance = ClickHouseInstance( cluster=self, base_path=self.base_dir, @@ -1587,15 +1618,20 @@ class ClickHouseCluster: with_nats=with_nats, with_nginx=with_nginx, with_kerberized_hdfs=with_kerberized_hdfs, + with_secrets=with_secrets + or with_kerberized_hdfs + or with_kerberos_kdc + or with_kerberized_kafka, with_mongo=with_mongo or with_mongo_secure, with_meili=with_meili, with_redis=with_redis, with_minio=with_minio, with_azurite=with_azurite, - with_cassandra=with_cassandra, with_jdbc_bridge=with_jdbc_bridge, with_hive=with_hive, with_coredns=with_coredns, + with_cassandra=with_cassandra, + allow_analyzer=allow_analyzer, server_bin_path=self.server_bin_path, odbc_bridge_bin_path=self.odbc_bridge_bin_path, library_bridge_bin_path=self.library_bridge_bin_path, @@ -1604,6 +1640,10 @@ class ClickHouseCluster: with_postgres=with_postgres, with_postgres_cluster=with_postgres_cluster, with_postgresql_java_client=with_postgresql_java_client, + clickhouse_start_command=clickhouse_start_command, + main_config_name=main_config_name, + users_config_name=users_config_name, + copy_common_configs=copy_common_configs, hostname=hostname, env_variables=env_variables, image=image, @@ -1612,9 +1652,6 @@ class ClickHouseCluster: ipv4_address=ipv4_address, ipv6_address=ipv6_address, with_installed_binary=with_installed_binary, - main_config_name=main_config_name, - users_config_name=users_config_name, - copy_common_configs=copy_common_configs, external_dirs=external_dirs, tmpfs=tmpfs or [], config_root_name=config_root_name, @@ -2475,20 +2512,27 @@ class ClickHouseCluster: raise Exception("Can't wait Azurite to start") def wait_schema_registry_to_start(self, timeout=180): - sr_client = CachedSchemaRegistryClient( - {"url": "http://localhost:{}".format(self.schema_registry_port)} - ) - start = time.time() - while time.time() - start < timeout: - try: - sr_client._send_request(sr_client.url) - logging.debug("Connected to SchemaRegistry") - return sr_client - except Exception as ex: - logging.debug(("Can't connect to SchemaRegistry: %s", str(ex))) - time.sleep(1) + for port in self.schema_registry_port, self.schema_registry_auth_port: + reg_url = "http://localhost:{}".format(port) + arg = {"url": reg_url} + sr_client = CachedSchemaRegistryClient(arg) - raise Exception("Can't wait Schema Registry to start") + start = time.time() + sr_started = False + sr_auth_started = False + while time.time() - start < timeout: + try: + sr_client._send_request(sr_client.url) + logging.debug("Connected to SchemaRegistry") + # don't care 
about possible auth errors + sr_started = True + break + except Exception as ex: + logging.debug(("Can't connect to SchemaRegistry: %s", str(ex))) + time.sleep(1) + + if not sr_started: + raise Exception("Can't wait Schema Registry to start") def wait_cassandra_to_start(self, timeout=180): self.cassandra_ip = self.get_instance_ip(self.cassandra_host) @@ -3046,16 +3090,6 @@ class ClickHouseCluster: subprocess_check_call(self.base_zookeeper_cmd + ["start", n]) -CLICKHOUSE_START_COMMAND = ( - "clickhouse server --config-file=/etc/clickhouse-server/{main_config_file}" - " --log-file=/var/log/clickhouse-server/clickhouse-server.log " - " --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log" -) - -CLICKHOUSE_STAY_ALIVE_COMMAND = "bash -c \"trap 'pkill tail' INT TERM; {} --daemon; coproc tail -f /dev/null; wait $$!\"".format( - CLICKHOUSE_START_COMMAND -) - DOCKER_COMPOSE_TEMPLATE = """ version: '2.3' services: @@ -3127,6 +3161,7 @@ class ClickHouseInstance: with_nats, with_nginx, with_kerberized_hdfs, + with_secrets, with_mongo, with_meili, with_redis, @@ -3136,6 +3171,7 @@ class ClickHouseInstance: with_hive, with_coredns, with_cassandra, + allow_analyzer, server_bin_path, odbc_bridge_bin_path, library_bridge_bin_path, @@ -3189,7 +3225,7 @@ class ClickHouseInstance: if clickhouse_path_dir else None ) - self.kerberos_secrets_dir = p.abspath(p.join(base_path, "secrets")) + self.secrets_dir = p.abspath(p.join(base_path, "secrets")) self.macros = macros if macros is not None else {} self.with_zookeeper = with_zookeeper self.zookeeper_config_path = zookeeper_config_path @@ -3212,6 +3248,7 @@ class ClickHouseInstance: self.with_nats = with_nats self.with_nginx = with_nginx self.with_kerberized_hdfs = with_kerberized_hdfs + self.with_secrets = with_secrets self.with_mongo = with_mongo self.with_meili = with_meili self.with_redis = with_redis @@ -3222,6 +3259,7 @@ class ClickHouseInstance: self.with_hive = with_hive self.with_coredns = with_coredns self.coredns_config_dir = p.abspath(p.join(base_path, "coredns_config")) + self.allow_analyzer = allow_analyzer self.main_config_name = main_config_name self.users_config_name = users_config_name @@ -3230,6 +3268,9 @@ class ClickHouseInstance: self.clickhouse_start_command = clickhouse_start_command.replace( "{main_config_file}", self.main_config_name ) + self.clickhouse_stay_alive_command = "bash -c \"trap 'pkill tail' INT TERM; {} --daemon; coproc tail -f /dev/null; wait $$!\"".format( + clickhouse_start_command + ) self.path = p.join(self.cluster.instances_dir, name) self.docker_compose_path = p.join(self.path, "docker-compose.yml") @@ -3511,6 +3552,24 @@ class ClickHouseInstance: return error + def append_hosts(self, name, ip): + self.exec_in_container( + (["bash", "-c", "echo '{}' {} >> /etc/hosts".format(ip, name)]), + privileged=True, + user="root", + ) + + def set_hosts(self, hosts): + entries = ["127.0.0.1 localhost", "::1 localhost"] + for host in hosts: + entries.append(f"{host[0]} {host[1]}") + + self.exec_in_container( + ["bash", "-c", 'echo -e "{}" > /etc/hosts'.format("\\n".join(entries))], + privileged=True, + user="root", + ) + # Connects to the instance via HTTP interface, sends a query and returns both the answer and the error message # as a tuple (output, error). 
def http_query_and_get_answer_with_error( @@ -4190,6 +4249,11 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) + if ( + os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None + and self.allow_analyzer + ): + write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): write_embedded_config("0_common_enable_dictionaries.xml", self.config_d_dir) @@ -4204,17 +4268,16 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) - if ( - self.with_kerberized_kafka - or self.with_kerberized_hdfs - or self.with_kerberos_kdc - ): + if self.with_secrets: if self.with_kerberos_kdc: base_secrets_dir = self.cluster.instances_dir else: base_secrets_dir = self.path + from_dir = self.secrets_dir + to_dir = p.abspath(p.join(base_secrets_dir, "secrets")) + logging.debug(f"Copy secret from {from_dir} to {to_dir}") shutil.copytree( - self.kerberos_secrets_dir, + self.secrets_dir, p.abspath(p.join(base_secrets_dir, "secrets")), dirs_exist_ok=True, ) @@ -4319,7 +4382,7 @@ class ClickHouseInstance: entrypoint_cmd = self.clickhouse_start_command if self.stay_alive: - entrypoint_cmd = CLICKHOUSE_STAY_ALIVE_COMMAND.replace( + entrypoint_cmd = self.clickhouse_stay_alive_command.replace( "{main_config_file}", self.main_config_name ) else: diff --git a/tests/integration/helpers/keeper_config1.xml b/tests/integration/helpers/keeper_config1.xml index daacd55887d..f40ed9ac6fa 100644 --- a/tests/integration/helpers/keeper_config1.xml +++ b/tests/integration/helpers/keeper_config1.xml @@ -18,6 +18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/helpers/keeper_config2.xml b/tests/integration/helpers/keeper_config2.xml index 53eb023dba5..d5bdb92a79d 100644 --- a/tests/integration/helpers/keeper_config2.xml +++ b/tests/integration/helpers/keeper_config2.xml @@ -18,6 +18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/helpers/keeper_config3.xml b/tests/integration/helpers/keeper_config3.xml index 1db091c12bc..aa69b554660 100644 --- a/tests/integration/helpers/keeper_config3.xml +++ b/tests/integration/helpers/keeper_config3.xml @@ -18,6 +18,8 @@ 15000 trace false + 2000 + 4000 diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py index 3b909194b63..93ea3fa74b7 100644 --- a/tests/integration/helpers/keeper_utils.py +++ b/tests/integration/helpers/keeper_utils.py @@ -1,5 +1,6 @@ import socket import time +from kazoo.client import KazooClient def get_keeper_socket(cluster, node, port=9181): @@ -26,9 +27,17 @@ def send_4lw_cmd(cluster, node, cmd="ruok", port=9181): NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" -def wait_until_connected(cluster, node, port=9181): +def wait_until_connected(cluster, node, port=9181, timeout=30.0): + elapsed = 0.0 + while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG: time.sleep(0.1) + elapsed += 0.1 + + if elapsed >= timeout: + raise Exception( + f"{timeout}s timeout while waiting for {node.name} to start serving requests" + ) def wait_until_quorum_lost(cluster, node, port=9181): @@ -51,3 +60,36 @@ def get_leader(cluster, nodes): if is_leader(cluster, node): return node raise Exception("No leader in Keeper cluster.") + + +def get_fake_zk(cluster, node, timeout: float = 30.0) -> KazooClient: + _fake = KazooClient( + hosts=cluster.get_instance_ip(node.name) + ":9181", timeout=timeout + ) + _fake.start() + return 
_fake + + +def get_config_str(zk: KazooClient) -> str: + """ + Return decoded contents of /keeper/config node + """ + return zk.get("/keeper/config")[0].decode("utf-8") + + +def wait_configs_equal(left_config: str, right_zk: KazooClient, timeout: float = 30.0): + """ + Check whether get /keeper/config result in left_config is equal + to get /keeper/config on right_zk ZK connection. + """ + elapsed: float = 0.0 + while sorted(left_config.split("\n")) != sorted( + get_config_str(right_zk).split("\n") + ): + time.sleep(1) + elapsed += 1 + if elapsed >= timeout: + raise Exception( + f"timeout while checking nodes configs to get equal. " + f"Left: {left_config}, right: {get_config_str(right_zk)}" + ) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 60b46926589..e6e79dc7947 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -219,10 +219,15 @@ class _NetworkManager: def __init__( self, - container_expire_timeout=120, - container_exit_timeout=120, + container_expire_timeout=600, + container_exit_timeout=660, docker_api_version=os.environ.get("DOCKER_API_VERSION"), ): + # container should be alive for at least 15 seconds then the expiration + # timeout, this is the protection from the case when the container will + # be destroyed just when some test will try to use it. + assert container_exit_timeout >= container_expire_timeout + 15 + self.container_expire_timeout = container_expire_timeout self.container_exit_timeout = container_exit_timeout diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py index dfae37af434..3c8a23b15a2 100644 --- a/tests/integration/helpers/postgres_utility.py +++ b/tests/integration/helpers/postgres_utility.py @@ -76,16 +76,26 @@ def drop_postgres_schema(cursor, schema_name): def create_postgres_table( - cursor, table_name, replica_identity_full=False, template=postgres_table_template + cursor, + table_name, + database_name="", + replica_identity_full=False, + template=postgres_table_template, ): - drop_postgres_table(cursor, table_name) - cursor.execute(template.format(table_name)) + if database_name == "": + name = table_name + else: + name = f"{database_name}.{table_name}" + drop_postgres_table(cursor, name) + query = template.format(name) + cursor.execute(query) + print(f"Query: {query}") if replica_identity_full: - cursor.execute(f"ALTER TABLE {table_name} REPLICA IDENTITY FULL;") + cursor.execute(f"ALTER TABLE {name} REPLICA IDENTITY FULL;") -def drop_postgres_table(cursor, table_name): - cursor.execute(f"""DROP TABLE IF EXISTS "{table_name}" """) +def drop_postgres_table(cursor, name): + cursor.execute(f"""DROP TABLE IF EXISTS "{name}" """) def create_postgres_table_with_schema(cursor, schema_name, table_name): @@ -103,13 +113,16 @@ class PostgresManager: self.created_materialized_postgres_db_list = set() self.created_ch_postgres_db_list = set() - def init(self, instance, ip, port): + def init(self, instance, ip, port, default_database="postgres_database"): self.instance = instance self.ip = ip self.port = port - self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.default_database = default_database self.prepare() + def get_default_database(self): + return self.default_database + def restart(self): try: self.clear() @@ -118,11 +131,22 @@ class PostgresManager: self.prepare() raise ex + def execute(self, query): + self.cursor.execute(query) + def prepare(self): - conn = get_postgres_conn(ip=self.ip, 
port=self.port) - cursor = conn.cursor() - self.create_postgres_db(cursor, "postgres_database") - self.create_clickhouse_postgres_db(ip=self.ip, port=self.port) + self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.cursor = self.conn.cursor() + if self.default_database != "": + self.create_postgres_db(self.default_database) + self.conn = get_postgres_conn( + ip=self.ip, + port=self.port, + database=True, + database_name=self.default_database, + ) + self.cursor = self.conn.cursor() + self.create_clickhouse_postgres_db() def clear(self): if self.conn.closed == 0: @@ -132,63 +156,79 @@ class PostgresManager: for db in self.created_ch_postgres_db_list.copy(): self.drop_clickhouse_postgres_db(db) if len(self.created_postgres_db_list) > 0: - conn = get_postgres_conn(ip=self.ip, port=self.port) - cursor = conn.cursor() + self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.cursor = self.conn.cursor() for db in self.created_postgres_db_list.copy(): - self.drop_postgres_db(cursor, db) + self.drop_postgres_db(db) - def get_db_cursor(self): - self.conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) + def get_db_cursor(self, database_name=""): + if database_name == "": + database_name = self.default_database + self.conn = get_postgres_conn( + ip=self.ip, port=self.port, database=True, database_name=database_name + ) return self.conn.cursor() - def create_postgres_db(self, cursor, name="postgres_database"): - self.drop_postgres_db(cursor, name) - self.created_postgres_db_list.add(name) - cursor.execute(f"CREATE DATABASE {name}") + def database_or_default(self, database_name): + if database_name != "": + return database_name + if self.default_database != "": + return self.default_database + raise Exception("Database name is empty") - def drop_postgres_db(self, cursor, name="postgres_database"): - cursor.execute(f"DROP DATABASE IF EXISTS {name}") - if name in self.created_postgres_db_list: - self.created_postgres_db_list.remove(name) + def create_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.drop_postgres_db(database_name) + self.created_postgres_db_list.add(database_name) + self.cursor.execute(f"CREATE DATABASE {database_name}") + + def drop_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.cursor.execute(f"DROP DATABASE IF EXISTS {database_name}") + if database_name in self.created_postgres_db_list: + self.created_postgres_db_list.remove(database_name) def create_clickhouse_postgres_db( self, - ip, - port, - name="postgres_database", - database_name="postgres_database", + database_name="", schema_name="", + postgres_database="", ): - self.drop_clickhouse_postgres_db(name) - self.created_ch_postgres_db_list.add(name) + database_name = self.database_or_default(database_name) + if postgres_database == "": + postgres_database = database_name + self.drop_clickhouse_postgres_db(database_name) + self.created_ch_postgres_db_list.add(database_name) if len(schema_name) == 0: self.instance.query( f""" - CREATE DATABASE {name} - ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')""" + CREATE DATABASE {database_name} + ENGINE = PostgreSQL('{self.ip}:{self.port}', '{postgres_database}', 'postgres', 'mysecretpassword')""" ) else: self.instance.query( f""" - CREATE DATABASE {name} - ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')""" + CREATE DATABASE {database_name} + ENGINE = 
PostgreSQL('{self.ip}:{self.port}', '{postgres_database}', 'postgres', 'mysecretpassword', '{schema_name}')""" ) - def drop_clickhouse_postgres_db(self, name="postgres_database"): - self.instance.query(f"DROP DATABASE IF EXISTS {name}") - if name in self.created_ch_postgres_db_list: - self.created_ch_postgres_db_list.remove(name) + def drop_clickhouse_postgres_db(self, database_name=""): + database_name = self.database_or_default(database_name) + self.instance.query(f"DROP DATABASE IF EXISTS {database_name}") + if database_name in self.created_ch_postgres_db_list: + self.created_ch_postgres_db_list.remove(database_name) def create_materialized_db( self, ip, port, materialized_database="test_database", - postgres_database="postgres_database", + postgres_database="", settings=[], table_overrides="", ): + postgres_database = self.database_or_default(postgres_database) self.created_materialized_postgres_db_list.add(materialized_database) self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") @@ -207,35 +247,32 @@ class PostgresManager: self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} SYNC") if materialized_database in self.created_materialized_postgres_db_list: self.created_materialized_postgres_db_list.remove(materialized_database) - assert materialized_database not in self.instance.query("SHOW DATABASES") - def create_and_fill_postgres_table(self, table_name): - conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) - cursor = conn.cursor() - self.create_and_fill_postgres_table_from_cursor(cursor, table_name) + def create_postgres_schema(self, name): + create_postgres_schema(self.cursor, name) - def create_and_fill_postgres_table_from_cursor(self, cursor, table_name): - create_postgres_table(cursor, table_name) - self.instance.query( - f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)" - ) - - def create_and_fill_postgres_tables(self, tables_num, numbers=50): - conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) - cursor = conn.cursor() - self.create_and_fill_postgres_tables_from_cursor( - cursor, tables_num, numbers=numbers - ) - - def create_and_fill_postgres_tables_from_cursor( - self, cursor, tables_num, numbers=50 + def create_postgres_table( + self, table_name, database_name="", template=postgres_table_template ): + create_postgres_table( + self.cursor, table_name, database_name=database_name, template=template + ) + + def create_and_fill_postgres_table(self, table_name, database_name=""): + create_postgres_table(self.cursor, table_name, database_name) + database_name = self.database_or_default(database_name) + self.instance.query( + f"INSERT INTO {database_name}.{table_name} SELECT number, number from numbers(50)" + ) + + def create_and_fill_postgres_tables(self, tables_num, numbers=50, database_name=""): for i in range(tables_num): table_name = f"postgresql_replica_{i}" - create_postgres_table(cursor, table_name) + create_postgres_table(self.cursor, table_name, database_name) if numbers > 0: + db = self.database_or_default(database_name) self.instance.query( - f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})" + f"INSERT INTO {db}.{table_name} SELECT number, number from numbers({numbers})" ) diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 8ff4f9e9203..206f960293f 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py 
@@ -7,11 +7,18 @@ import urllib.parse import http.server import socketserver import string +import socket +import struct INF_COUNT = 100000000 +def _and_then(value, func): + assert callable(func) + return None if value is None else func(value) + + class MockControl: def __init__(self, cluster, container, port): self._cluster = cluster @@ -30,8 +37,8 @@ class MockControl: ) assert response == "OK", response - def setup_error_at_object_upload(self, count=None, after=None): - url = f"http://localhost:{self._port}/mock_settings/error_at_object_upload?nothing=1" + def setup_action(self, when, count=None, after=None, action=None, action_args=None): + url = f"http://localhost:{self._port}/mock_settings/{when}?nothing=1" if count is not None: url += f"&count={count}" @@ -39,25 +46,12 @@ class MockControl: if after is not None: url += f"&after={after}" - response = self._cluster.exec_in_container( - self._cluster.get_container_id(self._container), - [ - "curl", - "-s", - url, - ], - nothrow=True, - ) - assert response == "OK", response + if action is not None: + url += f"&action={action}" - def setup_error_at_part_upload(self, count=None, after=None): - url = f"http://localhost:{self._port}/mock_settings/error_at_part_upload?nothing=1" - - if count is not None: - url += f"&count={count}" - - if after is not None: - url += f"&after={after}" + if action_args is not None: + for x in action_args: + url += f"&action_args={x}" response = self._cluster.exec_in_container( self._cluster.get_container_id(self._container), @@ -70,22 +64,14 @@ class MockControl: ) assert response == "OK", response - def setup_error_at_create_multi_part_upload(self, count=None): - url = f"http://localhost:{self._port}/mock_settings/error_at_create_multi_part_upload" + def setup_at_object_upload(self, **kwargs): + self.setup_action("at_object_upload", **kwargs) - if count is not None: - url += f"?count={count}" + def setup_at_part_upload(self, **kwargs): + self.setup_action("at_part_upload", **kwargs) - response = self._cluster.exec_in_container( - self._cluster.get_container_id(self._container), - [ - "curl", - "-s", - url, - ], - nothrow=True, - ) - assert response == "OK", response + def setup_at_create_multi_part_upload(self, **kwargs): + self.setup_action("at_create_multi_part_upload", **kwargs) def setup_fake_puts(self, part_length): response = self._cluster.exec_in_container( @@ -140,8 +126,14 @@ class MockControl: class _ServerRuntime: class SlowPut: def __init__( - self, probability_=None, timeout_=None, minimal_length_=None, count_=None + self, + lock, + probability_=None, + timeout_=None, + minimal_length_=None, + count_=None, ): + self.lock = lock self.probability = probability_ if probability_ is not None else 1 self.timeout = timeout_ if timeout_ is not None else 0.1 self.minimal_length = minimal_length_ if minimal_length_ is not None else 0 @@ -156,42 +148,135 @@ class _ServerRuntime: ) def get_timeout(self, content_length): - if content_length > self.minimal_length: - if self.count > 0: - if ( - _runtime.slow_put.probability == 1 - or random.random() <= _runtime.slow_put.probability - ): - self.count -= 1 - return _runtime.slow_put.timeout + with self.lock: + if content_length > self.minimal_length: + if self.count > 0: + if ( + _runtime.slow_put.probability == 1 + or random.random() <= _runtime.slow_put.probability + ): + self.count -= 1 + return _runtime.slow_put.timeout return None + class Expected500ErrorAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "ExpectedError" + "mock s3 
injected error" + "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(data) + + class RedirectAction: + def __init__(self, host="localhost", port=1): + self.dst_host = _and_then(host, str) + self.dst_port = _and_then(port, int) + + def inject_error(self, request_handler): + request_handler.redirect(host=self.dst_host, port=self.dst_port) + + class ConnectionResetByPeerAction: + def __init__(self, with_partial_data=None): + self.partial_data = "" + if with_partial_data is not None and with_partial_data == "1": + self.partial_data = ( + '\n' + "\n" + ) + + def inject_error(self, request_handler): + request_handler.read_all_input() + + if self.partial_data: + request_handler.send_response(200) + request_handler.send_header("Content-Type", "text/xml") + request_handler.send_header("Content-Length", 10000) + request_handler.end_headers() + request_handler.wfile.write(bytes(self.partial_data, "UTF-8")) + + time.sleep(1) + request_handler.connection.setsockopt( + socket.SOL_SOCKET, socket.SO_LINGER, struct.pack("ii", 1, 0) + ) + request_handler.connection.close() + + class BrokenPipeAction: + def inject_error(self, request_handler): + # partial read + self.rfile.read(50) + + time.sleep(1) + request_handler.connection.setsockopt( + socket.SOL_SOCKET, socket.SO_LINGER, struct.pack("ii", 1, 0) + ) + request_handler.connection.close() + + class ConnectionRefusedAction(RedirectAction): + pass + class CountAfter: - def __init__(self, count_=None, after_=None): + def __init__( + self, lock, count_=None, after_=None, action_=None, action_args_=[] + ): + self.lock = lock + self.count = count_ if count_ is not None else INF_COUNT self.after = after_ if after_ is not None else 0 + self.action = action_ + self.action_args = action_args_ + + if self.action == "connection_refused": + self.error_handler = _ServerRuntime.ConnectionRefusedAction() + elif self.action == "connection_reset_by_peer": + self.error_handler = _ServerRuntime.ConnectionResetByPeerAction( + *self.action_args + ) + elif self.action == "broken_pipe": + self.error_handler = _ServerRuntime.BrokenPipeAction() + elif self.action == "redirect_to": + self.error_handler = _ServerRuntime.RedirectAction(*self.action_args) + else: + self.error_handler = _ServerRuntime.Expected500ErrorAction() + + @staticmethod + def from_cgi_params(lock, params): + return _ServerRuntime.CountAfter( + lock=lock, + count_=_and_then(params.get("count", [None])[0], int), + after_=_and_then(params.get("after", [None])[0], int), + action_=params.get("action", [None])[0], + action_args_=params.get("action_args", []), + ) def __str__(self): - return f"count:{self.count} after:{self.after}" + return f"count:{self.count} after:{self.after} action:{self.action} action_args:{self.action_args}" def has_effect(self): - if self.after: - self.after -= 1 - if self.after == 0: - if self.count: - self.count -= 1 - return True - return False + with self.lock: + if self.after: + self.after -= 1 + if self.after == 0: + if self.count: + self.count -= 1 + return True + return False + + def inject_error(self, request_handler): + self.error_handler.inject_error(request_handler) def __init__(self): self.lock = threading.Lock() - self.error_at_part_upload = None - self.error_at_object_upload = None + self.at_part_upload = None + self.at_object_upload = None self.fake_put_when_length_bigger = None self.fake_uploads = dict() self.slow_put = None self.fake_multipart_upload = None - self.error_at_create_multi_part_upload = None + self.at_create_multi_part_upload = None def 
register_fake_upload(self, upload_id, key): with self.lock: @@ -205,23 +290,18 @@ class _ServerRuntime: def reset(self): with self.lock: - self.error_at_part_upload = None - self.error_at_object_upload = None + self.at_part_upload = None + self.at_object_upload = None self.fake_put_when_length_bigger = None self.fake_uploads = dict() self.slow_put = None self.fake_multipart_upload = None - self.error_at_create_multi_part_upload = None + self.at_create_multi_part_upload = None _runtime = _ServerRuntime() -def _and_then(value, func): - assert callable(func) - return None if value is None else func(value) - - def get_random_string(length): # choose from all lowercase letter letters = string.ascii_lowercase @@ -239,7 +319,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): def _ping(self): self._ok() - def _read_out(self): + def read_all_input(self): content_length = int(self.headers.get("Content-Length", 0)) to_read = content_length while to_read > 0: @@ -250,36 +330,36 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): str(self.rfile.read(size)) to_read -= size - def _redirect(self): - self._read_out() + def redirect(self, host=None, port=None): + if host is None and port is None: + host = self.server.upstream_host + port = self.server.upstream_port + + self.read_all_input() self.send_response(307) - url = ( - f"http://{self.server.upstream_host}:{self.server.upstream_port}{self.path}" - ) + url = f"http://{host}:{port}{self.path}" + self.log_message("redirect to %s", url) self.send_header("Location", url) self.end_headers() self.wfile.write(b"Redirected") - def _error(self, data): - self._read_out() + def write_error(self, data, content_length=None): + if content_length is None: + content_length = len(data) + self.log_message("write_error %s", data) + self.read_all_input() self.send_response(500) self.send_header("Content-Type", "text/xml") + self.send_header("Content-Length", str(content_length)) self.end_headers() - self.wfile.write(bytes(data, "UTF-8")) - - def _error_expected_500(self): - self._error( - '' - "" - "ExpectedError" - "mock s3 injected error" - "txfbd566d03042474888193-00608d7537" - "" - ) + if data: + self.wfile.write(bytes(data, "UTF-8")) def _fake_put_ok(self): - self._read_out() + self.log_message("fake put") + + self.read_all_input() self.send_response(200) self.send_header("Content-Type", "text/xml") @@ -288,7 +368,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.end_headers() def _fake_uploads(self, path, upload_id): - self._read_out() + self.read_all_input() parts = [x for x in path.split("/") if x] bucket = parts[0] @@ -310,7 +390,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.wfile.write(bytes(data, "UTF-8")) def _fake_post_ok(self, path): - self._read_out() + self.read_all_input() parts = [x for x in path.split("/") if x] bucket = parts[0] @@ -338,22 +418,22 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): path = [x for x in parts.path.split("/") if x] assert path[0] == "mock_settings", path if len(path) < 2: - return self._error("_mock_settings: wrong command") + return self.write_error("_mock_settings: wrong command") - if path[1] == "error_at_part_upload": + if path[1] == "at_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) - _runtime.error_at_part_upload = _ServerRuntime.CountAfter( - count_=_and_then(params.get("count", [None])[0], int), - after_=_and_then(params.get("after", [None])[0], int), + _runtime.at_part_upload = 
_ServerRuntime.CountAfter.from_cgi_params( + _runtime.lock, params ) + self.log_message("set at_part_upload %s", _runtime.at_part_upload) return self._ok() - if path[1] == "error_at_object_upload": + if path[1] == "at_object_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) - _runtime.error_at_object_upload = _ServerRuntime.CountAfter( - count_=_and_then(params.get("count", [None])[0], int), - after_=_and_then(params.get("after", [None])[0], int), + _runtime.at_object_upload = _ServerRuntime.CountAfter.from_cgi_params( + _runtime.lock, params ) + self.log_message("set at_object_upload %s", _runtime.at_object_upload) return self._ok() if path[1] == "fake_puts": @@ -361,11 +441,13 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): _runtime.fake_put_when_length_bigger = int( params.get("when_length_bigger", [1024 * 1024])[0] ) + self.log_message("set fake_puts %s", _runtime.fake_put_when_length_bigger) return self._ok() if path[1] == "slow_put": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) _runtime.slow_put = _ServerRuntime.SlowPut( + lock=_runtime.lock, minimal_length_=_and_then(params.get("minimal_length", [None])[0], int), probability_=_and_then(params.get("probability", [None])[0], float), timeout_=_and_then(params.get("timeout", [None])[0], float), @@ -376,20 +458,26 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if path[1] == "setup_fake_multpartuploads": _runtime.fake_multipart_upload = True + self.log_message("set setup_fake_multpartuploads") return self._ok() - if path[1] == "error_at_create_multi_part_upload": + if path[1] == "at_create_multi_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) - _runtime.error_at_create_multi_part_upload = int( - params.get("count", [INF_COUNT])[0] + _runtime.at_create_multi_part_upload = ( + _ServerRuntime.CountAfter.from_cgi_params(_runtime.lock, params) + ) + self.log_message( + "set at_create_multi_part_upload %s", + _runtime.at_create_multi_part_upload, ) return self._ok() if path[1] == "reset": _runtime.reset() + self.log_message("reset") return self._ok() - return self._error("_mock_settings: wrong command") + return self.write_error("_mock_settings: wrong command") def do_GET(self): if self.path == "/": @@ -398,7 +486,8 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if self.path.startswith("/mock_settings"): return self._mock_settings() - return self._redirect() + self.log_message("get redirect") + return self.redirect() def do_PUT(self): content_length = int(self.headers.get("Content-Length", 0)) @@ -414,30 +503,52 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): upload_id = params.get("uploadId", [None])[0] if upload_id is not None: - if _runtime.error_at_part_upload is not None: - if _runtime.error_at_part_upload.has_effect(): - return self._error_expected_500() + if _runtime.at_part_upload is not None: + self.log_message( + "put at_part_upload %s, %s, %s", + _runtime.at_part_upload, + upload_id, + parts, + ) + + if _runtime.at_part_upload.has_effect(): + return _runtime.at_part_upload.inject_error(self) if _runtime.fake_multipart_upload: if _runtime.is_fake_upload(upload_id, parts.path): return self._fake_put_ok() else: - if _runtime.error_at_object_upload is not None: - if _runtime.error_at_object_upload.has_effect(): - return self._error_expected_500() + if _runtime.at_object_upload is not None: + if _runtime.at_object_upload.has_effect(): + self.log_message( + "put error_at_object_upload %s, %s", + 
_runtime.at_object_upload, + parts, + ) + return _runtime.at_object_upload.inject_error(self) if _runtime.fake_put_when_length_bigger is not None: if content_length > _runtime.fake_put_when_length_bigger: + self.log_message( + "put fake_put_when_length_bigger %s, %s, %s", + _runtime.fake_put_when_length_bigger, + content_length, + parts, + ) return self._fake_put_ok() - return self._redirect() + self.log_message( + "put redirect %s", + parts, + ) + return self.redirect() def do_POST(self): parts = urllib.parse.urlsplit(self.path) params = urllib.parse.parse_qs(parts.query, keep_blank_values=True) uploads = params.get("uploads", [None])[0] if uploads is not None: - if _runtime.error_at_create_multi_part_upload: - _runtime.error_at_create_multi_part_upload -= 1 - return self._error_expected_500() + if _runtime.at_create_multi_part_upload is not None: + if _runtime.at_create_multi_part_upload.has_effect(): + return _runtime.at_create_multi_part_upload.inject_error(self) if _runtime.fake_multipart_upload: upload_id = get_random_string(5) @@ -448,13 +559,13 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): if _runtime.is_fake_upload(upload_id, parts.path): return self._fake_post_ok(parts.path) - return self._redirect() + return self.redirect() def do_HEAD(self): - self._redirect() + self.redirect() def do_DELETE(self): - self._redirect() + self.redirect() class _ThreadedHTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer): diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index d060218456a..d056225fee4 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -48,6 +48,7 @@ "test_system_metrics/test.py::test_readonly_metrics", "test_system_replicated_fetches/test.py::test_system_replicated_fetches", "test_zookeeper_config_load_balancing/test.py::test_round_robin", + "test_zookeeper_fallback_session/test.py::test_fallback_session", "test_global_overcommit_tracker/test.py::test_global_overcommit", @@ -68,5 +69,28 @@ "test_server_reload/test.py::test_remove_postgresql_port", "test_server_reload/test.py::test_remove_tcp_port", - "test_keeper_map/test.py::test_keeper_map_without_zk" + "test_keeper_map/test.py::test_keeper_map_without_zk", + + "test_replicated_merge_tree_wait_on_shutdown/test.py::test_shutdown_and_wait", + + "test_http_failover/test.py::test_url_destination_host_with_multiple_addrs", + "test_http_failover/test.py::test_url_invalid_hostname", + "test_http_failover/test.py::test_url_ip_change", + + "test_system_logs/test_system_logs.py::test_max_size_0", + "test_system_logs/test_system_logs.py::test_reserved_size_greater_max_size", + "test_system_flush_logs/test.py::test_log_buffer_size_rows_flush_threshold", + "test_system_flush_logs/test.py::test_log_max_size", + "test_crash_log/test.py::test_pkill_query_log", + "test_crash_log/test.py::test_pkill", + + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_postgres", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_mysql", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_http", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_http_named_session", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_grpc", + 
"test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_tcp_and_others", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_setting_in_query", + "test_profile_max_sessions_for_user/test.py::test_profile_max_sessions_for_user_client_suggestions_load" ] diff --git a/tests/integration/runner b/tests/integration/runner index df52f587eee..4c2b1054538 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -11,6 +11,7 @@ import subprocess import sys import string import random +import shlex def random_str(length=6): @@ -135,9 +136,7 @@ def check_args_and_update_paths(args): def docker_kill_handler_handler(signum, frame): subprocess.check_call( - 'docker ps --all --quiet --filter name={name} --format="{{{{.ID}}}}"'.format( - name=CONTAINER_NAME - ), + "docker ps --all --quiet --filter name={name}".format(name=CONTAINER_NAME), shell=True, ) raise KeyboardInterrupt("Killed by Ctrl+C") @@ -283,6 +282,14 @@ if __name__ == "__main__": help="Use tmpfs for dockerd files", ) + parser.add_argument( + "--analyzer", + action="store_true", + default=False, + dest="analyzer", + help="Use new analyzer infrastructure", + ) + parser.add_argument( "--cleanup-containers", action="store_true", @@ -324,18 +331,8 @@ if __name__ == "__main__": if args.docker_compose_images_tags is not None: for img_tag in args.docker_compose_images_tags: [image, tag] = img_tag.split(":") - if image == "clickhouse/mysql-golang-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_GOLANG_CLIENT_TAG", tag) - elif image == "clickhouse/dotnet-client": + if image == "clickhouse/dotnet-client": env_tags += "-e {}={} ".format("DOCKER_DOTNET_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-java-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_JAVA_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-js-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_JS_CLIENT_TAG", tag) - elif image == "clickhouse/mysql-php-client": - env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) - elif image == "clickhouse/postgresql-java-client": - env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) elif image == "clickhouse/integration-helper": env_tags += "-e {}={} ".format("DOCKER_HELPER_TAG", tag) elif image == "clickhouse/integration-test": @@ -344,6 +341,18 @@ if __name__ == "__main__": env_tags += "-e {}={} ".format("DOCKER_KERBERIZED_HADOOP_TAG", tag) elif image == "clickhouse/kerberos-kdc": env_tags += "-e {}={} ".format("DOCKER_KERBEROS_KDC_TAG", tag) + elif image == "clickhouse/mysql-golang-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_GOLANG_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-java-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_JAVA_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-js-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_JS_CLIENT_TAG", tag) + elif image == "clickhouse/mysql-php-client": + env_tags += "-e {}={} ".format("DOCKER_MYSQL_PHP_CLIENT_TAG", tag) + elif image == "clickhouse/nginx-dav": + env_tags += "-e {}={} ".format("DOCKER_NGINX_DAV_TAG", tag) + elif image == "clickhouse/postgresql-java-client": + env_tags += "-e {}={} ".format("DOCKER_POSTGRESQL_JAVA_CLIENT_TAG", tag) else: logging.info("Unknown image %s" % (image)) @@ -395,8 +404,18 @@ if __name__ == "__main__": if args.keyword_expression: args.pytest_args += ["-k", args.keyword_expression] - pytest_opts = " ".join(args.pytest_args).replace("'", "\\'") - tests_list = " ".join(args.tests_list) + use_analyzer = "" + 
if args.analyzer: + use_analyzer = "-e CLICKHOUSE_USE_NEW_ANALYZER=1" + + # NOTE: since pytest options is in the argument value already we need to additionally escape '"' + pytest_opts = " ".join( + map(lambda x: shlex.quote(x).replace('"', '\\"'), args.pytest_args) + ) + tests_list = " ".join( + map(lambda x: shlex.quote(x).replace('"', '\\"'), args.tests_list) + ) + cmd_base = ( f"docker run {net} {tty} --rm --name {CONTAINER_NAME} " "--privileged --dns-search='.' " # since recent dns search leaks from host @@ -407,8 +426,8 @@ if __name__ == "__main__": f"--volume={args.cases_dir}:/ClickHouse/tests/integration " f"--volume={args.src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos " f"--volume=/run:/run/host:ro {dockerd_internal_volume} {env_tags} {env_cleanup} " - "-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 -e PYTHONUNBUFFERED=1 " - f"-e PYTEST_OPTS='{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv'" + f"-e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 {use_analyzer} -e PYTHONUNBUFFERED=1 " + f'-e PYTEST_ADDOPTS="{parallel_args} {pytest_opts} {tests_list} {rand_args} -vvv"' f" {DIND_INTEGRATION_TESTS_IMAGE_NAME}:{args.docker_image_version}" ) @@ -419,7 +438,7 @@ if __name__ == "__main__": ) containers = subprocess.check_output( - f"docker ps --all --quiet --filter name={CONTAINER_NAME} --format={{{{.ID}}}}", + f"docker ps --all --quiet --filter name={CONTAINER_NAME}", shell=True, universal_newlines=True, ).splitlines() diff --git a/tests/integration/test_access_for_functions/test.py b/tests/integration/test_access_for_functions/test.py index be4d71502d2..5069468110c 100644 --- a/tests/integration/test_access_for_functions/test.py +++ b/tests/integration/test_access_for_functions/test.py @@ -22,7 +22,7 @@ def test_access_rights_for_function(): instance.query("CREATE USER A") instance.query("CREATE USER B") assert ( - "it's necessary to have grant CREATE FUNCTION ON *.*" + "it's necessary to have the grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user="A") ) @@ -32,7 +32,7 @@ def test_access_rights_for_function(): assert instance.query("SELECT MySum(1, 2)") == "3\n" assert ( - "it's necessary to have grant DROP FUNCTION ON *.*" + "it's necessary to have the grant DROP FUNCTION ON *.*" in instance.query_and_get_error("DROP FUNCTION MySum", user="B") ) @@ -44,7 +44,7 @@ def test_access_rights_for_function(): instance.query("REVOKE CREATE FUNCTION ON *.* FROM A") assert ( - "it's necessary to have grant CREATE FUNCTION ON *.*" + "it's necessary to have the grant CREATE FUNCTION ON *.*" in instance.query_and_get_error(create_function_query, user="A") ) diff --git a/tests/integration/test_alter_moving_garbage/configs/config.d/storage_conf.xml b/tests/integration/test_alter_moving_garbage/configs/config.d/storage_conf.xml index 1450a459257..67c4cc2d489 100644 --- a/tests/integration/test_alter_moving_garbage/configs/config.d/storage_conf.xml +++ b/tests/integration/test_alter_moving_garbage/configs/config.d/storage_conf.xml @@ -1,12 +1,18 @@ - + s3 http://minio1:9001/root/data/ minio minio123 - + + + s3 + http://minio1:9001/root/data2/ + minio + minio123 + @@ -15,10 +21,17 @@ default - s3 + s31 + + + + s32 + + + diff --git a/tests/integration/test_alter_moving_garbage/test.py b/tests/integration/test_alter_moving_garbage/test.py index 330df3ac490..21be46a7e1b 100644 --- a/tests/integration/test_alter_moving_garbage/test.py +++ b/tests/integration/test_alter_moving_garbage/test.py @@ -39,7 +39,7 @@ def cluster(): 
 def create_table(node, table_name, replicated, additional_settings):
     settings = {
         "storage_policy": "two_disks",
-        "old_parts_lifetime": 1,
+        "old_parts_lifetime": 0,
         "index_granularity": 512,
         "temporary_directories_lifetime": 0,
         "merge_tree_clear_old_temporary_directories_interval_seconds": 1,
@@ -73,9 +73,13 @@ def create_table(node, table_name, replicated, additional_settings):
     "allow_remote_fs_zero_copy_replication,replicated_engine",
     [(False, False), (False, True), (True, True)],
 )
-def test_create_table(
+def test_alter_moving(
     cluster, allow_remote_fs_zero_copy_replication, replicated_engine
 ):
+    """
+    Test that we correctly move parts during ALTER TABLE
+    """
+
     if replicated_engine:
         nodes = list(cluster.instances.values())
     else:
@@ -126,7 +130,7 @@ def test_create_table(
         partition = f"2021-01-{i:02d}"
         try:
             random.choice(nodes).query(
-                f"ALTER TABLE {table_name} MOVE PARTITION '{partition}' TO DISK 's3'",
+                f"ALTER TABLE {table_name} MOVE PARTITION '{partition}' TO DISK 's31'",
             )
         except QueryRuntimeException as e:
             if "PART_IS_TEMPORARILY_LOCKED" in str(e):
@@ -153,3 +157,94 @@ def test_create_table(
     )

     assert data_digest == "1000\n"
+
+
+def test_delete_race_leftovers(cluster):
+    """
+    Test that we correctly delete outdated parts and do not leave any leftovers on s3
+    """
+
+    node = cluster.instances["node1"]
+
+    table_name = "test_delete_race_leftovers"
+    additional_settings = {
+        # use another disk not to interfere with other tests
+        "storage_policy": "one_disk",
+        # always remove parts in parallel
+        "concurrent_part_removal_threshold": 1,
+    }
+
+    create_table(
+        node, table_name, replicated=True, additional_settings=additional_settings
+    )
+
+    # Stop merges to keep several small parts in the active set
+    node.query(f"SYSTEM STOP MERGES {table_name}")
+
+    # Create several small parts in one partition
+    for i in range(1, 11):
+        node.query(
+            f"INSERT INTO {table_name} SELECT toDate('2021-01-01'), number as id, toString(sipHash64(number, {i})) FROM numbers(10_000)"
+        )
+    table_digest_query = f"SELECT count(), sum(sipHash64(id, data)) FROM {table_name}"
+    table_digest = node.query(table_digest_query)
+
+    # Execute several no-op deletes to get parts with an updated mutation id but unchanged data
+    # New parts will have symlinks to old parts
+    node.query(f"SYSTEM START MERGES {table_name}")
+    for i in range(10):
+        node.query(f"DELETE FROM {table_name} WHERE data = ''")
+
+    # Make existing parts outdated
+    # Also we don't want the set of parts to keep changing,
+    # because it would be difficult to match objects on s3 and in remote_data_paths to check correctness
+    node.query(f"OPTIMIZE TABLE {table_name} FINAL")
+
+    inactive_parts_query = (
+        f"SELECT count() FROM system.parts "
+        f"WHERE not active AND table = '{table_name}' AND database = 'default'"
+    )
+
+    # Try to wait for deletion of outdated parts
+    # However, we do not want to wait too long
+    # If some parts are not deleted after several iterations, we will just continue
+    for i in range(20):
+        inactive_parts_count = int(node.query(inactive_parts_query).strip())
+        if inactive_parts_count == 0:
+            print(f"Inactive parts are deleted after {i} iterations")
+            break
+
+        print(f"Inactive parts count: {inactive_parts_count}")
+        time.sleep(5)
+
+    # Check that we correctly deleted all outdated parts and left no leftovers on s3
+    # Do it with retries because we delete blobs in the background,
+    # and there can be a race condition between removing from remote_data_paths and deleting blobs
+    all_remote_paths = set()
+    known_remote_paths = set()
+    for i in range(3):
+        
known_remote_paths = set( + node.query( + f"SELECT remote_path FROM system.remote_data_paths WHERE disk_name = 's32'" + ).splitlines() + ) + + all_remote_paths = set( + obj.object_name + for obj in cluster.minio_client.list_objects( + cluster.minio_bucket, "data2/", recursive=True + ) + ) + + # Some blobs can be deleted after we listed remote_data_paths + # It's alright, thus we check only that all remote paths are known + # (in other words, all remote paths is subset of known paths) + if all_remote_paths == {p for p in known_remote_paths if p in all_remote_paths}: + break + + time.sleep(1) + + assert all_remote_paths == {p for p in known_remote_paths if p in all_remote_paths} + + # Check that we have all data + assert table_digest == node.query(table_digest_query) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/__init__.py b/tests/integration/test_async_connect_to_multiple_ips/__init__.py similarity index 100% rename from tests/integration/test_replicated_merge_tree_hdfs_zero_copy/__init__.py rename to tests/integration/test_async_connect_to_multiple_ips/__init__.py diff --git a/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml b/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml new file mode 100644 index 00000000000..399d886ee6a --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/configs/enable_hedged.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml b/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml new file mode 100644 index 00000000000..df0247fd651 --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/configs/listen_host.xml @@ -0,0 +1,4 @@ + + :: + + diff --git a/tests/integration/test_async_connect_to_multiple_ips/test.py b/tests/integration/test_async_connect_to_multiple_ips/test.py new file mode 100644 index 00000000000..acc4d24d0fa --- /dev/null +++ b/tests/integration/test_async_connect_to_multiple_ips/test.py @@ -0,0 +1,72 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def cluster_without_dns_cache_update(): + try: + cluster.start() + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + pass + + +node1 = cluster.add_instance( + "node1", + main_configs=["configs/listen_host.xml"], + user_configs=["configs/enable_hedged.xml"], + with_zookeeper=True, + ipv4_address="10.5.95.11", +) + +node2 = cluster.add_instance( + "node2", + main_configs=["configs/listen_host.xml"], + user_configs=["configs/enable_hedged.xml"], + with_zookeeper=True, + ipv4_address="10.5.95.12", +) + + +# node1 - source with table, have invalid ipv6 +# node2 - destination, doing remote query +def test(cluster_without_dns_cache_update): + node1.query( + "CREATE TABLE test(t Date, label UInt8) ENGINE = MergeTree PARTITION BY t ORDER BY label;" + ) + node1.query("INSERT INTO test SELECT toDate('2022-12-28'), 1;") + assert node1.query("SELECT count(*) FROM test") == "1\n" + + wrong_ip = "2001:3984:3989::1:1118" + + node2.exec_in_container( + (["bash", "-c", "echo '{} {}' >> /etc/hosts".format(wrong_ip, node1.name)]) + ) + node2.exec_in_container( + ( + [ + "bash", + "-c", + "echo '{} {}' >> /etc/hosts".format(node1.ipv4_address, node1.name), + ] + ) + ) + + assert node1.query("SELECT count(*) from test") == "1\n" + node2.query("SYSTEM DROP DNS CACHE") + 
node1.query("SYSTEM DROP DNS CACHE") + assert ( + node2.query( + f"SELECT count(*) FROM remote('{node1.name}', default.test) limit 1;" + ) + == "1\n" + ) diff --git a/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml index cb87abcc693..d69fe96a3e2 100644 --- a/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml +++ b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml @@ -45,5 +45,6 @@ true + 1.0 diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index c19cca4126a..a6ba0c8918f 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -882,7 +882,7 @@ def test_required_privileges(): instance.query("CREATE USER u1") backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON test.table" + expected_error = "necessary to have the grant BACKUP ON test.table" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE test.table TO {backup_name}", user="u1" ) @@ -890,12 +890,12 @@ def test_required_privileges(): instance.query("GRANT BACKUP ON test.table TO u1") instance.query(f"BACKUP TABLE test.table TO {backup_name}", user="u1") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table" assert expected_error in instance.query_and_get_error( f"RESTORE TABLE test.table FROM {backup_name}", user="u1" ) - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table2" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table2" assert expected_error in instance.query_and_get_error( f"RESTORE TABLE test.table AS test.table2 FROM {backup_name}", user="u1" ) @@ -907,7 +907,7 @@ def test_required_privileges(): instance.query("DROP TABLE test.table") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON test.table" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON test.table" assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u1" ) @@ -1014,14 +1014,14 @@ def test_system_users_required_privileges(): backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON system.users" + expected_error = "necessary to have the grant BACKUP ON system.users" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="u2" ) instance.query("GRANT BACKUP ON system.users TO u2") - expected_error = "necessary to have grant BACKUP ON system.roles" + expected_error = "necessary to have the grant BACKUP ON system.roles" assert expected_error in instance.query_and_get_error( f"BACKUP TABLE system.users, TABLE system.roles TO {backup_name}", user="u2" ) @@ -1035,7 +1035,7 @@ def test_system_users_required_privileges(): instance.query("DROP ROLE r1") expected_error = ( - "necessary to have grant CREATE USER, CREATE ROLE, ROLE ADMIN ON *.*" + "necessary to have the grant CREATE USER, CREATE ROLE, ROLE ADMIN ON *.*" ) assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u2" @@ -1043,7 +1043,7 @@ def test_system_users_required_privileges(): instance.query("GRANT CREATE USER, 
CREATE ROLE, ROLE ADMIN ON *.* TO u2") - expected_error = "necessary to have grant SELECT ON test.* WITH GRANT OPTION" + expected_error = "necessary to have the grant SELECT ON test.* WITH GRANT OPTION" assert expected_error in instance.query_and_get_error( f"RESTORE ALL FROM {backup_name}", user="u2" ) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 6af3a7dbab8..a9ebbeb66c2 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -561,7 +561,7 @@ def test_required_privileges(): node1.query("GRANT CLUSTER ON *.* TO u1") backup_name = new_backup_name() - expected_error = "necessary to have grant BACKUP ON default.tbl" + expected_error = "necessary to have the grant BACKUP ON default.tbl" assert expected_error in node1.query_and_get_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}", user="u1" ) @@ -571,7 +571,7 @@ def test_required_privileges(): node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON default.tbl2" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON default.tbl2" assert expected_error in node1.query_and_get_error( f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) @@ -580,19 +580,21 @@ def test_required_privileges(): node1.query( f"RESTORE TABLE tbl AS tbl2 ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) + node2.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl2") assert node2.query("SELECT * FROM tbl2") == "100\n" node1.query(f"DROP TABLE tbl2 ON CLUSTER 'cluster' SYNC") node1.query("REVOKE ALL FROM u1") - expected_error = "necessary to have grant INSERT, CREATE TABLE ON default.tbl" + expected_error = "necessary to have the grant INSERT, CREATE TABLE ON default.tbl" assert expected_error in node1.query_and_get_error( f"RESTORE ALL ON CLUSTER 'cluster' FROM {backup_name}", user="u1" ) node1.query("GRANT INSERT, CREATE TABLE ON tbl TO u1") node1.query(f"RESTORE ALL ON CLUSTER 'cluster' FROM {backup_name}", user="u1") + node2.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") assert node2.query("SELECT * FROM tbl") == "100\n" @@ -605,7 +607,7 @@ def test_system_users(): node1.query("CREATE USER u2 SETTINGS allow_backup=false") node1.query("GRANT CLUSTER ON *.* TO u2") - expected_error = "necessary to have grant BACKUP ON system.users" + expected_error = "necessary to have the grant BACKUP ON system.users" assert expected_error in node1.query_and_get_error( f"BACKUP TABLE system.users ON CLUSTER 'cluster' TO {backup_name}", user="u2" ) @@ -617,14 +619,16 @@ def test_system_users(): node1.query("DROP USER u1") - expected_error = "necessary to have grant CREATE USER ON *.*" + expected_error = "necessary to have the grant CREATE USER ON *.*" assert expected_error in node1.query_and_get_error( f"RESTORE TABLE system.users ON CLUSTER 'cluster' FROM {backup_name}", user="u2" ) node1.query("GRANT CREATE USER ON *.* TO u2") - expected_error = "necessary to have grant SELECT ON default.tbl WITH GRANT OPTION" + expected_error = ( + "necessary to have the grant SELECT ON default.tbl WITH GRANT OPTION" + ) assert expected_error in node1.query_and_get_error( f"RESTORE TABLE system.users ON CLUSTER 'cluster' FROM {backup_name}", user="u2" ) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py 
b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py
index d0ce2e03016..c9f20333654 100644
--- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py
+++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py
@@ -5,6 +5,7 @@ import time
 import concurrent
 from helpers.cluster import ClickHouseCluster
 from helpers.test_tools import TSV, assert_eq_with_retry
+import re

 cluster = ClickHouseCluster(__file__)

@@ -110,6 +111,73 @@ def create_and_fill_table():
         nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)")

+
+def wait_for_fail_backup(node, backup_id, backup_name):
+    expected_errors = [
+        "Concurrent backups not supported",
+        f"Backup {backup_name} already exists",
+    ]
+    status = node.query(
+        f"SELECT status FROM system.backups WHERE id == '{backup_id}'"
+    ).rstrip("\n")
+    # It is possible that the second backup was picked up first, and then the async backup fails
+    if status == "BACKUP_FAILED":
+        error = node.query(
+            f"SELECT error FROM system.backups WHERE id == '{backup_id}'"
+        ).rstrip("\n")
+        assert any([expected_error in error for expected_error in expected_errors])
+        return
+    elif status == "CREATING_BACKUP":
+        assert_eq_with_retry(
+            node,
+            f"SELECT status FROM system.backups WHERE id = '{backup_id}'",
+            "BACKUP_FAILED",
+            sleep_time=2,
+            retry_count=50,
+        )
+        error = node.query(
+            f"SELECT error FROM system.backups WHERE id == '{backup_id}'"
+        ).rstrip("\n")
+        assert re.search(f"Backup {backup_name} already exists", error)
+        return
+    else:
+        assert False, "Concurrent backups both passed, when one is expected to fail"
+
+
+def wait_for_fail_restore(node, restore_id):
+    expected_errors = [
+        "Concurrent restores not supported",
+        "Cannot restore the table default.tbl because it already contains some data",
+    ]
+    status = node.query(
+        f"SELECT status FROM system.backups WHERE id == '{restore_id}'"
+    ).rstrip("\n")
+    # It is possible that the second restore was picked up first, and then the async restore fails
+    if status == "RESTORE_FAILED":
+        error = node.query(
+            f"SELECT error FROM system.backups WHERE id == '{restore_id}'"
+        ).rstrip("\n")
+        assert any([expected_error in error for expected_error in expected_errors])
+        return
+    elif status == "RESTORING":
+        assert_eq_with_retry(
+            node,
+            f"SELECT status FROM system.backups WHERE id = '{restore_id}'",
+            "RESTORE_FAILED",
+            sleep_time=2,
+            retry_count=50,
+        )
+        error = node.query(
+            f"SELECT error FROM system.backups WHERE id == '{restore_id}'"
+        ).rstrip("\n")
+        assert re.search(
+            "Cannot restore the table default.tbl because it already contains some data",
+            error,
+        )
+        return
+    else:
+        assert False, "Concurrent restores both passed, when one is expected to fail"
+
+
 # All the tests have concurrent backup/restores with same backup names
 # The same works with different backup names too.
Since concurrency # check comes before backup name check, separate tests are not added for different names @@ -133,13 +201,17 @@ def test_concurrent_backups_on_same_node(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - error = nodes[0].query_and_get_error( + result, error = nodes[0].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) + expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", ] + if not error: + wait_for_fail_backup(nodes[0], id, backup_name) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -179,13 +251,18 @@ def test_concurrent_backups_on_different_nodes(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - error = nodes[0].query_and_get_error( + result, error = nodes[0].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) + expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", ] + + if not error: + wait_for_fail_backup(nodes[1], id, backup_name) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -224,13 +301,18 @@ def test_concurrent_restores_on_same_node(): ) assert status in ["RESTORING", "RESTORED"] - error = nodes[0].query_and_get_error( + result, error = nodes[0].query_and_get_answer_with_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) + expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", ] + + if not error: + wait_for_fail_restore(nodes[0], restore_id) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( @@ -269,13 +351,18 @@ def test_concurrent_restores_on_different_node(): ) assert status in ["RESTORING", "RESTORED"] - error = nodes[1].query_and_get_error( + result, error = nodes[1].query_and_get_answer_with_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) + expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", ] + + if not error: + wait_for_fail_restore(nodes[0], restore_id) + assert any([expected_error in error for expected_error in expected_errors]) assert_eq_with_retry( diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml index c1fd059bc67..d635e39e13f 100644 --- a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml @@ -21,6 +21,13 @@ minio123 33554432 + + cache + disk_s3 + /tmp/s3_cache/ + 1000000000 + 1 +
@@ -37,11 +44,19 @@
+        <policy_s3_cache>
+            <volumes>
+                <main>
+                    <disk>disk_s3_cache</disk>
+                </main>
+            </volumes>
+        </policy_s3_cache>
default disk_s3 disk_s3_plain + disk_s3_cache diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 0285500d044..f8ec39d240b 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -2,6 +2,7 @@ from typing import Dict, Iterable import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV +import uuid cluster = ClickHouseCluster(__file__) @@ -37,51 +38,64 @@ def new_backup_name(): return f"backup{backup_id_counter}" -def get_events(events_names: Iterable[str]) -> Dict[str, int]: - _events = TSV( +def get_events_for_query(query_id: str) -> Dict[str, int]: + events = TSV( node.query( - f"SELECT event, value FROM system.events WHERE event in {events_names} SETTINGS system_events_show_zero_values = 1;" + f""" + SYSTEM FLUSH LOGS; + + WITH arrayJoin(ProfileEvents) as pe + SELECT pe.1, pe.2 + FROM system.query_log + WHERE query_id = '{query_id}' + """ ) ) return { event: int(value) - for event, value in [line.split("\t") for line in _events.lines] + for event, value in [line.split("\t") for line in events.lines] } +def format_settings(settings): + if not settings: + return "" + return "SETTINGS " + ",".join(f"{k}={v}" for k, v in settings.items()) + + def check_backup_and_restore( - storage_policy, backup_destination, size=1000, backup_name=None, check_events=False + storage_policy, + backup_destination, + size=1000, + backup_settings=None, + restore_settings=None, + insert_settings=None, + optimize_table=True, ): - s3_backup_events = ( - "WriteBufferFromS3Microseconds", - "WriteBufferFromS3Bytes", - "WriteBufferFromS3RequestsErrors", - ) - s3_restore_events = ( - "ReadBufferFromS3Microseconds", - "ReadBufferFromS3Bytes", - "ReadBufferFromS3RequestsErrors", - ) + optimize_table_query = "OPTIMIZE TABLE data FINAL;" if optimize_table else "" node.query( f""" DROP TABLE IF EXISTS data SYNC; CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}'; - INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size}; - OPTIMIZE TABLE data FINAL; + INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size} {format_settings(insert_settings)}; + {optimize_table_query} """ ) + try: - events_before_backups = get_events(s3_backup_events) - node.query(f"BACKUP TABLE data TO {backup_destination}") - events_after_backups = get_events(s3_backup_events) - events_before_restore = get_events(s3_restore_events) + backup_query_id = uuid.uuid4().hex + node.query( + f"BACKUP TABLE data TO {backup_destination} {format_settings(backup_settings)}", + query_id=backup_query_id, + ) + restore_query_id = uuid.uuid4().hex node.query( f""" - RESTORE TABLE data AS data_restored FROM {backup_destination}; - """ + RESTORE TABLE data AS data_restored FROM {backup_destination} {format_settings(restore_settings)}; + """, + query_id=restore_query_id, ) - events_after_restore = get_events(s3_restore_events) node.query( """ SELECT throwIf( @@ -91,55 +105,10 @@ def check_backup_and_restore( ); """ ) - if check_events and backup_name: - objects = node.cluster.minio_client.list_objects( - "root", f"data/backups/multipart/{backup_name}/" - ) - backup_meta_size = 0 - for obj in objects: - if ".backup" in obj.object_name: - backup_meta_size = obj.size - break - backup_total_size = int( - node.query( - f"SELECT 
sum(total_size) FROM system.backups WHERE status = 'BACKUP_CREATED' AND name like '%{backup_name}%'" - ).strip() - ) - restore_total_size = int( - node.query( - f"SELECT sum(total_size) FROM system.backups WHERE status = 'RESTORED' AND name like '%{backup_name}%'" - ).strip() - ) - # backup - # NOTE: ~35 bytes is used by .lock file, so set up 100 bytes to avoid flaky test - assert ( - abs( - backup_total_size - - ( - events_after_backups["WriteBufferFromS3Bytes"] - - events_before_backups["WriteBufferFromS3Bytes"] - - backup_meta_size - ) - ) - < 100 - ) - assert ( - events_after_backups["WriteBufferFromS3Microseconds"] - > events_before_backups["WriteBufferFromS3Microseconds"] - ) - assert events_after_backups["WriteBufferFromS3RequestsErrors"] == 0 - # restore - assert ( - events_after_restore["ReadBufferFromS3Bytes"] - - events_before_restore["ReadBufferFromS3Bytes"] - - backup_meta_size - == restore_total_size - ) - assert ( - events_after_restore["ReadBufferFromS3Microseconds"] - > events_before_restore["ReadBufferFromS3Microseconds"] - ) - assert events_after_restore["ReadBufferFromS3RequestsErrors"] == 0 + return [ + get_events_for_query(backup_query_id), + get_events_for_query(restore_query_id), + ] finally: node.query( """ @@ -158,6 +127,7 @@ def check_system_tables(): expected_disks = ( ("default", "local"), ("disk_s3", "s3"), + ("disk_s3_cache", "s3"), ("disk_s3_other_bucket", "s3"), ("disk_s3_plain", "s3_plain"), ) @@ -224,17 +194,62 @@ def test_backup_to_s3_multipart(): storage_policy = "default" backup_name = new_backup_name() backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" - check_backup_and_restore( + (backup_events, restore_events) = check_backup_and_restore( storage_policy, backup_destination, size=1000000, - backup_name=backup_name, - check_events=True, ) assert node.contains_in_log( f"copyDataToS3File: Multipart upload has completed. 
Bucket: root, Key: data/backups/multipart/{backup_name}" ) + s3_backup_events = ( + "WriteBufferFromS3Microseconds", + "WriteBufferFromS3Bytes", + "WriteBufferFromS3RequestsErrors", + ) + s3_restore_events = ( + "ReadBufferFromS3Microseconds", + "ReadBufferFromS3Bytes", + "ReadBufferFromS3RequestsErrors", + ) + + objects = node.cluster.minio_client.list_objects( + "root", f"data/backups/multipart/{backup_name}/" + ) + backup_meta_size = 0 + for obj in objects: + if ".backup" in obj.object_name: + backup_meta_size = obj.size + break + backup_total_size = int( + node.query( + f"SELECT sum(total_size) FROM system.backups WHERE status = 'BACKUP_CREATED' AND name like '%{backup_name}%'" + ).strip() + ) + restore_total_size = int( + node.query( + f"SELECT sum(total_size) FROM system.backups WHERE status = 'RESTORED' AND name like '%{backup_name}%'" + ).strip() + ) + # backup + # NOTE: ~35 bytes is used by .lock file, so set up 100 bytes to avoid flaky test + assert ( + abs( + backup_total_size + - (backup_events["WriteBufferFromS3Bytes"] - backup_meta_size) + ) + < 100 + ) + assert backup_events["WriteBufferFromS3Microseconds"] > 0 + assert "WriteBufferFromS3RequestsErrors" not in backup_events + # restore + assert ( + restore_events["ReadBufferFromS3Bytes"] - backup_meta_size == restore_total_size + ) + assert restore_events["ReadBufferFromS3Microseconds"] > 0 + assert "ReadBufferFromS3RequestsErrors" not in restore_events + def test_backup_to_s3_native_copy(): storage_policy = "policy_s3" @@ -242,9 +257,12 @@ def test_backup_to_s3_native_copy(): backup_destination = ( f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" ) - check_backup_and_restore(storage_policy, backup_destination) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination + ) + # single part upload + assert backup_events["S3CopyObject"] > 0 + assert restore_events["S3CopyObject"] > 0 assert node.contains_in_log( f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" ) @@ -256,9 +274,12 @@ def test_backup_to_s3_native_copy_other_bucket(): backup_destination = ( f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" ) - check_backup_and_restore(storage_policy, backup_destination) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination + ) + # single part upload + assert backup_events["S3CopyObject"] > 0 + assert restore_events["S3CopyObject"] > 0 assert node.contains_in_log( f"copyS3File: Single operation copy has completed. 
Bucket: root, Key: data/backups/{backup_name}" ) @@ -268,9 +289,12 @@ def test_backup_to_s3_native_copy_multipart(): storage_policy = "policy_s3" backup_name = new_backup_name() backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" - check_backup_and_restore(storage_policy, backup_destination, size=1000000) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination, size=1000000 + ) + # multi part upload + assert backup_events["S3CreateMultipartUpload"] > 0 + assert restore_events["S3CreateMultipartUpload"] > 0 assert node.contains_in_log( f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" ) @@ -301,3 +325,77 @@ def test_incremental_backup_append_table_def(): assert node.query("SELECT count(), sum(x) FROM data") == "100\t4950\n" assert "parts_to_throw_insert = 100" in node.query("SHOW CREATE TABLE data") + + +@pytest.mark.parametrize( + "in_cache_initially, allow_backup_read_cache, allow_s3_native_copy", + [ + (False, True, False), + (True, False, False), + (True, True, False), + (True, True, True), + ], +) +def test_backup_with_fs_cache( + in_cache_initially, allow_backup_read_cache, allow_s3_native_copy +): + storage_policy = "policy_s3_cache" + + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + + insert_settings = { + "enable_filesystem_cache_on_write_operations": int(in_cache_initially) + } + + backup_settings = { + "read_from_filesystem_cache": int(allow_backup_read_cache), + "allow_s3_native_copy": int(allow_s3_native_copy), + } + + restore_settings = {"allow_s3_native_copy": int(allow_s3_native_copy)} + + backup_events, restore_events = check_backup_and_restore( + storage_policy, + backup_destination, + size=10, + insert_settings=insert_settings, + optimize_table=False, + backup_settings=backup_settings, + restore_settings=restore_settings, + ) + + # print(f"backup_events = {backup_events}") + # print(f"restore_events = {restore_events}") + + # BACKUP never updates the filesystem cache but it may read it if `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` allows that. + if allow_backup_read_cache and in_cache_initially: + assert backup_events["CachedReadBufferReadFromCacheBytes"] > 0 + assert not "CachedReadBufferReadFromSourceBytes" in backup_events + elif allow_backup_read_cache: + assert not "CachedReadBufferReadFromCacheBytes" in backup_events + assert backup_events["CachedReadBufferReadFromSourceBytes"] > 0 + else: + assert not "CachedReadBufferReadFromCacheBytes" in backup_events + assert not "CachedReadBufferReadFromSourceBytes" in backup_events + + assert not "CachedReadBufferCacheWriteBytes" in backup_events + assert not "CachedWriteBufferCacheWriteBytes" in backup_events + + # RESTORE doesn't use the filesystem cache during write operations. 
+ # However while attaching parts it may use the cache while reading such files as "columns.txt" or "checksums.txt" or "primary.idx", + # see IMergeTreeDataPart::loadColumnsChecksumsIndexes() + if "CachedReadBufferReadFromSourceBytes" in restore_events: + assert ( + restore_events["CachedReadBufferReadFromSourceBytes"] + == restore_events["CachedReadBufferCacheWriteBytes"] + ) + + assert not "CachedReadBufferReadFromCacheBytes" in restore_events + + # "format_version.txt" is written when a table is created, + # see MergeTreeData::initializeDirectoriesAndFormatVersion() + if "CachedWriteBufferCacheWriteBytes" in restore_events: + assert restore_events["CachedWriteBufferCacheWriteBytes"] <= 1 diff --git a/tests/integration/test_storage_nats/__init__.py b/tests/integration/test_backup_restore_storage_policy/__init__.py similarity index 100% rename from tests/integration/test_storage_nats/__init__.py rename to tests/integration/test_backup_restore_storage_policy/__init__.py diff --git a/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml b/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml new file mode 100644 index 00000000000..de9f68cbe87 --- /dev/null +++ b/tests/integration/test_backup_restore_storage_policy/configs/storage_config.xml @@ -0,0 +1,33 @@ + + + + + local + /var/lib/disks/one/ + + + local + /var/lib/disks/two/ + + + + + + + one + + + + + + + two + + + + + + + /backups + + diff --git a/tests/integration/test_backup_restore_storage_policy/test.py b/tests/integration/test_backup_restore_storage_policy/test.py new file mode 100644 index 00000000000..4e9d309a220 --- /dev/null +++ b/tests/integration/test_backup_restore_storage_policy/test.py @@ -0,0 +1,76 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +backup_id_counter = 0 + +cluster = ClickHouseCluster(__file__) + +instance = cluster.add_instance( + "instance", + main_configs=["configs/storage_config.xml"], +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + instance.query("DROP DATABASE IF EXISTS test") + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"File('/backups/{backup_id_counter}/')" + + +def create_table_backup(backup_name, storage_policy=None): + instance.query("CREATE DATABASE test") + create_query = "CREATE TABLE test.table(x UInt32) ENGINE=MergeTree ORDER BY x" + if storage_policy is not None: + create_query += f" SETTINGS storage_policy = '{storage_policy}'" + instance.query(create_query) + instance.query(f"INSERT INTO test.table SELECT number FROM numbers(10)") + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + instance.query("DROP TABLE test.table SYNC") + + +def restore_table(backup_name, storage_policy=None): + restore_query = f"RESTORE TABLE test.table FROM {backup_name}" + if storage_policy is not None: + restore_query += f" SETTINGS storage_policy = '{storage_policy}'" + instance.query(restore_query) + + +@pytest.mark.parametrize( + "origin_policy, restore_policy, expected_policy", + [ + (None, "", "default"), + (None, None, "default"), + (None, "policy1", "policy1"), + ("policy1", "policy1", "policy1"), + ("policy1", "policy2", "policy2"), + ("policy1", "", "default"), + ("policy1", None, "policy1"), + ], +) +def test_storage_policies(origin_policy, restore_policy, expected_policy): + 
backup_name = new_backup_name() + create_table_backup(backup_name, origin_policy) + restore_table(backup_name, restore_policy) + + assert ( + instance.query("SELECT storage_policy FROM system.tables WHERE name='table'") + == f"{expected_policy}\n" + ) diff --git a/tests/integration/test_storage_postgresql_replica/__init__.py b/tests/integration/test_backup_s3_storage_class/__init__.py similarity index 100% rename from tests/integration/test_storage_postgresql_replica/__init__.py rename to tests/integration/test_backup_s3_storage_class/__init__.py diff --git a/tests/integration/test_backup_s3_storage_class/test.py b/tests/integration/test_backup_s3_storage_class/test.py new file mode 100644 index 00000000000..2b11f20afc6 --- /dev/null +++ b/tests/integration/test_backup_s3_storage_class/test.py @@ -0,0 +1,47 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + stay_alive=True, + with_minio=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_backup_s3_storage_class(started_cluster): + node.query( + """ + CREATE TABLE test_s3_storage_class + ( + `id` UInt64, + `value` String + ) + ENGINE = MergeTree + ORDER BY id; + """, + ) + node.query( + """ + INSERT INTO test_s3_storage_class VALUES (1, 'a'); + """, + ) + result = node.query( + """ + BACKUP TABLE test_s3_storage_class TO S3('http://minio1:9001/root/data', 'minio', 'minio123') + SETTINGS s3_storage_class='STANDARD'; + """ + ) + + minio = cluster.minio_client + lst = list(minio.list_objects(cluster.minio_bucket, "data/.backup")) + assert lst[0].storage_class == "STANDARD" diff --git a/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml b/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml new file mode 100644 index 00000000000..c69be846c46 --- /dev/null +++ b/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml @@ -0,0 +1,5 @@ + + + 0 + + diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index ea1d3ab9c07..6f21b184a95 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -10,11 +10,13 @@ node1 = cluster.add_instance( tag="19.17.8.54", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", main_configs=["configs/wide_parts_only.xml", "configs/no_compress_marks.xml"], with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py index 01c9736c354..cf258987cbf 100644 --- a/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py +++ b/tests/integration/test_backward_compatibility/test_aggregate_fixed_key.py @@ -9,9 +9,10 @@ node1 = cluster.add_instance( image="yandex/clickhouse-server", tag="21.3", with_installed_binary=True, + allow_analyzer=False, ) -node2 = cluster.add_instance("node2", with_zookeeper=True) -node3 = cluster.add_instance("node3", with_zookeeper=True) +node2 = cluster.add_instance("node2", with_zookeeper=True, allow_analyzer=False) +node3 = cluster.add_instance("node3", with_zookeeper=True, 
allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_aggregate_function_state.py b/tests/integration/test_backward_compatibility/test_aggregate_function_state.py index 1f6d405603a..3a936239cc8 100644 --- a/tests/integration/test_backward_compatibility/test_aggregate_function_state.py +++ b/tests/integration/test_backward_compatibility/test_aggregate_function_state.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,9 +19,10 @@ node2 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) -node4 = cluster.add_instance("node4", with_zookeeper=False) +node3 = cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) +node4 = cluster.add_instance("node4", with_zookeeper=False, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_convert_ordinary.py b/tests/integration/test_backward_compatibility/test_convert_ordinary.py index 8b1afd358eb..36facdd59b1 100644 --- a/tests/integration/test_backward_compatibility/test_convert_ordinary.py +++ b/tests/integration/test_backward_compatibility/test_convert_ordinary.py @@ -9,6 +9,7 @@ node = cluster.add_instance( stay_alive=True, with_zookeeper=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_cte_distributed.py b/tests/integration/test_backward_compatibility/test_cte_distributed.py index 7ea0d2d9f21..c68468aad75 100644 --- a/tests/integration/test_backward_compatibility/test_cte_distributed.py +++ b/tests/integration/test_backward_compatibility/test_cte_distributed.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.3.14", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) @@ -31,7 +32,7 @@ WITH quantile(0.05)(cnt) as p05, quantile(0.95)(cnt) as p95, p95 - p05 as inter_percentile_range -SELECT +SELECT sum(cnt) as total_requests, count() as data_points, inter_percentile_range @@ -49,7 +50,7 @@ WITH quantile(0.05)(cnt) as p05, quantile(0.95)(cnt) as p95, p95 - p05 as inter_percentile_range -SELECT +SELECT sum(cnt) as total_requests, count() as data_points, inter_percentile_range diff --git a/tests/integration/test_backward_compatibility/test_data_skipping_indices.py b/tests/integration/test_backward_compatibility/test_data_skipping_indices.py index c65dc6d3841..46ab27d2ab0 100644 --- a/tests/integration/test_backward_compatibility/test_data_skipping_indices.py +++ b/tests/integration/test_backward_compatibility/test_data_skipping_indices.py @@ -12,6 +12,7 @@ node = cluster.add_instance( tag="21.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index afb19901e74..c86c3ba0ab2 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ 
b/tests/integration/test_backward_compatibility/test_functions.py @@ -9,7 +9,7 @@ from helpers.cluster import ClickHouseCluster from helpers.client import QueryRuntimeException cluster = ClickHouseCluster(__file__) -upstream = cluster.add_instance("upstream") +upstream = cluster.add_instance("upstream", allow_analyzer=False) backward = cluster.add_instance( "backward", image="clickhouse/clickhouse-server", @@ -19,6 +19,7 @@ backward = cluster.add_instance( # Affected at least: singleValueOrNull, last_value, min, max, any, anyLast, anyHeavy, first_value, argMin, argMax tag="22.6", with_installed_binary=True, + allow_analyzer=False, ) @@ -142,6 +143,7 @@ def test_string_functions(start_cluster): "position", "substring", "CAST", + "getTypeSerializationStreams", # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument # 22.8 Backward Incompatible Change: Extended range of Date32 "toDate32OrZero", diff --git a/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py b/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py index d55f155918e..cd67f1f6344 100644 --- a/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py +++ b/tests/integration/test_backward_compatibility/test_in_memory_parts_still_read.py @@ -12,6 +12,7 @@ node = cluster.add_instance( tag="23.4", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_insert_profile_events.py b/tests/integration/test_backward_compatibility/test_insert_profile_events.py index 0fd453e57d4..8564c6b5952 100644 --- a/tests/integration/test_backward_compatibility/test_insert_profile_events.py +++ b/tests/integration/test_backward_compatibility/test_insert_profile_events.py @@ -7,12 +7,13 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -upstream_node = cluster.add_instance("upstream_node") +upstream_node = cluster.add_instance("upstream_node", allow_analyzer=False) old_node = cluster.add_instance( "old_node", image="clickhouse/clickhouse-server", tag="22.5.1.2079", with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py b/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py index bb40dff27ac..04016755a24 100644 --- a/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py +++ b/tests/integration/test_backward_compatibility/test_ip_types_binary_compatibility.py @@ -10,6 +10,7 @@ node_22_6 = cluster.add_instance( tag="22.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py index d76c4eba409..96b41c81384 100644 --- a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py +++ b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="21.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,8 +19,9 @@ node2 = cluster.add_instance( tag="21.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) +node3 = 
cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py b/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py index fcdedd29dad..3cd708d5029 100644 --- a/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py +++ b/tests/integration/test_backward_compatibility/test_normalized_count_comparison.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.2.7", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py index 8bdae54a889..7e10b6ab430 100644 --- a/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py +++ b/tests/integration/test_backward_compatibility/test_select_aggregate_alias_column.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance("node1", with_zookeeper=False) +node1 = cluster.add_instance("node1", with_zookeeper=False, allow_analyzer=False) node2 = cluster.add_instance( "node2", with_zookeeper=False, @@ -11,6 +11,7 @@ node2 = cluster.add_instance( tag="21.7.2.7", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py b/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py index 17a7282b7b5..e4fda618031 100644 --- a/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py +++ b/tests/integration/test_backward_compatibility/test_short_strings_aggregation.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -18,8 +19,9 @@ node2 = cluster.add_instance( tag="19.16.9.37", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) -node3 = cluster.add_instance("node3", with_zookeeper=False) +node3 = cluster.add_instance("node3", with_zookeeper=False, allow_analyzer=False) @pytest.fixture(scope="module") diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 3d006caad0d..9c9d1a4d312 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -11,12 +11,17 @@ node_old = cluster.add_instance( stay_alive=True, with_installed_binary=True, with_zookeeper=True, + allow_analyzer=False, ) node_new = cluster.add_instance( "node2", - main_configs=["configs/no_compress_marks.xml"], + main_configs=[ + "configs/no_compress_marks.xml", + "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml", + ], with_zookeeper=True, stay_alive=True, + allow_analyzer=False, ) diff --git 
a/tests/integration/test_broken_detached_part_clean_up/test.py b/tests/integration/test_broken_detached_part_clean_up/test.py index 9a70ebe0d48..bdf993ddedf 100644 --- a/tests/integration/test_broken_detached_part_clean_up/test.py +++ b/tests/integration/test_broken_detached_part_clean_up/test.py @@ -57,27 +57,30 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): ] ) - node.exec_in_container(["mkdir", f"{path_to_detached}../unexpected_all_42_1337_5"]) - node.exec_in_container( - [ - "touch", - "-t", - "1312031429.30", - f"{path_to_detached}../unexpected_all_42_1337_5", - ] - ) - result = node.exec_in_container( - ["stat", f"{path_to_detached}../unexpected_all_42_1337_5"] - ) - print(result) - assert "Modify: 2013-12-03" in result - node.exec_in_container( - [ - "mv", - f"{path_to_detached}../unexpected_all_42_1337_5", - f"{path_to_detached}unexpected_all_42_1337_5", - ] - ) + for name in [ + "unexpected_all_42_1337_5", + "deleting_all_123_456_7", + "covered-by-broken_all_12_34_5", + ]: + node.exec_in_container(["mkdir", f"{path_to_detached}../{name}"]) + node.exec_in_container( + [ + "touch", + "-t", + "1312031429.30", + f"{path_to_detached}../{name}", + ] + ) + result = node.exec_in_container(["stat", f"{path_to_detached}../{name}"]) + print(result) + assert "Modify: 2013-12-03" in result + node.exec_in_container( + [ + "mv", + f"{path_to_detached}../{name}", + f"{path_to_detached}{name}", + ] + ) result = node.query( f"CHECK TABLE {table}", settings={"check_query_single_value_result": 0} @@ -87,17 +90,20 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): node.query(f"DETACH TABLE {table}") node.query(f"ATTACH TABLE {table}") - result = node.exec_in_container(["ls", path_to_detached]) - print(result) - assert f"{expect_broken_prefix}_all_3_3_0" in result - assert "all_1_1_0" in result - assert "trash" in result - assert "broken_all_fake" in result - assert "unexpected_all_42_1337_5" in result - - time.sleep(15) - assert node.contains_in_log( - "Removed broken detached part unexpected_all_42_1337_5 due to a timeout" + node.wait_for_log_line( + "Removing detached part deleting_all_123_456_7", + timeout=90, + look_behind_lines=1000000, + ) + node.wait_for_log_line( + f"Removed broken detached part {expect_broken_prefix}_all_3_3_0 due to a timeout", + timeout=10, + look_behind_lines=1000000, + ) + node.wait_for_log_line( + "Removed broken detached part unexpected_all_42_1337_5 due to a timeout", + timeout=10, + look_behind_lines=1000000, ) result = node.exec_in_container(["ls", path_to_detached]) @@ -106,7 +112,16 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): assert "all_1_1_0" in result assert "trash" in result assert "broken_all_fake" in result + assert "covered-by-broken_all_12_34_5" in result assert "unexpected_all_42_1337_5" not in result + assert "deleting_all_123_456_7" not in result + + node.query( + f"ALTER TABLE {table} DROP DETACHED PART 'covered-by-broken_all_12_34_5'", + settings={"allow_drop_detached": 1}, + ) + result = node.exec_in_container(["ls", path_to_detached]) + assert "covered-by-broken_all_12_34_5" not in result node.query(f"DROP TABLE {table} SYNC") diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index a80ad93d53d..c40e2a31a8b 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -91,7 +91,7 @@ def get_counters(node, 
query_id, log_type="ExceptionWhileProcessing"): def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression): node = cluster.instances["node"] - broken_s3.setup_error_at_create_multi_part_upload() + broken_s3.setup_at_create_multi_part_upload() insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_FAIL_CREATE_MPU_{compression}" error = node.query_and_get_error( @@ -134,7 +134,7 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( node = cluster.instances["node"] broken_s3.setup_fake_multpartuploads() - broken_s3.setup_error_at_part_upload(count=1, after=2) + broken_s3.setup_at_part_upload(count=1, after=2) insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_FAIL_UPLOAD_PART_{compression}" error = node.query_and_get_error( @@ -165,3 +165,302 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( assert count_create_multi_part_uploads == 1 assert count_upload_parts >= 2 assert count_s3_errors >= 2 + + +def test_when_s3_connection_refused_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload(count=3, after=2, action="connection_refused") + + insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 39 + assert count_s3_errors == 3 + + broken_s3.setup_at_part_upload(count=1000, after=2, action="connection_refused") + insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 499" in error, error + assert ( + "Poco::Exception. 
Code: 1000, e.code() = 111, Connection refused" in error + ), error + + +@pytest.mark.parametrize("send_something", [True, False]) +def test_when_s3_connection_reset_by_peer_at_upload_is_retried( + cluster, broken_s3, send_something +): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="connection_reset_by_peer", + action_args=["1"] if send_something else ["0"], + ) + + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_{send_something}" + ) + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 39 + assert count_s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="connection_reset_by_peer", + action_args=["1"] if send_something else ["0"], + ) + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_UPLOAD_{send_something}_1" + ) + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Connection reset by peer." in error + or "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 104, Connection reset by peer" + in error + ), error + + +@pytest.mark.parametrize("send_something", [True, False]) +def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( + cluster, broken_s3, send_something +): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_create_multi_part_upload( + count=3, + after=0, + action="connection_reset_by_peer", + action_args=["1"] if send_something else ["0"], + ) + + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_{send_something}" + ) + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 39 + assert count_s3_errors == 3 + + broken_s3.setup_at_create_multi_part_upload( + count=1000, + after=0, + action="connection_reset_by_peer", + action_args=["1"] if send_something else ["0"], + ) + + insert_query_id = ( + f"TEST_WHEN_S3_CONNECTION_RESET_BY_PEER_AT_MULTIPARTUPLOAD_{send_something}_1" + ) + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Connection reset by peer." in error + or "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 104, Connection reset by peer" + in error + ), error + + +def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="broken_pipe", + ) + + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + count_create_multi_part_uploads, count_upload_parts, count_s3_errors = get_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert count_create_multi_part_uploads == 1 + assert count_upload_parts == 7 + assert count_s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="broken_pipe", + ) + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Poco::Exception. Code: 1000, e.code() = 32, I/O error: Broken pipe" + in error + ), error diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py index 2b6fcf6cac2..1bd3561f24f 100644 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ b/tests/integration/test_cluster_copier/test_two_nodes.py @@ -595,24 +595,3 @@ def execute_task(started_cluster, task, cmd_options): task.check() finally: zk.delete(task.zk_task_path, recursive=True) - - -# Tests -@pytest.mark.skip(reason="Too flaky :(") -def test_different_schema(started_cluster): - execute_task(started_cluster, TaskWithDifferentSchema(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_ttl_columns(started_cluster): - execute_task(started_cluster, TaskTTL(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_skip_index(started_cluster): - execute_task(started_cluster, TaskSkipIndex(started_cluster), []) - - -@pytest.mark.skip(reason="Too flaky :(") -def test_ttl_move_to_volume(started_cluster): - execute_task(started_cluster, TaskTTLMoveToVolume(started_cluster), []) diff --git a/tests/integration/test_compression_codec_read/test.py b/tests/integration/test_compression_codec_read/test.py index 38cd61e241d..b39e5147d38 100644 --- a/tests/integration/test_compression_codec_read/test.py +++ b/tests/integration/test_compression_codec_read/test.py @@ -11,6 +11,7 @@ node1 = cluster.add_instance( tag="20.8.11.17", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_concurrent_ttl_merges/configs/users.xml b/tests/integration/test_concurrent_ttl_merges/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_concurrent_ttl_merges/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff 
--git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index 07e91dcbc9f..3a3981d65ba 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -7,10 +7,16 @@ from helpers.test_tools import assert_eq_with_retry, TSV cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/fast_background_pool.xml"], with_zookeeper=True + "node1", + main_configs=["configs/fast_background_pool.xml"], + user_configs=["configs/users.xml"], + with_zookeeper=True, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/fast_background_pool.xml"], with_zookeeper=True + "node2", + main_configs=["configs/fast_background_pool.xml"], + user_configs=["configs/users.xml"], + with_zookeeper=True, ) @@ -97,10 +103,6 @@ def test_no_ttl_merges_in_busy_pool(started_cluster): rows_count.append(int(node1.query("SELECT count() FROM test_ttl").strip())) time.sleep(0.5) - # at least several seconds we didn't run any TTL merges and rows count equal - # to the original value - assert sum([1 for count in rows_count if count == 30]) > 4 - assert_eq_with_retry(node1, "SELECT COUNT() FROM test_ttl", "0") node1.query("DROP TABLE test_ttl SYNC") diff --git a/tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.reference b/tests/integration/test_config_decryption/__init__.py similarity index 100% rename from tests/queries/0_stateless/02726_set_allow_experimental_query_cache_as_obsolete.reference rename to tests/integration/test_config_decryption/__init__.py diff --git a/tests/integration/test_config_decryption/configs/config.xml b/tests/integration/test_config_decryption/configs/config.xml new file mode 100644 index 00000000000..4b0d3a77659 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config.xml @@ -0,0 +1,15 @@ + + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + diff --git a/tests/integration/test_config_decryption/configs/config.yaml b/tests/integration/test_config_decryption/configs/config.yaml new file mode 100644 index 00000000000..1b20b65b652 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config.yaml @@ -0,0 +1,13 @@ +encryption_codecs: + aes_128_gcm_siv: + key_hex: 00112233445566778899aabbccddeeff + aes_256_gcm_siv: + key_hex: 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + +max_table_size_to_drop: + '#text': 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + '@encrypted_by': AES_128_GCM_SIV + +max_partition_size_to_drop: + '@encrypted_by': AES_256_GCM_SIV + '#text': 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 diff --git a/tests/integration/test_config_decryption/configs/config_invalid_chars.xml b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml new file mode 100644 index 00000000000..53345b897dc --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml @@ -0,0 +1,16 @@ + + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + + + --96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 
97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + diff --git a/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml new file mode 100644 index 00000000000..830c75f7378 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml @@ -0,0 +1,7 @@ + + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + + diff --git a/tests/integration/test_config_decryption/configs/config_subnodes.xml b/tests/integration/test_config_decryption/configs/config_subnodes.xml new file mode 100644 index 00000000000..8213270f747 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_subnodes.xml @@ -0,0 +1,14 @@ + + + + + 00112233445566778899aabbccddeeff + + + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + + + diff --git a/tests/integration/test_config_decryption/configs/config_wrong_method.xml b/tests/integration/test_config_decryption/configs/config_wrong_method.xml new file mode 100644 index 00000000000..b96c13d5105 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_wrong_method.xml @@ -0,0 +1,15 @@ + + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + diff --git a/tests/integration/test_config_decryption/test.py b/tests/integration/test_config_decryption/test.py new file mode 100644 index 00000000000..dd8cdc2e4e1 --- /dev/null +++ b/tests/integration/test_config_decryption/test.py @@ -0,0 +1,40 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", main_configs=["configs/config.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/config.yaml"]) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def check_node(started_cluster, node): + assert ( + node.query( + "select value from system.server_settings where name ='max_table_size_to_drop'" + ) + == "60000000000\n" + ) + assert ( + node.query( + "select value from system.server_settings where name ='max_partition_size_to_drop'" + ) + == "40000000000\n" + ) + + +def test_successful_decryption_xml(started_cluster): + check_node(started_cluster, node1) + + +def test_successful_decryption_yaml(started_cluster): + check_node(started_cluster, node2) diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py new file mode 100644 index 00000000000..c6987d12324 --- /dev/null +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -0,0 +1,37 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +def start_clickhouse(config, err_msg): + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance("node", main_configs=[config]) + caught_exception = "" + try: + cluster.start() + except Exception as e: + caught_exception = str(e) + assert err_msg in caught_exception + + +def test_wrong_method(): + start_clickhouse( + "configs/config_wrong_method.xml", "Unknown encryption method. 
Got WRONG" + ) + + +def test_invalid_chars(): + start_clickhouse( + "configs/config_invalid_chars.xml", + "Cannot read encrypted text, check for valid characters", + ) + + +def test_no_encryption_key(): + start_clickhouse( + "configs/config_no_encryption_key.xml", + "There is no key 0 in config for AES_128_GCM_SIV encryption codec", + ) + + +def test_subnodes(): + start_clickhouse("configs/config_subnodes.xml", "cannot contain nested elements") diff --git a/tests/integration/test_crash_log/__init__.py b/tests/integration/test_crash_log/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_crash_log/configs/crash_log.xml b/tests/integration/test_crash_log/configs/crash_log.xml new file mode 100644 index 00000000000..f4fbfaba08e --- /dev/null +++ b/tests/integration/test_crash_log/configs/crash_log.xml @@ -0,0 +1,16 @@ + + + 1000000 + 1 + 1 + 1 + true + + + 1000000 + 100 + 100 + 100 + true + + diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py new file mode 100644 index 00000000000..a5b82039a84 --- /dev/null +++ b/tests/integration/test_crash_log/test.py @@ -0,0 +1,74 @@ +import os +import time +import pytest + +import helpers.cluster +import helpers.test_tools + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module") +def started_node(): + cluster = helpers.cluster.ClickHouseCluster(__file__) + try: + node = cluster.add_instance( + "node", main_configs=["configs/crash_log.xml"], stay_alive=True + ) + + cluster.start() + yield node + finally: + cluster.shutdown() + + +def send_signal(started_node, signal): + started_node.exec_in_container( + ["bash", "-c", f"pkill -{signal} clickhouse"], user="root" + ) + + +def wait_for_clickhouse_stop(started_node): + result = None + for attempt in range(120): + time.sleep(1) + pid = started_node.get_process_pid("clickhouse") + if pid is None: + result = "OK" + break + assert result == "OK", "ClickHouse process is still running" + + +def test_pkill(started_node): + if ( + started_node.is_built_with_thread_sanitizer() + or started_node.is_built_with_address_sanitizer() + or started_node.is_built_with_memory_sanitizer() + ): + pytest.skip("doesn't fit in timeouts for stacktrace generation") + + crashes_count = 0 + for signal in ["SEGV", "4"]: + send_signal(started_node, signal) + wait_for_clickhouse_stop(started_node) + started_node.restart_clickhouse() + crashes_count += 1 + assert ( + started_node.query("SELECT COUNT(*) FROM system.crash_log") + == f"{crashes_count}\n" + ) + + +def test_pkill_query_log(started_node): + for signal in ["SEGV", "4"]: + # force create query_log if it was not created + started_node.query("SYSTEM FLUSH LOGS") + started_node.query("TRUNCATE TABLE IF EXISTS system.query_log") + started_node.query("SELECT COUNT(*) FROM system.query_log") + # logs don't flush + assert started_node.query("SELECT COUNT(*) FROM system.query_log") == f"{0}\n" + + send_signal(started_node, signal) + wait_for_clickhouse_stop(started_node) + started_node.restart_clickhouse() + assert started_node.query("SELECT COUNT(*) FROM system.query_log") >= f"3\n" diff --git a/tests/integration/test_default_compression_codec/test.py b/tests/integration/test_default_compression_codec/test.py index c7c30f5eea4..c8b75ea9751 100644 --- a/tests/integration/test_default_compression_codec/test.py +++ b/tests/integration/test_default_compression_codec/test.py @@ -24,6 +24,7 @@ node3 = cluster.add_instance( tag="20.3.16", stay_alive=True, 
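    # The pinned 20.3.16 image presumably predates the new analyzer, so allow_analyzer is disabled for this instance (as for the other old-version instances in these tests).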
with_installed_binary=True, + allow_analyzer=False, ) node4 = cluster.add_instance("node4") diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py deleted file mode 100644 index 90287e19bd0..00000000000 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_cassandra.py +++ /dev/null @@ -1,99 +0,0 @@ -import os -import math -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/33006 - -pytestmark = pytest.mark.skip - -from .common import * - -from helpers.cluster import ClickHouseCluster -from helpers.dictionary import Field, Row, Dictionary, DictionaryStructure, Layout -from helpers.external_sources import SourceCassandra - -SOURCE = None -cluster = None -node = None -simple_tester = None -complex_tester = None -ranged_tester = None -test_name = "cassandra" - - -def setup_module(module): - global cluster - global node - global simple_tester - global complex_tester - global ranged_tester - - cluster = ClickHouseCluster(__file__) - - SOURCE = SourceCassandra( - "Cassandra", - None, - cluster.cassandra_port, - cluster.cassandra_host, - cluster.cassandra_port, - "", - "", - ) - - simple_tester = SimpleLayoutTester(test_name) - simple_tester.cleanup() - simple_tester.create_dictionaries(SOURCE) - - complex_tester = ComplexLayoutTester(test_name) - complex_tester.create_dictionaries(SOURCE) - - ranged_tester = RangedLayoutTester(test_name) - ranged_tester.create_dictionaries(SOURCE) - # Since that all .xml configs were created - - main_configs = [] - main_configs.append(os.path.join("configs", "disable_ssl_verification.xml")) - - dictionaries = simple_tester.list_dictionaries() - - node = cluster.add_instance( - "cass_node", - main_configs=main_configs, - dictionaries=dictionaries, - with_cassandra=True, - ) - - -def teardown_module(module): - simple_tester.cleanup() - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - simple_tester.prepare(cluster) - complex_tester.prepare(cluster) - ranged_tester.prepare(cluster) - - yield cluster - - finally: - cluster.shutdown() - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE)) -def test_simple(started_cluster, layout_name): - simple_tester.execute(layout_name, node) - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX)) -def test_complex(started_cluster, layout_name): - complex_tester.execute(layout_name, node) - - -@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED)) -def test_ranged(started_cluster, layout_name): - ranged_tester.execute(layout_name, node) diff --git a/tests/integration/test_dictionaries_complex_key_cache_string/test.py b/tests/integration/test_dictionaries_complex_key_cache_string/test.py index ae9cd4e7c91..bca7122a2a8 100644 --- a/tests/integration/test_dictionaries_complex_key_cache_string/test.py +++ b/tests/integration/test_dictionaries_complex_key_cache_string/test.py @@ -27,71 +27,3 @@ def started_cluster(): yield cluster finally: cluster.shutdown() - - -@pytest.mark.skip(reason="SSD cache test can run on disk only") -@pytest.mark.parametrize("type", ["memory", "ssd"]) -def test_memory_consumption(started_cluster, type): - node = started_cluster.instances[f"node_{type}"] - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "w" * 8 - ) - ) - node.query( - "insert into radars_table select 
toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "x" * 16 - ) - ) - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "y" * 32 - ) - ) - node.query( - "insert into radars_table select toString(rand() % 5000), '{0}', '{0}' from numbers(1000)".format( - "z" * 64 - ) - ) - - # Fill dictionary - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated_first = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - - alloc_array = [] - for i in range(5): - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - alloc_array.append(allocated) - - # size doesn't grow - assert all(allocated_first >= a for a in alloc_array) - - for i in range(5): - node.query( - "select dictGetString('radars', 'client_id', tuple(toString(number))) from numbers(0, 5000)" - ) - - allocated = int( - node.query( - "select bytes_allocated from system.dictionaries where name = 'radars'" - ).strip() - ) - alloc_array.append(allocated) - - # size doesn't grow - assert all(allocated_first >= a for a in alloc_array) diff --git a/tests/integration/test_dictionaries_update_and_reload/test.py b/tests/integration/test_dictionaries_update_and_reload/test.py index a973b697d0d..99d08f1b6ea 100644 --- a/tests/integration/test_dictionaries_update_and_reload/test.py +++ b/tests/integration/test_dictionaries_update_and_reload/test.py @@ -281,7 +281,7 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster): query_and_get_error = instance.query_and_get_error # Can't get a value from the cache dictionary because the source (table `test.xypairs`) doesn't respond. 
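    # The assertions below only check that expected_error is a substring of the exception text, so it has to follow the server's current wording.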
- expected_error = "Table test.xypairs doesn't exist" + expected_error = "Table test.xypairs does not exist" update_error = "Could not update cache dictionary cache_xypairs now" assert expected_error in query_and_get_error( "SELECT dictGetUInt64('cache_xypairs', 'y', toUInt64(1))" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py index 85c45d5df3c..921fb4e4154 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_reading.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_default_reading(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py index 92d681698bc..6eedf63f95c 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py @@ -53,7 +53,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_return_real_values(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py index 05f638ec337..8e45af44640 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_simple_dict_get(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py index 1da8fd3325a..8d6242f4711 100644 --- a/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_dict_get_or_default.py @@ -38,7 +38,6 @@ def started_cluster(): cluster.shutdown() -# @pytest.mark.skip(reason="debugging") def test_simple_dict_get_or_default(started_cluster): assert None != dictionary_node.get_process_pid( "clickhouse" diff --git a/tests/integration/test_dictionary_custom_settings/test.py b/tests/integration/test_dictionary_custom_settings/test.py index 715219ceb87..eb394da8bb6 100644 --- a/tests/integration/test_dictionary_custom_settings/test.py +++ b/tests/integration/test_dictionary_custom_settings/test.py @@ -2,6 +2,8 @@ import os import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + DICTIONARY_FILES = [ "configs/dictionaries/FileSourceConfig.xml", @@ -78,5 +80,11 @@ def test_work(start_cluster): assert caught_exception.find("Limit for result exceeded") != -1 - assert query("SELECT dictGetString('test_http', 'first', toUInt64(1))") == "\\'a\n" - assert query("SELECT dictGetString('test_http', 'second', toUInt64(1))") == '"b\n' + # It is possible that the HTTP server takes long time to start accepting connections + + 
assert_eq_with_retry( + instance, "SELECT dictGetString('test_http', 'first', toUInt64(1))", "\\'a\n" + ) + assert_eq_with_retry( + instance, "SELECT dictGetString('test_http', 'second', toUInt64(1))", '"b\n' + ) diff --git a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py index 509b4de1a37..b620e88e7eb 100644 --- a/tests/integration/test_disabled_access_control_improvements/test_row_policy.py +++ b/tests/integration/test_disabled_access_control_improvements/test_row_policy.py @@ -76,7 +76,7 @@ def test_introspection(): "mydb", "filtered_table1", "6068883a-0e9d-f802-7e22-0144f8e66d3c", - "users.xml", + "users_xml", "1", 0, 0, @@ -89,7 +89,7 @@ def test_introspection(): "mydb", "filtered_table2", "c019e957-c60b-d54e-cc52-7c90dac5fb01", - "users.xml", + "users_xml", "1", 0, 0, @@ -102,7 +102,7 @@ def test_introspection(): "mydb", "filtered_table3", "4cb080d0-44e8-dbef-6026-346655143628", - "users.xml", + "users_xml", "1", 0, 0, @@ -115,7 +115,7 @@ def test_introspection(): "mydb", "local", "5b23c389-7e18-06bf-a6bc-dd1afbbc0a97", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -128,7 +128,7 @@ def test_introspection(): "mydb", "filtered_table1", "9e8a8f62-4965-2b5e-8599-57c7b99b3549", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -141,7 +141,7 @@ def test_introspection(): "mydb", "filtered_table2", "cffae79d-b9bf-a2ef-b798-019c18470b25", - "users.xml", + "users_xml", "a + b < 1 or c - d > 5", 0, 0, @@ -154,7 +154,7 @@ def test_introspection(): "mydb", "filtered_table3", "12fc5cef-e3da-3940-ec79-d8be3911f42b", - "users.xml", + "users_xml", "c = 1", 0, 0, @@ -167,7 +167,7 @@ def test_introspection(): "mydb", "local", "cdacaeb5-1d97-f99d-2bb0-4574f290629c", - "users.xml", + "users_xml", "1", 0, 0, diff --git a/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py b/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py index 5d760c9fc2c..894464fd813 100644 --- a/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py +++ b/tests/integration/test_disabled_access_control_improvements/test_select_from_system_tables.py @@ -43,7 +43,7 @@ def test_system_db(): assert node.query("SELECT count() FROM system.tables WHERE name='table2'") == "1\n" assert node.query("SELECT count()>0 FROM system.settings", user="another") == "1\n" - expected_error = "necessary to have grant SHOW USERS ON *.*" + expected_error = "necessary to have the grant SHOW USERS ON *.*" assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="another" ) @@ -62,7 +62,7 @@ def test_system_db(): ) assert node.query("SELECT count()>0 FROM system.settings", user="sqluser") == "1\n" - expected_error = "necessary to have grant SHOW USERS ON *.*" + expected_error = "necessary to have the grant SHOW USERS ON *.*" assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="sqluser" ) diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index d62adfb3343..bc60e5c865e 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -36,6 +36,7 @@ def cluster(): with_installed_binary=True, image="clickhouse/clickhouse-server", tag="22.8.14.53", + allow_analyzer=False, ) cluster.start() diff --git 
a/tests/integration/test_distributed_backward_compatability/test.py b/tests/integration/test_distributed_backward_compatability/test.py index cb51142d249..c48a7ad1fa1 100644 --- a/tests/integration/test_distributed_backward_compatability/test.py +++ b/tests/integration/test_distributed_backward_compatability/test.py @@ -11,6 +11,7 @@ node_old = cluster.add_instance( tag="20.8.9.6", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node_new = cluster.add_instance( "node2", diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py index faa38af6533..e66631460f7 100644 --- a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py @@ -68,7 +68,7 @@ def test_distributed_directory_monitor_split_batch_on_failure_OFF(started_cluste settings={ # max_memory_usage is the limit for the batch on the remote node # (local query should not be affected since 30MB is enough for 100K rows) - "max_memory_usage": "30Mi", + "max_memory_usage": "20Mi", "max_untracked_memory": "0", }, ) diff --git a/tests/integration/test_distributed_insert_backward_compatibility/test.py b/tests/integration/test_distributed_insert_backward_compatibility/test.py index ad61a2ad6f5..1e566d5e2da 100644 --- a/tests/integration/test_distributed_insert_backward_compatibility/test.py +++ b/tests/integration/test_distributed_insert_backward_compatibility/test.py @@ -14,6 +14,7 @@ node_dist = cluster.add_instance( tag="21.11.9.1", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 6dd25789f36..1aeaddcf3c5 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -33,6 +33,7 @@ backward = make_instance( # version without DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 tag="23.2.3", with_installed_binary=True, + allow_analyzer=False, ) users = pytest.mark.parametrize( @@ -109,10 +110,6 @@ def start_cluster(): cluster.shutdown() -def query_with_id(node, id_, query, **kwargs): - return node.query("WITH '{}' AS __id {}".format(id_, query), **kwargs) - - # @return -- [user, initial_user] def get_query_user_info(node, query_pattern): node.query("SYSTEM FLUSH LOGS") @@ -333,7 +330,7 @@ def test_secure_disagree_insert(): @users def test_user_insecure_cluster(user, password): id_ = "query-dist_insecure-" + user - query_with_id(n1, id_, "SELECT * FROM dist_insecure", user=user, password=password) + n1.query(f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password) assert get_query_user_info(n1, id_) == [ user, user, @@ -344,7 +341,7 @@ def test_user_insecure_cluster(user, password): @users def test_user_secure_cluster(user, password): id_ = "query-dist_secure-" + user - query_with_id(n1, id_, "SELECT * FROM dist_secure", user=user, password=password) + n1.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(n2, id_) == [user, user] @@ -352,16 +349,14 @@ def test_user_secure_cluster(user, password): @users def test_per_user_inline_settings_insecure_cluster(user, password): id_ = "query-ddl-settings-dist_insecure-" + user - 
query_with_id( - n1, - id_, - """ - SELECT * FROM dist_insecure - SETTINGS - prefer_localhost_replica=0, - max_memory_usage_for_user=1e9, - max_untracked_memory=0 - """, + n1.query( + f""" + SELECT *, '{id_}' FROM dist_insecure + SETTINGS + prefer_localhost_replica=0, + max_memory_usage_for_user=1e9, + max_untracked_memory=0 + """, user=user, password=password, ) @@ -371,16 +366,14 @@ def test_per_user_inline_settings_insecure_cluster(user, password): @users def test_per_user_inline_settings_secure_cluster(user, password): id_ = "query-ddl-settings-dist_secure-" + user - query_with_id( - n1, - id_, - """ - SELECT * FROM dist_secure - SETTINGS - prefer_localhost_replica=0, - max_memory_usage_for_user=1e9, - max_untracked_memory=0 - """, + n1.query( + f""" + SELECT *, '{id_}' FROM dist_secure + SETTINGS + prefer_localhost_replica=0, + max_memory_usage_for_user=1e9, + max_untracked_memory=0 + """, user=user, password=password, ) @@ -392,10 +385,8 @@ def test_per_user_inline_settings_secure_cluster(user, password): @users def test_per_user_protocol_settings_insecure_cluster(user, password): id_ = "query-protocol-settings-dist_insecure-" + user - query_with_id( - n1, - id_, - "SELECT * FROM dist_insecure", + n1.query( + f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password, settings={ @@ -410,10 +401,8 @@ def test_per_user_protocol_settings_insecure_cluster(user, password): @users def test_per_user_protocol_settings_secure_cluster(user, password): id_ = "query-protocol-settings-dist_secure-" + user - query_with_id( - n1, - id_, - "SELECT * FROM dist_secure", + n1.query( + f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password, settings={ @@ -430,8 +419,8 @@ def test_per_user_protocol_settings_secure_cluster(user, password): @users def test_user_secure_cluster_with_backward(user, password): id_ = "with-backward-query-dist_secure-" + user - query_with_id( - n1, id_, "SELECT * FROM dist_secure_backward", user=user, password=password + n1.query( + f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password ) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(backward, id_) == [user, user] @@ -440,13 +429,7 @@ def test_user_secure_cluster_with_backward(user, password): @users def test_user_secure_cluster_from_backward(user, password): id_ = "from-backward-query-dist_secure-" + user - query_with_id( - backward, - id_, - "SELECT * FROM dist_secure_backward", - user=user, - password=password, - ) + backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(backward, id_) == [user, user] diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index 98417930713..9c1c9797383 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -55,6 +55,13 @@ def cluster_without_dns_cache_update(): # node1 is a source, node2 downloads data # node2 has long dns_cache_update_period, so dns cache update wouldn't work def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): + # In this case we should manually set up the static DNS entries on the source host + # to exclude resplving addresses automatically added by docker. + # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. 
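+    # Presumably node2.set_hosts rewrites /etc/hosts inside the container (much like the per-test set_hosts helper removed further down), so "node1" resolves to the pinned address rather than whatever Docker's DNS returns.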
+ node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) + # drop DNS cache + node2.query("SYSTEM DROP DNS CACHE") + # First we check, that normal replication works node1.query( "INSERT INTO test_table_drop VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)" @@ -64,6 +71,7 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # We change source node ip cluster.restart_instance_with_ip_change(node1, "2001:3984:3989::1:7777") + node2.set_hosts([("2001:3984:3989::1:7777", "node1")]) # Put some data to source node1 node1.query( @@ -163,17 +171,8 @@ def test_ip_change_update_dns_cache(cluster_with_dns_cache_update): assert_eq_with_retry(node4, "SELECT count(*) from test_table_update", "7") -def set_hosts(node, hosts): - new_content = "\\n".join(["127.0.0.1 localhost", "::1 localhost"] + hosts) - node.exec_in_container( - ["bash", "-c", 'echo -e "{}" > /etc/hosts'.format(new_content)], - privileged=True, - user="root", - ) - - def test_dns_cache_update(cluster_with_dns_cache_update): - set_hosts(node4, ["127.255.255.255 lost_host"]) + node4.set_hosts([("127.255.255.255", "lost_host")]) with pytest.raises(QueryRuntimeException): node4.query("SELECT * FROM remote('lost_host', 'system', 'one')") @@ -184,7 +183,7 @@ def test_dns_cache_update(cluster_with_dns_cache_update): with pytest.raises(QueryRuntimeException): node4.query("SELECT * FROM distributed_lost_host") - set_hosts(node4, ["127.0.0.1 lost_host"]) + node4.set_hosts([("127.0.0.1", "lost_host")]) # Wait a bit until dns cache will be updated assert_eq_with_retry( @@ -239,11 +238,10 @@ def test_user_access_ip_change(cluster_with_dns_cache_update, node): == "0\n" ) - set_hosts( - node, + node.set_hosts( [ - "127.255.255.255 node3", - "2001:3984:3989::1:88{}4 unknown_host".format(node_num), + ("127.255.255.255", "node3"), + (f"2001:3984:3989::1:88{node_num}4", "unknown_host"), ], ) @@ -260,7 +258,7 @@ def test_user_access_ip_change(cluster_with_dns_cache_update, node): node4.query("SELECT * FROM remote('{}', 'system', 'one')".format(node_name)) # now wrong addresses are cached - set_hosts(node, []) + node.set_hosts([]) retry_count = 60 if node_name == "node5": # client is not allowed to connect, so execute it directly in container to send query from localhost @@ -298,7 +296,7 @@ def test_host_is_drop_from_cache_after_consecutive_failures( # Note that the list of hosts in variable since lost_host will be there too (and it's dropped and added back) # dns_update_short -> dns_max_consecutive_failures set to 6 assert node4.wait_for_log_line( - "Cannot resolve host \\(InvalidHostThatDoesNotExist\\), error 0: Host not found." + "Code: 198. DB::Exception: Not found address of host: InvalidHostThatDoesNotExist." 
) assert node4.wait_for_log_line( "Cached hosts not found:.*InvalidHostThatDoesNotExist**", diff --git a/tests/integration/test_drop_is_lock_free/test.py b/tests/integration/test_drop_is_lock_free/test.py index 8d92d784226..61d52a1d9b1 100644 --- a/tests/integration/test_drop_is_lock_free/test.py +++ b/tests/integration/test_drop_is_lock_free/test.py @@ -104,7 +104,7 @@ def test_query_is_lock_free(lock_free_query, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table}; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; """, query_id=query_id, ) @@ -125,7 +125,7 @@ def test_query_is_lock_free(lock_free_query, exclusive_table): SELECT count() FROM {exclusive_table}; """ ) - assert f"Table default.{exclusive_table} doesn't exist" in result + assert f"Table default.{exclusive_table} does not exist" in result else: assert 0 == int( node.query( @@ -173,7 +173,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table}; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; """, query_id=query_id, ) diff --git a/tests/integration/test_drop_replica/test.py b/tests/integration/test_drop_replica/test.py index 0941e664982..e0928c6ab08 100644 --- a/tests/integration/test_drop_replica/test.py +++ b/tests/integration/test_drop_replica/test.py @@ -159,11 +159,11 @@ def test_drop_replica(start_cluster): for i in range(1, 5): node_1_1.query("DETACH DATABASE test{}".format(i)) - assert "doesn't exist" in node_1_3.query_and_get_error( + assert "does not exist" in node_1_3.query_and_get_error( "SYSTEM DROP REPLICA 'node_1_1' FROM TABLE test.test_table" ) - assert "doesn't exist" in node_1_3.query_and_get_error( + assert "does not exist" in node_1_3.query_and_get_error( "SYSTEM DROP REPLICA 'node_1_1' FROM DATABASE test1" ) diff --git a/tests/integration/test_extreme_deduplication/configs/conf.d/merge_tree.xml b/tests/integration/test_extreme_deduplication/configs/conf.d/merge_tree.xml index d9062e8da20..6f1d05d125e 100644 --- a/tests/integration/test_extreme_deduplication/configs/conf.d/merge_tree.xml +++ b/tests/integration/test_extreme_deduplication/configs/conf.d/merge_tree.xml @@ -4,6 +4,7 @@ 1 1 0 + 1 1 diff --git a/tests/integration/test_extreme_deduplication/test.py b/tests/integration/test_extreme_deduplication/test.py index 71f783d37c9..3632369154a 100644 --- a/tests/integration/test_extreme_deduplication/test.py +++ b/tests/integration/test_extreme_deduplication/test.py @@ -49,90 +49,25 @@ def test_deduplication_window_in_seconds(started_cluster): node.query("INSERT INTO simple VALUES (0, 1)") assert TSV(node.query("SELECT count() FROM simple")) == TSV("2\n") - # wait clean thread - time.sleep(2) + # Wait for the cleanup thread. 
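+    # Poll Keeper every 5 seconds, up to 100 attempts: the loop breaks once at most one block entry is left under /clickhouse/tables/0/simple/blocks; if the cleanup thread never removes them, the for/else below raises.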
+ for i in range(100): + time.sleep(5) + + if ( + TSV.toMat( + node.query( + "SELECT count() FROM system.zookeeper WHERE path = '/clickhouse/tables/0/simple/blocks'" + ) + )[0][0] + <= "1" + ): + break + else: + raise Exception("The blocks from Keeper were not removed in time") - assert ( - TSV.toMat( - node.query( - "SELECT count() FROM system.zookeeper WHERE path='/clickhouse/tables/0/simple/blocks'" - ) - )[0][0] - == "1" - ) node.query( "INSERT INTO simple VALUES (0, 0)" - ) # deduplication doesn't works here, the first hash node was deleted + ) # Deduplication doesn't work here as the first hash node was deleted assert TSV.toMat(node.query("SELECT count() FROM simple"))[0][0] == "3" node1.query("""DROP TABLE simple ON CLUSTER test_cluster""") - - -# Currently this test just reproduce incorrect behavior that sould be fixed -@pytest.mark.skip(reason="Flapping test") -def test_deduplication_works_in_case_of_intensive_inserts(started_cluster): - inserters = [] - fetchers = [] - - node1.query( - """ - CREATE TABLE simple ON CLUSTER test_cluster (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id""" - ) - - node1.query("INSERT INTO simple VALUES (0, 0)") - - for node in nodes: - host = node.ip_address - - inserters.append( - CommandRequest( - ["/bin/bash"], - timeout=10, - stdin=""" -set -e -for i in `seq 1000`; do - {} --host {} -q "INSERT INTO simple VALUES (0, 0)" -done -""".format( - cluster.get_client_cmd(), host - ), - ) - ) - - fetchers.append( - CommandRequest( - ["/bin/bash"], - timeout=10, - stdin=""" -set -e -for i in `seq 1000`; do - res=`{} --host {} -q "SELECT count() FROM simple"` - if [[ $? -ne 0 || $res -ne 1 ]]; then - echo "Selected $res elements! 
Host: {}" 1>&2 - exit -1 - fi; -done -""".format( - cluster.get_client_cmd(), host, node.name - ), - ) - ) - - # There were not errors during INSERTs - for inserter in inserters: - try: - inserter.get_answer() - except QueryTimeoutExceedException: - # Only timeout is accepted - pass - - # There were not errors during SELECTs - for fetcher in fetchers: - try: - fetcher.get_answer() - except QueryTimeoutExceedException: - # Only timeout is accepted - pass - - node1.query("""DROP TABLE simple ON CLUSTER test_cluster""") diff --git a/tests/integration/test_format_avro_confluent/secrets/password b/tests/integration/test_format_avro_confluent/secrets/password new file mode 100644 index 00000000000..a367925c806 --- /dev/null +++ b/tests/integration/test_format_avro_confluent/secrets/password @@ -0,0 +1,3 @@ +schemauser: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user +schemauser/slash: MD5:0d107d09f5bbe40cade3de5c71e9e9b7,user +complexschemauser: MD5:fcaeda86837fcd37755044e7258edc5d,user diff --git a/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf b/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf new file mode 100644 index 00000000000..7d0e6e2bf35 --- /dev/null +++ b/tests/integration/test_format_avro_confluent/secrets/schema_registry_jaas.conf @@ -0,0 +1,5 @@ +RealmFooBar { + org.eclipse.jetty.jaas.spi.PropertyFileLoginModule required + file="/etc/schema-registry/secrets/password" + debug="true"; +}; \ No newline at end of file diff --git a/tests/integration/test_format_avro_confluent/test.py b/tests/integration/test_format_avro_confluent/test.py index 42b7ddce193..540f90ae05e 100644 --- a/tests/integration/test_format_avro_confluent/test.py +++ b/tests/integration/test_format_avro_confluent/test.py @@ -1,5 +1,6 @@ import io import logging +import time import avro.schema import pytest @@ -8,13 +9,14 @@ from confluent_kafka.avro.cached_schema_registry_client import ( ) from confluent_kafka.avro.serializer.message_serializer import MessageSerializer from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from urllib import parse @pytest.fixture(scope="module") def started_cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("dummy", with_kafka=True) + cluster.add_instance("dummy", with_kafka=True, with_secrets=True) logging.info("Starting cluster...") cluster.start() logging.info("Cluster started") @@ -40,14 +42,15 @@ def run_query(instance, query, data=None, settings=None): def test_select(started_cluster): # type: (ClickHouseCluster) -> None - schema_registry_client = CachedSchemaRegistryClient( - "http://localhost:{}".format(started_cluster.schema_registry_port) - ) + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_port) + arg = {"url": reg_url} + + schema_registry_client = CachedSchemaRegistryClient(arg) serializer = MessageSerializer(schema_registry_client) schema = avro.schema.make_avsc_object( { - "name": "test_record", + "name": "test_record1", "type": "record", "fields": [{"name": "value", "type": "long"}], } @@ -56,14 +59,14 @@ def test_select(started_cluster): buf = io.BytesIO() for x in range(0, 3): message = serializer.encode_record_with_schema( - "test_subject", schema, {"value": x} + "test_subject1", schema, {"value": x} ) buf.write(message) data = buf.getvalue() instance = started_cluster.instances["dummy"] # type: ClickHouseInstance schema_registry_url = "http://{}:{}".format( - started_cluster.schema_registry_host, 8081 + started_cluster.schema_registry_host, 
started_cluster.schema_registry_port ) run_query(instance, "create table avro_data(value Int64) engine = Memory()") @@ -75,3 +78,164 @@ def test_select(started_cluster): ["1"], ["2"], ] + + +def test_select_auth(started_cluster): + # type: (ClickHouseCluster) -> None + + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } + + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) + + schema = avro.schema.make_avsc_object( + { + "name": "test_record_auth", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) + + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = "http://{}:{}@{}:{}".format( + "schemauser", + "letmein", + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, + ) + + run_query(instance, "create table avro_data_auth(value Int64) engine = Memory()") + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query( + instance, "insert into avro_data_auth format AvroConfluent", data, settings + ) + stdout = run_query(instance, "select * from avro_data_auth") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] + + +def test_select_auth_encoded(started_cluster): + # type: (ClickHouseCluster) -> None + + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } + + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) + + schema = avro.schema.make_avsc_object( + { + "name": "test_record_auth_encoded", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) + + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth_encoded", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = "http://{}:{}@{}:{}".format( + parse.quote_plus("schemauser/slash"), + parse.quote_plus("letmein"), + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, + ) + + run_query( + instance, "create table avro_data_auth_encoded(value Int64) engine = Memory()" + ) + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query( + instance, + "insert into avro_data_auth_encoded format AvroConfluent", + data, + settings, + ) + stdout = run_query(instance, "select * from avro_data_auth_encoded") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] + + +def test_select_auth_encoded_complex(started_cluster): + # type: (ClickHouseCluster) -> None + + reg_url = "http://localhost:{}".format(started_cluster.schema_registry_auth_port) + arg = { + "url": reg_url, + "basic.auth.credentials.source": "USER_INFO", + "basic.auth.user.info": "schemauser:letmein", + } + + schema_registry_client = CachedSchemaRegistryClient(arg) + serializer = MessageSerializer(schema_registry_client) + + schema 
= avro.schema.make_avsc_object( + { + "name": "test_record_auth_encoded_complex", + "type": "record", + "fields": [{"name": "value", "type": "long"}], + } + ) + + buf = io.BytesIO() + for x in range(0, 3): + message = serializer.encode_record_with_schema( + "test_subject_auth_encoded_complex", schema, {"value": x} + ) + buf.write(message) + data = buf.getvalue() + + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + schema_registry_url = "http://{}:{}@{}:{}".format( + parse.quote_plus("complexschemauser"), + parse.quote_plus("letmein%@:/"), + started_cluster.schema_registry_auth_host, + started_cluster.schema_registry_auth_port, + ) + + run_query( + instance, + "create table avro_data_auth_encoded_complex(value Int64) engine = Memory()", + ) + settings = {"format_avro_schema_registry_url": schema_registry_url} + run_query( + instance, + "insert into avro_data_auth_encoded_complex format AvroConfluent", + data, + settings, + ) + stdout = run_query(instance, "select * from avro_data_auth_encoded_complex") + assert list(map(str.split, stdout.splitlines())) == [ + ["0"], + ["1"], + ["2"], + ] diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index ee5d4b5df93..35f4d81b66a 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -188,7 +188,7 @@ def test_grant_all_on_table(): instance.query("SHOW GRANTS FOR B") == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, " "DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, " - "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " + "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n" ) instance.query("REVOKE ALL ON test.table FROM B", user="A") @@ -449,7 +449,7 @@ def test_introspection(): ] ) - expected_error = "necessary to have grant SHOW USERS" + expected_error = "necessary to have the grant SHOW USERS" assert expected_error in instance.query_and_get_error("SHOW GRANTS FOR B", user="A") expected_access1 = ( @@ -471,7 +471,7 @@ def test_introspection(): [ [ "A", - "local directory", + "local_directory", "no_password", "{}", "['::/0']", @@ -484,7 +484,7 @@ def test_introspection(): ], [ "B", - "local directory", + "local_directory", "no_password", "{}", "['::/0']", @@ -556,7 +556,7 @@ def test_grant_with_replace_option(): ) expected_error = ( - "it's necessary to have grant INSERT ON test.table WITH GRANT OPTION" + "it's necessary to have the grant INSERT ON test.table WITH GRANT OPTION" ) assert expected_error in instance.query_and_get_error( "GRANT INSERT ON test.table TO B WITH REPLACE OPTION", user="A" @@ -568,7 +568,7 @@ def test_grant_with_replace_option(): instance.query("GRANT INSERT ON test.table TO A WITH GRANT OPTION") expected_error = ( - "it's necessary to have grant SELECT ON test.table WITH GRANT OPTION" + "it's necessary to have the grant SELECT ON test.table WITH GRANT OPTION" ) assert expected_error in instance.query_and_get_error( "GRANT INSERT ON 
test.table TO B WITH REPLACE OPTION", user="A" diff --git a/tests/integration/test_graphite_merge_tree/test.py b/tests/integration/test_graphite_merge_tree/test.py index c4364a03fd9..7c19888df6b 100644 --- a/tests/integration/test_graphite_merge_tree/test.py +++ b/tests/integration/test_graphite_merge_tree/test.py @@ -509,6 +509,6 @@ CREATE TABLE test.graphite_not_created ) # The order of retentions is not guaranteed - assert "age and precision should only grow up: " in str(exc.value) + assert "Age and precision should only grow up: " in str(exc.value) assert "36000:600" in str(exc.value) assert "72000:300" in str(exc.value) diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/test.py b/tests/integration/test_groupBitmapAnd_on_distributed/test.py index 4dbc81236e7..8cf7e0fb2c1 100644 --- a/tests/integration/test_groupBitmapAnd_on_distributed/test.py +++ b/tests/integration/test_groupBitmapAnd_on_distributed/test.py @@ -5,13 +5,22 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node1", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node2", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node3 = cluster.add_instance( - "node3", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node3", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -19,6 +28,7 @@ node4 = cluster.add_instance( image="yandex/clickhouse-server", tag="21.5", with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py b/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py index 774b7708587..115e6009801 100644 --- a/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py +++ b/tests/integration/test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py @@ -5,13 +5,22 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node1", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node2", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node3 = cluster.add_instance( - "node3", main_configs=["configs/clusters.xml"], with_zookeeper=True + "node3", + main_configs=["configs/clusters.xml"], + with_zookeeper=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -20,6 +29,7 @@ node4 = cluster.add_instance( tag="21.6", with_installed_binary=True, with_zookeeper=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index dead4d447ec..42d6d2bfda3 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -352,9 +352,13 @@ def test_authentication(): def test_logs(): - logs = query_and_get_logs("SELECT 1", 
settings={"send_logs_level": "debug"}) - assert "SELECT 1" in logs - assert "Read 1 rows" in logs + query = "SELECT has(groupArray(number), 42) FROM numbers(100000) SETTINGS max_block_size=10000" + logs = query_and_get_logs( + query, + settings={"send_logs_level": "debug"}, + ) + assert query in logs + assert "Read 100000 rows" in logs assert "Peak memory usage" in logs diff --git a/tests/integration/test_hive_query/configs/config.xml b/tests/integration/test_hive_query/configs/config.xml deleted file mode 100644 index 7de1391e56c..00000000000 --- a/tests/integration/test_hive_query/configs/config.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - - - - localhost - 9000 - - - - - - - true - /tmp/clickhouse_local_cache,/tmp/clickhouse_local_cache1 - 207374182400 - 1048576 - - - - /etc/clickhouse-server/extra_conf.d/hdfs-site.xml - - - - system - metric_log
- 1000 - 1000 -
- -
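The schema-registry auth tests added above build format_avro_schema_registry_url by percent-encoding the credentials with urllib.parse.quote_plus. Below is a minimal sketch of that pattern only; the host and port are placeholders standing in for started_cluster.schema_registry_auth_host/port, and make_registry_url is a helper invented for illustration, not part of the patch.

from urllib import parse

def make_registry_url(user, password, host="schema-registry-auth", port=8081):
    # quote_plus() escapes characters such as '/', ':', '%' and '@', so
    # credentials like "letmein%@:/" cannot break the URL structure.
    return "http://{}:{}@{}:{}".format(
        parse.quote_plus(user), parse.quote_plus(password), host, port
    )

print(make_registry_url("schemauser/slash", "letmein"))
# -> http://schemauser%2Fslash:letmein@schema-registry-auth:8081
print(make_registry_url("complexschemauser", "letmein%@:/"))
# -> http://complexschemauser:letmein%25%40%3A%2F@schema-registry-auth:8081

This is the property test_select_auth_encoded_complex exercises: without the encoding, the '/' in the user name and the '%@:/' in the password would be parsed as URL structure rather than as credentials.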
diff --git a/tests/integration/test_hive_query/configs/hdfs-site.xml b/tests/integration/test_hive_query/configs/hdfs-site.xml deleted file mode 100644 index 82c525ea414..00000000000 --- a/tests/integration/test_hive_query/configs/hdfs-site.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - dfs.replication - 1 - - diff --git a/tests/integration/test_hive_query/data/prepare_hive_data.sh b/tests/integration/test_hive_query/data/prepare_hive_data.sh deleted file mode 100755 index 495ea201870..00000000000 --- a/tests/integration/test_hive_query/data/prepare_hive_data.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -hive -e "create database test" - -hive -e "drop table if exists test.demo; create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " -hive -e "drop table if exists test.parquet_demo; create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" -hive -e "drop table if exists test.demo_text; create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" - -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" -hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" - -hive -e "drop table if exists test.test_hive_types; CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct>) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" - -hive -e "insert into test.test_hive_types partition(day='2022-02-20') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-20 14:47:04', '2022-02-20', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', 
float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 10, 'y', 'xyz')); insert into test.test_hive_types partition(day='2022-02-19') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-19 14:47:04', '2022-02-19', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 11, 'y', 'abc'));" - diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py deleted file mode 100644 index 791ae03f9f6..00000000000 --- a/tests/integration/test_hive_query/test.py +++ /dev/null @@ -1,498 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/43541 - -pytestmark = pytest.mark.skip - -import logging -import os - -import time -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -logging.getLogger().setLevel(logging.INFO) -logging.getLogger().addHandler(logging.StreamHandler()) - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance( - "h0_0_0", - main_configs=["configs/config.xml"], - extra_configs=["configs/hdfs-site.xml", "data/prepare_hive_data.sh"], - with_hive=True, - ) - - logging.info("Starting cluster ...") - cluster.start() - cluster.copy_file_to_container( - "roottesthivequery_hdfs1_1", - "/ClickHouse/tests/integration/test_hive_query/data/prepare_hive_data.sh", - "/prepare_hive_data.sh", - ) - cluster.exec_in_container( - "roottesthivequery_hdfs1_1", ["bash", "-c", "bash /prepare_hive_data.sh"] - ) - yield cluster - finally: - cluster.shutdown() - - -def test_create_parquet_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - test_passed = False - for i in range(10): - node.query("set input_format_parquet_allow_missing_columns = true") - result = node.query( - """ -DROP TABLE IF EXISTS default.demo_parquet; -CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - assert test_passed - - -def test_create_parquet_table_1(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - for i in range(10): - node.query("set input_format_parquet_allow_missing_columns = true") - result = node.query( - """ -DROP TABLE IF EXISTS default.demo_parquet_parts; -CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour); - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - assert test_passed - - -def test_create_orc_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - test_passed = False - for i in range(10): - result = node.query( - """ - DROP 
TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day) - """ - ) - logging.info("create result {}".format(result)) - if result.strip() == "": - test_passed = True - break - time.sleep(60) - - assert test_passed - - -def test_create_text_table(started_cluster): - logging.info("Start testing creating hive table ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_text; - CREATE TABLE default.demo_text (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_text') PARTITION BY (tuple()) - """ - ) - logging.info("create result {}".format(result)) - - assert result.strip() == "" - - -def test_parquet_groupby(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM default.demo_parquet group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_parquet_in_filter(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') - """ - ) - expected_result = """2 -""" - logging.info("query result:{}".format(result)) - assert result == expected_result - - -def test_orc_groupby(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day); - SELECT day, count(*) FROM default.demo_orc group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -@pytest.mark.parametrize( - "table,use_local_cache_for_remote_storage,enable_orc_file_minmax_index,enable_orc_stripe_minmax_index", - [ - pytest.param( - "demo_orc_no_cache_no_index", - "false", - "false", - "false", - id="demo_orc_no_cache_no_index", - ), - pytest.param( - "demo_orc_with_cache_no_index", - "true", - "false", - "false", - id="demo_orc_with_cache_no_index", - ), - pytest.param( - "demo_orc_no_cache_file_index", - "false", - "true", - "false", - id="demo_orc_no_cache_file_index", - ), - pytest.param( - "demo_orc_with_cache_file_index", - "true", - "true", - "false", - id="demo_orc_with_cache_file_index", - ), - pytest.param( - "demo_orc_no_cache_stripe_index", - "false", - "true", - "true", - id="demo_orc_no_cache_stripe_index", - ), - pytest.param( - "demo_orc_with_cache_stripe_index", - "true", - "true", - "true", - id="demo_orc_with_cache_stripe_index", - ), - ], -) -def test_orc_minmax_index( - started_cluster, - table, - use_local_cache_for_remote_storage, - enable_orc_file_minmax_index, - enable_orc_stripe_minmax_index, -): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.{table}; - CREATE TABLE default.{table} (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 
'demo_orc') PARTITION BY(day) - SETTINGS enable_orc_file_minmax_index = {enable_orc_file_minmax_index}, enable_orc_stripe_minmax_index = {enable_orc_stripe_minmax_index}; - """.format( - table=table, - enable_orc_file_minmax_index=enable_orc_file_minmax_index, - enable_orc_stripe_minmax_index=enable_orc_stripe_minmax_index, - ) - ) - assert result.strip() == "" - - for i in range(2): - result = node.query( - """ - SELECT day, id, score FROM default.{table} where day >= '2021-11-05' and day <= '2021-11-16' and score >= 15 and score <= 30 order by day, id - SETTINGS use_local_cache_for_remote_storage = {use_local_cache_for_remote_storage} - """.format( - table=table, - use_local_cache_for_remote_storage=use_local_cache_for_remote_storage, - ) - ) - - assert ( - result - == """2021-11-05 abd 15 -2021-11-16 aaa 22 -""" - ) - - -@pytest.mark.parametrize( - "table,use_local_cache_for_remote_storage,enable_parquet_rowgroup_minmax_index", - [ - pytest.param( - "demo_parquet_no_cache_no_index", - "false", - "false", - id="demo_parquet_no_cache_no_index", - ), - pytest.param( - "demo_parquet_with_cache_no_index", - "true", - "false", - id="demo_parquet_with_cache_no_index", - ), - pytest.param( - "demo_parquet_no_cache_rowgroup_index", - "false", - "true", - id="demo_parquet_no_cache_rowgroup_index", - ), - pytest.param( - "demo_parquet_with_cache_rowgroup_index", - "true", - "true", - id="demo_parquet_with_cache_rowgroup_index", - ), - ], -) -def test_parquet_minmax_index( - started_cluster, - table, - use_local_cache_for_remote_storage, - enable_parquet_rowgroup_minmax_index, -): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.{table}; - CREATE TABLE default.{table} (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - SETTINGS enable_parquet_rowgroup_minmax_index = {enable_parquet_rowgroup_minmax_index} - """.format( - table=table, - enable_parquet_rowgroup_minmax_index=enable_parquet_rowgroup_minmax_index, - ) - ) - assert result.strip() == "" - - for i in range(2): - result = node.query( - """ - SELECT day, id, score FROM default.{table} where day >= '2021-11-05' and day <= '2021-11-16' and score >= 15 and score <= 30 order by day, id - SETTINGS use_local_cache_for_remote_storage = {use_local_cache_for_remote_storage} - """.format( - table=table, - use_local_cache_for_remote_storage=use_local_cache_for_remote_storage, - ) - ) - - assert ( - result - == """2021-11-05 abd 15 -2021-11-16 aaa 22 -""" - ) - - -def test_hive_columns_prunning(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' - """ - ) - expected_result = """4 -""" - logging.info("query result:{}".format(result)) - assert result == expected_result - - -def test_text_count(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - DROP TABLE IF EXISTS default.demo_orc; - CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day); - SELECT day, count(*) FROM default.demo_orc group by day order by day SETTINGS format_csv_delimiter = '\x01' - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def 
test_parquet_groupby_with_cache(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM default.demo_parquet group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_parquet_groupby_by_hive_function(started_cluster): - logging.info("Start testing groupby ...") - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - SELECT day, count(*) FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') group by day order by day - """ - ) - expected_result = """2021-11-01 1 -2021-11-05 2 -2021-11-11 1 -2021-11-16 2 -""" - assert result == expected_result - - -def test_cache_read_bytes(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - test_passed = False - for i in range(10): - result = node.query( - """ - SELECT * FROM default.demo_parquet_1 settings input_format_parquet_allow_missing_columns = true - """ - ) - node.query("system flush logs") - result = node.query( - "select sum(ProfileEvent_ExternalDataSourceLocalCacheReadBytes) from system.metric_log where ProfileEvent_ExternalDataSourceLocalCacheReadBytes > 0" - ) - if result.strip() == "0": - logging.info("ProfileEvent_ExternalDataSourceLocalCacheReadBytes == 0") - time.sleep(10) - continue - test_passed = True - break - assert test_passed - - -def test_cache_dir_use(started_cluster): - node = started_cluster.instances["h0_0_0"] - result0 = node.exec_in_container( - ["bash", "-c", "ls /tmp/clickhouse_local_cache | wc -l"] - ) - result1 = node.exec_in_container( - ["bash", "-c", "ls /tmp/clickhouse_local_cache1 | wc -l"] - ) - assert result0 != "0" and result1 != "0" - - -def test_hive_struct_type(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query( - """ - CREATE TABLE IF NOT EXISTS default.test_hive_types (`f_tinyint` Int8, `f_smallint` Int16, `f_int` Int32, `f_integer` Int32, `f_bigint` Int64, `f_float` Float32, `f_double` Float64, `f_decimal` Float64, `f_timestamp` DateTime, `f_date` Date, `f_string` String, `f_varchar` String, `f_char` String, `f_bool` Boolean, `f_array_int` Array(Int32), `f_array_string` Array(String), `f_array_float` Array(Float32), `f_map_int` Map(String, Int32), `f_map_string` Map(String, String), `f_map_float` Map(String, Float32), `f_struct` Tuple(a String, b Int32, c Float32, d Tuple(x Int32, y String)), `day` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'test_hive_types') PARTITION BY (day) - """ - ) - result = node.query( - """ - SELECT * FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 - """ - ) - expected_result = """1 2 3 4 5 6.11 7.22 8 2022-02-20 14:47:04 2022-02-20 hello world hello world hello world true [1,2,3] ['hello world','hello world'] [1.1,1.2] {'a':100,'b':200,'c':300} {'a':'aa','b':'bb','c':'cc'} {'a':111.1,'b':222.2,'c':333.3} ('aaa',200,333.3,(10,'xyz')) 2022-02-20""" - assert result.strip() == expected_result - - result = node.query( - """ - SELECT day, f_struct.a, f_struct.d.x FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS 
input_format_parquet_import_nested=1 - """ - ) - expected_result = """2022-02-20 aaa 10""" - - -def test_table_alter_add(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - result = node.query( - """ -ALTER TABLE default.demo_parquet_1 ADD COLUMN id Nullable(String) FIRST - """ - ) - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - - expected_result = "id\tNullable(String)\t\t\t\t\t\nscore\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)" - assert result.strip() == expected_result - - -def test_table_alter_drop(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - result = node.query( - """ -ALTER TABLE default.demo_parquet_1 DROP COLUMN id - """ - ) - - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - expected_result = """score\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)""" - assert result.strip() == expected_result - - -def test_table_alter_comment(started_cluster): - node = started_cluster.instances["h0_0_0"] - result = node.query("DROP TABLE IF EXISTS default.demo_parquet_1") - result = node.query( - """ -CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """ - ) - - result = node.query( - """ALTER TABLE default.demo_parquet_1 COMMENT COLUMN id 'Text comment'""" - ) - result = node.query("""DESC default.demo_parquet_1 FORMAT TSV""") - expected_result = """id\tNullable(String)\t\t\tText comment\t\t\nscore\tNullable(Int32)\t\t\t\t\t\nday\tNullable(String)""" - assert result.strip() == expected_result diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml index 7a2141e6c7e..6ca61fb3c44 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + \ No newline at end of file diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com index 9beb415c290..6c6e4cbee2e 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com +++ b/tests/integration/test_host_regexp_multiple_ptr_records/coredns_config/example.com @@ -1 +1 @@ 
-filled in runtime, but needs to exist in order to be volume mapped in docker \ No newline at end of file +filled in runtime, but needs to exist in order to be volume mapped in docker diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml index 7a2141e6c7e..9329c8dbde2 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/host_regexp.xml @@ -1,4 +1,4 @@ - + @@ -8,4 +8,4 @@ default - \ No newline at end of file + diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - + diff --git a/tests/integration/test_http_failover/__init__.py b/tests/integration/test_http_failover/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_http_failover/configs/listen.xml b/tests/integration/test_http_failover/configs/listen.xml new file mode 100644 index 00000000000..3abb37d5da2 --- /dev/null +++ b/tests/integration/test_http_failover/configs/listen.xml @@ -0,0 +1 @@ +:: diff --git a/tests/integration/test_http_failover/test.py b/tests/integration/test_http_failover/test.py new file mode 100644 index 00000000000..41b55ef635c --- /dev/null +++ b/tests/integration/test_http_failover/test.py @@ -0,0 +1,113 @@ +import pytest +from contextlib import nullcontext as does_not_raise +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException +from helpers.test_tools import exec_query_with_retry +from helpers.test_tools import assert_eq_with_retry + + +ACCESSIBLE_IPV4 = "10.5.172.10" +OTHER_ACCESSIBLE_IPV4 = "10.5.172.20" +NOT_ACCESSIBLE_IPV4 = "10.5.172.11" + +ACCESSIBLE_IPV6 = "2001:3984:3989::1:1000" +NOT_ACCESSIBLE_IPV6 = "2001:3984:3989::1:1001" + +DST_NODE_IPV4 = ACCESSIBLE_IPV4 +DST_NODE_IPV6 = ACCESSIBLE_IPV6 +SRC_NODE_IPV6 = "2001:3984:3989::1:2000" + + +cluster = ClickHouseCluster(__file__) + +# Destination node +dst_node = cluster.add_instance( + "dst_node", + with_zookeeper=True, + ipv4_address=DST_NODE_IPV4, + ipv6_address=DST_NODE_IPV6, + main_configs=["configs/listen.xml"], +) +# Source node +src_node = cluster.add_instance( + "src_node", + with_zookeeper=True, + ipv6_address=SRC_NODE_IPV6, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + pass + + +@pytest.fixture +def dst_node_addrs(started_cluster, request): + src_node.set_hosts([(ip, "dst_node") for ip in request.param]) + src_node.query("SYSTEM DROP DNS CACHE") + + yield + + # Clear static DNS entries + src_node.set_hosts([]) + src_node.query("SYSTEM DROP DNS CACHE") + + +@pytest.mark.parametrize( + "dst_node_addrs, expectation", + [ + ((ACCESSIBLE_IPV4, ACCESSIBLE_IPV6), does_not_raise()), + ((NOT_ACCESSIBLE_IPV4, ACCESSIBLE_IPV6), does_not_raise()), + ((ACCESSIBLE_IPV4, NOT_ACCESSIBLE_IPV6), does_not_raise()), + ( + (NOT_ACCESSIBLE_IPV4, NOT_ACCESSIBLE_IPV6), + 
pytest.raises(QueryRuntimeException), + ), + ], + indirect=["dst_node_addrs"], +) +def test_url_destination_host_with_multiple_addrs(dst_node_addrs, expectation): + with expectation: + result = src_node.query( + "SELECT * FROM url('http://dst_node:8123/?query=SELECT+42', TSV, 'column1 UInt32')" + ) + assert result == "42\n" + + +def test_url_invalid_hostname(started_cluster): + with pytest.raises(QueryRuntimeException): + src_node.query( + "SELECT count(*) FROM url('http://notvalidhost:8123/?query=SELECT+1', TSV, 'column1 UInt32');" + ) + + +def test_url_ip_change(started_cluster): + assert ( + src_node.query( + "SELECT * FROM url('http://dst_node:8123/?query=SELECT+42', TSV, 'column1 UInt32')" + ) + == "42\n" + ) + + started_cluster.restart_instance_with_ip_change(dst_node, OTHER_ACCESSIBLE_IPV4) + + # Ensure that only new IPV4 address is accessible + src_node.set_hosts( + [(OTHER_ACCESSIBLE_IPV4, "dst_node"), (NOT_ACCESSIBLE_IPV6, "dst_node")] + ) + src_node.query("SYSTEM DROP DNS CACHE") + + assert ( + src_node.query( + "SELECT * FROM url('http://dst_node:8123/?query=SELECT+42', TSV, 'column1 UInt32')" + ) + == "42\n" + ) diff --git a/tests/integration/test_insert_into_distributed/test.py b/tests/integration/test_insert_into_distributed/test.py index 3bee2149387..09a14c11727 100644 --- a/tests/integration/test_insert_into_distributed/test.py +++ b/tests/integration/test_insert_into_distributed/test.py @@ -246,7 +246,7 @@ def test_inserts_local(started_cluster): def test_inserts_single_replica_local_internal_replication(started_cluster): with pytest.raises( - QueryRuntimeException, match="Table default.single_replicated doesn't exist" + QueryRuntimeException, match="Table default.single_replicated does not exist" ): node1.query( "INSERT INTO distributed_one_replica_internal_replication VALUES ('2000-01-01', 1)", @@ -279,7 +279,8 @@ def test_inserts_single_replica_internal_replication(started_cluster): def test_inserts_single_replica_no_internal_replication(started_cluster): try: with pytest.raises( - QueryRuntimeException, match="Table default.single_replicated doesn't exist" + QueryRuntimeException, + match="Table default.single_replicated does not exist", ): node1.query( "INSERT INTO distributed_one_replica_no_internal_replication VALUES ('2000-01-01', 1)", diff --git a/tests/integration/test_insert_into_distributed_through_materialized_view/test.py b/tests/integration/test_insert_into_distributed_through_materialized_view/test.py index a5f92002450..b1eb0df2d43 100644 --- a/tests/integration/test_insert_into_distributed_through_materialized_view/test.py +++ b/tests/integration/test_insert_into_distributed_through_materialized_view/test.py @@ -108,74 +108,6 @@ def test_reconnect(started_cluster): assert remote.query("SELECT count(*) FROM local1").strip() == "3" -@pytest.mark.skip(reason="Flapping test") -def test_inserts_batching(started_cluster): - instance = instance_test_inserts_batching - - with PartitionManager() as pm: - pm.partition_instances(instance, remote) - - instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', 1)") - # Sleep a bit so that this INSERT forms a batch of its own. 
- time.sleep(0.2) - - instance.query("INSERT INTO local2_source(x, d) VALUES (2, '2000-01-01')") - - for i in range(3, 7): - instance.query( - "INSERT INTO local2_source(d, x) VALUES ('2000-01-01', {})".format(i) - ) - - for i in range(7, 9): - instance.query( - "INSERT INTO local2_source(x, d) VALUES ({}, '2000-01-01')".format(i) - ) - - instance.query("INSERT INTO local2_source(d, x) VALUES ('2000-01-01', 9)") - - # After ALTER the structure of the saved blocks will be different - instance.query("DROP TABLE local2_view") - instance.query("ALTER TABLE distributed ADD COLUMN s String") - - # Memory Engine doesn't support ALTER so we just DROP/CREATE everything - instance.query("DROP TABLE local2_source") - instance.query( - "CREATE TABLE local2_source (d Date, x UInt32, s String) ENGINE = Memory" - ) - instance.query( - "CREATE MATERIALIZED VIEW local2_view to distributed AS SELECT d,x,s FROM local2_source" - ) - - for i in range(10, 13): - instance.query( - "INSERT INTO local2_source(d, x) VALUES ('2000-01-01', {})".format(i) - ) - - time.sleep(1.0) - - result = remote.query( - "SELECT _part, groupArray(x) FROM local2 GROUP BY _part ORDER BY _part" - ) - - # Explanation: as merges are turned off on remote instance, active parts in local2 table correspond 1-to-1 - # to inserted blocks. - # Batches of max 3 rows are formed as min_insert_block_size_rows = 3. - # Blocks: - # 1. Failed batch that is retried with the same contents. - # 2. Full batch of inserts regardless of the order of columns thanks to the view. - # 3. Full batch of inserts regardless order of columns thanks to the view. - # 4. Full batch of inserts after ALTER (that have different block structure). - # 5. What was left to insert before ALTER. - expected = """\ -20000101_20000101_1_1_0 [1] -20000101_20000101_2_2_0 [2,3,4] -20000101_20000101_3_3_0 [5,6,7] -20000101_20000101_4_4_0 [10,11,12] -20000101_20000101_5_5_0 [8,9] -""" - assert TSV(result) == TSV(expected) - - def test_inserts_local(started_cluster): instance = instance_test_inserts_local_cluster instance.query("INSERT INTO local_source VALUES ('2000-01-01', 1)") diff --git a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py index 00c7908eeed..8f7056a5afd 100644 --- a/tests/integration/test_keeper_client/test.py +++ b/tests/integration/test_keeper_client/test.py @@ -1,6 +1,7 @@ import pytest from helpers.client import CommandRequest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) @@ -13,7 +14,7 @@ node = cluster.add_instance( ) -@pytest.fixture(scope="module") +@pytest.fixture(scope="module", autouse=True) def started_cluster(): try: cluster.start() @@ -23,41 +24,123 @@ def started_cluster(): cluster.shutdown() -def test_base_commands(started_cluster): - _ = started_cluster - - command = CommandRequest( +def keeper_query(query: str): + return CommandRequest( [ - started_cluster.server_bin_path, + cluster.server_bin_path, "keeper-client", "--host", str(cluster.get_instance_ip("zoo1")), "--port", str(cluster.zookeeper_port), "-q", - "create test_create_zk_node1 testvalue1;create test_create_zk_node_2 testvalue2;get test_create_zk_node1;", + query, ], stdin="", ) + +def test_big_family(): + command = keeper_query( + "touch test_big_family;" + "touch test_big_family/1;" + "touch test_big_family/1/1;" + "touch test_big_family/1/2;" + "touch test_big_family/1/3;" + "touch test_big_family/1/4;" + "touch test_big_family/1/5;" + "touch test_big_family/2;" + "touch 
test_big_family/2/1;" + "touch test_big_family/2/2;" + "touch test_big_family/2/3;" + "find_big_family test_big_family;" + ) + + assert command.get_answer() == TSV( + [ + ["/test_big_family/1", "5"], + ["/test_big_family/2", "3"], + ["/test_big_family/2/3", "0"], + ["/test_big_family/2/2", "0"], + ["/test_big_family/2/1", "0"], + ["/test_big_family/1/5", "0"], + ["/test_big_family/1/4", "0"], + ["/test_big_family/1/3", "0"], + ["/test_big_family/1/2", "0"], + ["/test_big_family/1/1", "0"], + ] + ) + + command = keeper_query("find_big_family test_big_family 1;") + + assert command.get_answer() == TSV( + [ + ["/test_big_family/1", "5"], + ] + ) + + +def test_find_super_nodes(): + command = keeper_query( + "touch test_find_super_nodes;" + "touch test_find_super_nodes/1;" + "touch test_find_super_nodes/1/1;" + "touch test_find_super_nodes/1/2;" + "touch test_find_super_nodes/1/3;" + "touch test_find_super_nodes/1/4;" + "touch test_find_super_nodes/1/5;" + "touch test_find_super_nodes/2;" + "touch test_find_super_nodes/2/1;" + "touch test_find_super_nodes/2/2;" + "touch test_find_super_nodes/2/3;" + "touch test_find_super_nodes/2/4;" + "cd test_find_super_nodes;" + "find_super_nodes 4;" + ) + + assert command.get_answer() == TSV( + [ + ["/test_find_super_nodes/1", "5"], + ["/test_find_super_nodes/2", "4"], + ] + ) + + +def test_delete_stale_backups(): + command = keeper_query( + "touch /clickhouse;" + "touch /clickhouse/backups;" + "touch /clickhouse/backups/1;" + "touch /clickhouse/backups/1/stage;" + "touch /clickhouse/backups/1/stage/alive123;" + "touch /clickhouse/backups/2;" + "touch /clickhouse/backups/2/stage;" + "touch /clickhouse/backups/2/stage/dead123;" + "delete_stale_backups;" + "y;" + "ls clickhouse/backups;" + ) + + assert command.get_answer() == ( + "You are going to delete all inactive backups in /clickhouse/backups. 
Continue?\n" + 'Found backup "/clickhouse/backups/1", checking if it\'s active\n' + 'Backup "/clickhouse/backups/1" is active, not going to delete\n' + 'Found backup "/clickhouse/backups/2", checking if it\'s active\n' + 'Backup "/clickhouse/backups/2" is not active, deleting it\n' + "1\n" + ) + + +def test_base_commands(): + command = keeper_query( + "create test_create_zk_node1 testvalue1;" + "create test_create_zk_node_2 testvalue2;" + "get test_create_zk_node1;" + ) + assert command.get_answer() == "testvalue1\n" -def test_four_letter_word_commands(started_cluster): - _ = started_cluster - - command = CommandRequest( - [ - started_cluster.server_bin_path, - "keeper-client", - "--host", - str(cluster.get_instance_ip("zoo1")), - "--port", - str(cluster.zookeeper_port), - "-q", - "ruok", - ], - stdin="", - ) - +def test_four_letter_word_commands(): + command = keeper_query("ruok") assert command.get_answer() == "imok\n" diff --git a/tests/integration/test_keeper_disks/configs/enable_keeper.xml b/tests/integration/test_keeper_disks/configs/enable_keeper.xml index 5814979229c..50d0329637a 100644 --- a/tests/integration/test_keeper_disks/configs/enable_keeper.xml +++ b/tests/integration/test_keeper_disks/configs/enable_keeper.xml @@ -1,6 +1,10 @@ + + hdfs + hdfs://hdfs1:9000/ + local /var/lib/clickhouse/coordination/logs/ diff --git a/tests/integration/test_keeper_disks/test.py b/tests/integration/test_keeper_disks/test.py index 11bb215be54..86682bcde01 100644 --- a/tests/integration/test_keeper_disks/test.py +++ b/tests/integration/test_keeper_disks/test.py @@ -9,7 +9,11 @@ import os CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/enable_keeper.xml"], stay_alive=True, with_minio=True + "node", + main_configs=["configs/enable_keeper.xml"], + stay_alive=True, + with_minio=True, + with_hdfs=True, ) from kazoo.client import KazooClient, KazooState @@ -117,6 +121,12 @@ def get_local_snapshots(): return get_local_files("/var/lib/clickhouse/coordination/snapshots") +def test_supported_disk_types(started_cluster): + node.stop_clickhouse() + node.start_clickhouse() + node.contains_in_log("Disk type 'hdfs' is not supported for Keeper") + + def test_logs_with_disks(started_cluster): setup_local_storage(started_cluster) diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml index b7f9d1b058e..62109ee9092 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml index b773d59f259..2696c573180 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper2.xml @@ -1,5 +1,6 @@ + false 9181 2 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml index d4c2befc10f..fc0c0fd0300 100644 --- 
a/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper3.xml @@ -1,5 +1,6 @@ + false 9181 3 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml index c039e709c9e..06f1c1d7195 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper4.xml @@ -1,5 +1,6 @@ + false 9181 4 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml index fb43b6524c8..5d3767ae969 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper5.xml @@ -1,5 +1,6 @@ + false 9181 5 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml index 430e662bf36..4d30822741a 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper6.xml @@ -1,5 +1,6 @@ + false 9181 6 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml index aa10774ad7d..b59141042ea 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper7.xml @@ -1,5 +1,6 @@ + false 9181 7 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml b/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml index 4f1c21a1084..711d70cb1ac 100644 --- a/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml +++ b/tests/integration/test_keeper_force_recovery/configs/enable_keeper8.xml @@ -1,5 +1,6 @@ + false 9181 8 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml b/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml index eaf0f01afc9..abd4ef85bf7 100644 --- a/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery/configs/recovered_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml deleted file mode 100644 index f41e8c6e49c..00000000000 --- a/tests/integration/test_keeper_force_recovery/configs/use_keeper.xml +++ /dev/null @@ -1,36 +0,0 @@ - - - - node1 - 9181 - - - node2 - 9181 - - - node3 - 9181 - - - node4 - 9181 - - - node5 - 9181 - - - node6 - 9181 - - - node7 - 9181 - - - node8 - 9181 - - - diff --git 
a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py index f7c3787b4d8..f630e5a422b 100644 --- a/tests/integration/test_keeper_force_recovery/test.py +++ b/tests/integration/test_keeper_force_recovery/test.py @@ -22,10 +22,7 @@ def get_nodes(): nodes.append( cluster.add_instance( f"node{i+1}", - main_configs=[ - f"configs/enable_keeper{i+1}.xml", - f"configs/use_keeper.xml", - ], + main_configs=[f"configs/enable_keeper{i+1}.xml"], stay_alive=True, ) ) diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml index 441c1bc185d..94e59128bd3 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1.xml @@ -1,5 +1,6 @@ + false 9181 1 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml index f0cb887b062..6367b4b4c29 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper1_solo.xml @@ -1,5 +1,6 @@ + false 1 9181 1 @@ -11,6 +12,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml index e2e2c1fd7db..548d12c2e0a 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper2.xml @@ -1,5 +1,6 @@ + false 9181 2 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml index e2ac0400d88..65f9675cbd6 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml +++ b/tests/integration/test_keeper_force_recovery_single_node/configs/enable_keeper3.xml @@ -1,5 +1,6 @@ + false 9181 3 /var/lib/clickhouse/coordination/log @@ -10,6 +11,7 @@ 10000 75 trace + 200 diff --git a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml b/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml deleted file mode 100644 index 384e984f210..00000000000 --- a/tests/integration/test_keeper_force_recovery_single_node/configs/use_keeper.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - node1 - 9181 - - - node2 - 9181 - - - node3 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index 1c0d5e9a306..132c5488df6 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -20,10 +20,7 @@ def get_nodes(): nodes.append( cluster.add_instance( f"node{i+1}", - main_configs=[ - f"configs/enable_keeper{i+1}.xml", - f"configs/use_keeper.xml", - ], + main_configs=[f"configs/enable_keeper{i+1}.xml"], stay_alive=True, ) ) diff --git 
a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index b546d879c75..1d5bc6a6541 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -329,7 +329,7 @@ def test_cmd_srvr(started_cluster): assert result["Received"] == "10" assert result["Sent"] == "10" assert int(result["Connections"]) == 1 - assert int(result["Zxid"]) > 10 + assert int(result["Zxid"], 16) > 10 assert result["Mode"] == "leader" assert result["Node count"] == "14" @@ -369,7 +369,7 @@ def test_cmd_stat(started_cluster): assert result["Received"] == "10" assert result["Sent"] == "10" assert int(result["Connections"]) == 1 - assert int(result["Zxid"]) >= 10 + assert int(result["Zxid"], 16) >= 10 assert result["Mode"] == "leader" assert result["Node count"] == "14" diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 6884ff29607..8ac7bc9b5e2 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -1,12 +1,7 @@ #!/usr/bin/env python3 - -#!/usr/bin/env python3 - import pytest from helpers.cluster import ClickHouseCluster -import random -import string import os import time from multiprocessing.dummy import Pool diff --git a/tests/integration/test_keeper_reconfig_add/__init__.py b/tests/integration/test_keeper_reconfig_add/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml new file mode 100644 index 00000000000..44e2090e9d8 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper1.xml @@ -0,0 +1,20 @@ + + + true + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml new file mode 100644 index 00000000000..e9249f7091c --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper2.xml @@ -0,0 +1,21 @@ + + + true + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + 2 node2 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml new file mode 100644 index 00000000000..a7ff1f6de28 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/configs/keeper3.xml @@ -0,0 +1,22 @@ + + + true + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + 1 node1 9234 + 2 node2 9234 + 3 node3 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_add/test.py b/tests/integration/test_keeper_reconfig_add/test.py new file mode 100644 index 00000000000..2c2da7403a1 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_add/test.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient +from kazoo.exceptions import BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") 
+ +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", stay_alive=True) +node3 = cluster.add_instance("node3", stay_alive=True) + +server_join_msg = "confirms it will join" +part_of_cluster = "now this node is the part of cluster" +zk1, zk2, zk3 = None, None, None + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node2.stop_clickhouse() + node2.copy_file_to_container( + os.path.join(CONFIG_DIR, "keeper2.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + node3.stop_clickhouse() + node3.copy_file_to_container( + os.path.join(CONFIG_DIR, "keeper3.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def test_reconfig_add(started_cluster): + """ + Add a node to another node. Then add another node to two. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + print("Initial config", config) + + assert len(config.split("\n")) == 1 + assert "node1" in config + assert "node2" not in config + assert "node3" not in config + + with pytest.raises(BadArgumentsException): + # duplicate id with different endpoint + zk1.reconfig(joining="server.1=localhost:1337", leaving=None, new_members=None) + + with pytest.raises(BadArgumentsException): + # duplicate endpoint + zk1.reconfig(joining="server.8=node1:9234", leaving=None, new_members=None) + + for i in range(100): + zk1.create(f"/test_three_{i}", b"somedata") + + node2.start_clickhouse() + config, _ = zk1.reconfig( + joining="server.2=node2:9234", leaving=None, new_members=None + ) + ku.wait_until_connected(cluster, node2) + + config = config.decode("utf-8") + print("After adding 2", config) + + assert len(config.split("\n")) == 2 + assert "node1" in config + assert "node2" in config + assert "node3" not in config + + zk2 = get_fake_zk(node2) + ku.wait_configs_equal(config, zk2) + + for i in range(100): + assert zk2.exists(f"/test_three_{i}") is not None + zk2.create(f"/test_three_{100 + i}", b"somedata") + + # Why not both? 
+ # One node will process add_srv request, other will pull out updated config, apply + # and return true in config update thread (without calling add_srv again) + assert node1.contains_in_log(server_join_msg) or node2.contains_in_log( + server_join_msg + ) + + assert node2.contains_in_log(part_of_cluster) + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_three_0") + + for i in range(200): + assert zk1.exists(f"/test_three_{i}") is not None + + for i in range(100): + zk2.create(f"/test_four_{i}", b"somedata") + + node3.start_clickhouse() + config, _ = zk2.reconfig( + joining="server.3=node3:9234", leaving=None, new_members=None + ) + ku.wait_until_connected(cluster, node3) + + config = config.decode("utf-8") + print("After adding 3", config) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + + zk3 = get_fake_zk(node3) + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk3.exists(f"/test_four_{i}") is not None + zk3.create(f"/test_four_{100 + i}", b"somedata") + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_four_0") + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + + for i in range(200): + assert zk1.exists(f"/test_four_{i}") is not None + assert zk2.exists(f"/test_four_{i}") is not None + + assert node3.contains_in_log(part_of_cluster) diff --git a/tests/integration/test_keeper_reconfig_remove/__init__.py b/tests/integration/test_keeper_reconfig_remove/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml new file mode 100644 index 00000000000..bbadc2741af --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper1.xml @@ -0,0 +1,37 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml new file mode 100644 index 00000000000..0191a522a50 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper2.xml @@ -0,0 +1,37 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml new file mode 100644 index 00000000000..345bf402336 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/configs/keeper3.xml @@ -0,0 +1,37 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + 0 + + + 3 + node3 + 9234 + true + 0 + + + + diff --git a/tests/integration/test_keeper_reconfig_remove/test.py b/tests/integration/test_keeper_reconfig_remove/test.py new file mode 100644 index 00000000000..fb0a9472df3 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove/test.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + +import pytest +from 
helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient +from kazoo.exceptions import BadVersionException, BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) + +log_msg_removed = "has been removed from the cluster" +zk1, zk2, zk3 = None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + for conn in [zk1, zk2, zk3]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_remove_followers_from_3(started_cluster): + """ + Remove 1 follower node from cluster of 3. + Then remove another follower from two left nodes. + Check that remaining node is in standalone mode. + """ + + zk1 = get_fake_zk(node1) + config, _ = zk1.get("/keeper/config") + config = config.decode("utf-8") + print("Initial config", config) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + + with pytest.raises(BadVersionException): + zk1.reconfig(joining=None, leaving="1", new_members=None, from_config=20) + with pytest.raises(BadArgumentsException): + zk1.reconfig(joining=None, leaving=None, new_members=None) + with pytest.raises(BadArgumentsException): + # bulk reconfiguration is not supported + zk1.reconfig(joining=None, leaving=None, new_members="3") + with pytest.raises(BadArgumentsException): + zk1.reconfig(joining="1", leaving="1", new_members="3") + with pytest.raises(BadArgumentsException): + # at least one node must be left + zk1.reconfig(joining=None, leaving="1,2,3", new_members=None) + + for i in range(100): + zk1.create(f"/test_two_{i}", b"somedata") + + zk2 = get_fake_zk(node2) + zk2.sync("/test_two_0") + ku.wait_configs_equal(config, zk2) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_two_0") + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk2.exists(f"test_two_{i}") is not None + assert zk3.exists(f"test_two_{i}") is not None + + config, _ = zk1.reconfig(joining=None, leaving="3", new_members=None) + config = config.decode("utf-8") + print("After removing 3", config) + + assert len(config.split("\n")) == 2 + assert "node1" in config + assert "node2" in config + assert "node3" not in config + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + ku.wait_configs_equal(config, zk2) + + for i in range(100): + assert zk2.exists(f"test_two_{i}") is not None + zk2.create(f"/test_two_{100 + i}", b"otherdata") + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + for i in range(200): + assert zk1.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk3.stop() + zk3.close() + zk3 = get_fake_zk(node3) + zk3.sync("/test_two_0") + + assert node3.contains_in_log(log_msg_removed) + + for i in range(100): + zk2.create(f"/test_two_{200 + i}", b"otherdata") + + config, _ = zk1.reconfig(joining=None, leaving="2", new_members=None) + config = config.decode("utf-8") + + print("After removing 2", config) + assert len(config.split("\n")) == 1 + assert "node1" in config + assert "node2" not in config + assert "node3" not in config 
+ + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + for i in range(300): + assert zk1.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_two_0") + + assert not node1.contains_in_log(log_msg_removed) + assert node2.contains_in_log(log_msg_removed) + assert "Mode: standalone" in zk1.command(b"stat") diff --git a/tests/integration/test_keeper_reconfig_remove_many/__init__.py b/tests/integration/test_keeper_reconfig_remove_many/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml new file mode 100644 index 00000000000..9976169624b --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper1.xml @@ -0,0 +1,47 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml new file mode 100644 index 00000000000..edc43142464 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper2.xml @@ -0,0 +1,47 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml new file mode 100644 index 00000000000..8cebcbc0808 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper3.xml @@ -0,0 +1,47 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml new file mode 100644 index 00000000000..99ac7e53f30 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper4.xml @@ -0,0 +1,47 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml new file mode 100644 index 00000000000..92102ad486b --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/configs/keeper5.xml @@ -0,0 +1,47 @@ + + + 9181 + 5 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + 
node3 + 9234 + true + + + 4 + node4 + 9234 + true + + + 5 + node5 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_remove_many/test.py b/tests/integration/test_keeper_reconfig_remove_many/test.py new file mode 100644 index 00000000000..ec0d8b95eff --- /dev/null +++ b/tests/integration/test_keeper_reconfig_remove_many/test.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as ku +import os +from kazoo.client import KazooClient, KazooState +from kazoo.exceptions import BadVersionException, BadArgumentsException + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + +nodes = [ + cluster.add_instance(f"node{i}", main_configs=[f"configs/keeper{i}.xml"]) + for i in range(1, 6) +] +node1, node2, node3, node4, node5 = nodes + +log_msg_removed = "has been removed from the cluster" +zk1, zk2, zk3, zk4, zk5 = None, None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + for conn in [zk1, zk2, zk3, zk4, zk5]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_remove_2_and_leader(started_cluster): + """ + Remove 2 followers from a cluster of 5. Remove leader from 3 nodes. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + print("Initial config", config) + + assert len(config.split("\n")) == 5 + + for i in range(100): + zk1.create(f"/test_two_{i}", b"somedata") + + zk4 = get_fake_zk(node4) + zk4.sync("/test_two_0") + ku.wait_configs_equal(config, zk4) + + zk5 = get_fake_zk(node5) + zk5.sync("/test_two_0") + ku.wait_configs_equal(config, zk5) + + for i in range(100): + assert zk4.exists(f"test_two_{i}") is not None + assert zk5.exists(f"test_two_{i}") is not None + + zk4.create(f"/test_two_{100 + i}", b"otherdata") + + zk2 = get_fake_zk(node2) + config, _ = zk2.reconfig(joining=None, leaving="4,5", new_members=None) + config = config.decode("utf-8") + + print("After removing 4,5", config) + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + assert "node5" not in config + + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_two_0") + + ku.wait_configs_equal(config, zk1) + + for i in range(200): + assert zk1.exists(f"test_two_{i}") is not None + assert zk2.exists(f"test_two_{i}") is not None + + with pytest.raises(Exception): + zk4.stop() + zk4.close() + zk4 = get_fake_zk(node4) + zk4.sync("/test_two_0") + + with pytest.raises(Exception): + zk5.stop() + zk5.close() + zk5 = get_fake_zk(node5) + zk5.sync("/test_two_0") + + assert not node1.contains_in_log(log_msg_removed) + assert not node2.contains_in_log(log_msg_removed) + assert not node3.contains_in_log(log_msg_removed) + assert node4.contains_in_log(log_msg_removed) + assert node5.contains_in_log(log_msg_removed) + + assert ku.is_leader(cluster, node1) + + for i in range(100): + zk1.create(f"/test_leader_{i}", b"somedata") + + # when a leader gets a remove request, it must yield leadership + config, _ = zk1.reconfig(joining=None, leaving="1", new_members=None) + config = config.decode("utf-8") + print("After removing 1 (leader)", config) + + assert len(config.split("\n")) == 2 + assert "node1" not in config + assert "node2" in config + assert "node3" in 
config + assert "node4" not in config + assert "node5" not in config + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_leader_0") + ku.wait_configs_equal(config, zk2) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_leader_0") + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk2.exists(f"test_leader_{i}") is not None + assert zk3.exists(f"test_leader_{i}") is not None + + with pytest.raises(Exception): + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_leader_0") + + assert node1.contains_in_log(log_msg_removed) + assert not node2.contains_in_log(log_msg_removed) + assert not node3.contains_in_log(log_msg_removed) diff --git a/tests/integration/test_keeper_reconfig_replace_leader/__init__.py b/tests/integration/test_keeper_reconfig_replace_leader/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml new file mode 100644 index 00000000000..71f3403aca3 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper1.xml @@ -0,0 +1,35 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml new file mode 100644 index 00000000000..faefb4d1102 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper2.xml @@ -0,0 +1,35 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml new file mode 100644 index 00000000000..80a9caa92c2 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper3.xml @@ -0,0 +1,35 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml new file mode 100644 index 00000000000..9fd88fe5d63 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/configs/keeper4.xml @@ -0,0 +1,21 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + 2 node2 9234 + 3 node3 9234 + 4 node4 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader/test.py b/tests/integration/test_keeper_reconfig_replace_leader/test.py new file mode 100644 index 00000000000..ef1d5394b67 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader/test.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from os.path import join, dirname, realpath +import time +import helpers.keeper_utils as ku +from kazoo.client 
import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = join(dirname(realpath(__file__)), "configs") + +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) +node4 = cluster.add_instance("node4", stay_alive=True) +zk1, zk2, zk3, zk4 = None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node4.stop_clickhouse() + node4.copy_file_to_container( + join(CONFIG_DIR, "keeper4.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3, zk4]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) + + +def test_reconfig_replace_leader(started_cluster): + """ + Remove leader from a cluster of 3 and add a new node via two commands. + """ + + zk1 = get_fake_zk(node1) + config = ku.get_config_str(zk1) + + assert len(config.split("\n")) == 3 + assert "node1" in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + + for i in range(100): + zk1.create(f"/test_four_{i}", b"somedata") + + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + ku.wait_configs_equal(config, zk2) + + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + ku.wait_configs_equal(config, zk3) + + for i in range(100): + assert zk2.exists(f"/test_four_{i}") is not None + assert zk3.exists(f"/test_four_{i}") is not None + + assert ku.is_leader(cluster, node1) + config, _ = zk2.reconfig(joining=None, leaving="1", new_members=None) + config = config.decode("utf-8") + + print("After removing 1 (leader)", config) + assert len(config.split("\n")) == 2 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" not in config + + ku.wait_configs_equal(config, zk2) + + with pytest.raises(Exception): + zk1.stop() + zk1.close() + zk1 = get_fake_zk(node1) + zk1.sync("/test_four_0") + + node4.start_clickhouse() + config, _ = zk2.reconfig( + joining="server.4=node4:9234", leaving=None, new_members=None + ) + config = config.decode("utf-8") + ku.wait_until_connected(cluster, node4) + + print("After adding 4", config) + assert len(config.split("\n")) == 3 + assert "node1" not in config + assert "node2" in config + assert "node3" in config + assert "node4" in config + + zk4 = get_fake_zk(node4) + ku.wait_configs_equal(config, zk4) + + for i in range(100): + assert zk4.exists(f"test_four_{i}") is not None + zk4.create(f"/test_four_{100 + i}", b"somedata") + + zk2.stop() + zk2.close() + zk2 = get_fake_zk(node2) + zk2.sync("/test_four_0") + ku.wait_configs_equal(config, zk2) + + zk3.stop() + zk3.close() + zk3 = get_fake_zk(node3) + zk3.sync("/test_four_0") + ku.wait_configs_equal(config, zk3) + + for i in range(200): + assert zk2.exists(f"test_four_{i}") is not None + assert zk3.exists(f"test_four_{i}") is not None diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml new file mode 100644 index 00000000000..71f3403aca3 
--- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper1.xml @@ -0,0 +1,35 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml new file mode 100644 index 00000000000..faefb4d1102 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper2.xml @@ -0,0 +1,35 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml new file mode 100644 index 00000000000..80a9caa92c2 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper3.xml @@ -0,0 +1,35 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml new file mode 100644 index 00000000000..9fd88fe5d63 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/configs/keeper4.xml @@ -0,0 +1,21 @@ + + + 9181 + 4 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + true + + + 5000 + 10000 + trace + + + + 2 node2 9234 + 3 node3 9234 + 4 node4 9234 + + + diff --git a/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py new file mode 100644 index 00000000000..b099d0513e1 --- /dev/null +++ b/tests/integration/test_keeper_reconfig_replace_leader_in_one_command/test.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from os.path import join, dirname, realpath +import time +import helpers.keeper_utils as ku +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +CONFIG_DIR = join(dirname(realpath(__file__)), "configs") + +node1 = cluster.add_instance("node1", main_configs=["configs/keeper1.xml"]) +node2 = cluster.add_instance("node2", main_configs=["configs/keeper2.xml"]) +node3 = cluster.add_instance("node3", main_configs=["configs/keeper3.xml"]) +node4 = cluster.add_instance("node4", stay_alive=True) +zk1, zk2, zk3, zk4 = None, None, None, None + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node4.stop_clickhouse() + node4.copy_file_to_container( + join(CONFIG_DIR, "keeper4.xml"), + "/etc/clickhouse-server/config.d/keeper.xml", + ) + + yield cluster + + finally: + for conn in [zk1, zk2, zk3, zk4]: + if conn: + conn.stop() + conn.close() + + cluster.shutdown() + + +def get_fake_zk(node): + return ku.get_fake_zk(cluster, node) diff --git 
a/tests/integration/test_keeper_s3_snapshot/test.py b/tests/integration/test_keeper_s3_snapshot/test.py index 3e19bc4822c..84ffc964621 100644 --- a/tests/integration/test_keeper_s3_snapshot/test.py +++ b/tests/integration/test_keeper_s3_snapshot/test.py @@ -1,6 +1,7 @@ import pytest from helpers.cluster import ClickHouseCluster from time import sleep +from retry import retry from kazoo.client import KazooClient @@ -88,15 +89,19 @@ def test_s3_upload(started_cluster): for obj in list(cluster.minio_client.list_objects("snapshots")) ] - saved_snapshots = get_saved_snapshots() - assert set(saved_snapshots) == set( - [ - "snapshot_50.bin.zstd", - "snapshot_100.bin.zstd", - "snapshot_150.bin.zstd", - "snapshot_200.bin.zstd", - ] - ) + # Keeper sends snapshots asynchornously, hence we need to retry. + @retry(AssertionError, tries=10, delay=2) + def _check_snapshots(): + assert set(get_saved_snapshots()) == set( + [ + "snapshot_50.bin.zstd", + "snapshot_100.bin.zstd", + "snapshot_150.bin.zstd", + "snapshot_200.bin.zstd", + ] + ) + + _check_snapshots() destroy_zk_client(node1_zk) node1.stop_clickhouse(kill=True) @@ -108,9 +113,11 @@ def test_s3_upload(started_cluster): for _ in range(200): node2_zk.create("/test", sequence=True) - saved_snapshots = get_saved_snapshots() + @retry(AssertionError, tries=10, delay=2) + def _check_snapshots_without_quorum(): + assert len(get_saved_snapshots()) > 4 - assert len(saved_snapshots) > 4 + _check_snapshots_without_quorum() success_upload_message = "Successfully uploaded" assert node2.contains_in_log(success_upload_message) or node3.contains_in_log( diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index e57057a8258..cd012ad6e9e 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -6,6 +6,7 @@ import socket import struct from kazoo.client import KazooClient +from kazoo.exceptions import NoNodeError # from kazoo.protocol.serialization import Connect, read_buffer, write_buffer @@ -162,17 +163,40 @@ def test_session_timeout(started_cluster): def test_session_close_shutdown(started_cluster): wait_nodes() - node1_zk = get_fake_zk(node1.name) - node2_zk = get_fake_zk(node2.name) + node1_zk = None + node2_zk = None + for i in range(20): + node1_zk = get_fake_zk(node1.name) + node2_zk = get_fake_zk(node2.name) - eph_node = "/test_node" - node2_zk.create(eph_node, ephemeral=True) - node1_zk.sync(eph_node) - assert node1_zk.exists(eph_node) != None + eph_node = "/test_node" + node2_zk.create(eph_node, ephemeral=True) + node1_zk.sync(eph_node) - # shutdown while session is active - node2.stop_clickhouse() + node1_zk.exists(eph_node) != None - assert node1_zk.exists(eph_node) == None + # restart while session is active so it's closed during shutdown + node2.restart_clickhouse() - node2.start_clickhouse() + if node1_zk.exists(eph_node) == None: + break + + assert node2.contains_in_log( + "Sessions cannot be closed during shutdown because there is no active leader" + ) + + try: + node1_zk.delete(eph_node) + except NoNodeError: + pass + + assert node1_zk.exists(eph_node) == None + + destroy_zk_client(node1_zk) + node1_zk = None + destroy_zk_client(node2_zk) + node2_zk = None + + time.sleep(1) + else: + assert False, "Session wasn't properly cleaned up on shutdown" diff --git a/tests/integration/test_lost_part/test.py b/tests/integration/test_lost_part/test.py index 0bc24268040..382539df7de 100644 --- a/tests/integration/test_lost_part/test.py +++ 
b/tests/integration/test_lost_part/test.py @@ -39,257 +39,261 @@ def test_lost_part_same_replica(start_cluster): node1.query("DROP TABLE IF EXISTS mt0 SYNC") node2.query("DROP TABLE IF EXISTS mt0 SYNC") - for node in [node1, node2]: - node.query( - f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," - "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" - ) + try: + for node in [node1, node2]: + node.query( + f"CREATE TABLE mt0 (id UInt64, date Date) ENGINE ReplicatedMergeTree('/clickhouse/tables/t', '{node.name}') ORDER BY tuple() PARTITION BY date " + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + ) - node1.query("SYSTEM STOP MERGES mt0") - node2.query("SYSTEM STOP REPLICATION QUEUES") + node1.query("SYSTEM STOP MERGES mt0") + node2.query("SYSTEM STOP REPLICATION QUEUES") - for i in range(5): - node1.query(f"INSERT INTO mt0 VALUES ({i}, toDate('2020-10-01'))") + for i in range(5): + node1.query(f"INSERT INTO mt0 VALUES ({i}, toDate('2020-10-01'))") - for i in range(20): - parts_to_merge = node1.query( - "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt0' AND length(parts_to_merge) > 0" - ) - if parts_to_merge: - parts_list = list(sorted(ast.literal_eval(parts_to_merge))) - print("Got parts list", parts_list) - if len(parts_list) < 3: - raise Exception(f"Got too small parts list {parts_list}") - break - time.sleep(1) + for i in range(20): + parts_to_merge = node1.query( + "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt0' AND length(parts_to_merge) > 0" + ) + if parts_to_merge: + parts_list = list(sorted(ast.literal_eval(parts_to_merge))) + print("Got parts list", parts_list) + if len(parts_list) < 3: + raise Exception(f"Got too small parts list {parts_list}") + break + time.sleep(1) - victim_part_from_the_middle = random.choice(parts_list[1:-1]) - print("Will corrupt part", victim_part_from_the_middle) + victim_part_from_the_middle = random.choice(parts_list[1:-1]) + print("Will corrupt part", victim_part_from_the_middle) - remove_part_from_disk(node1, "mt0", victim_part_from_the_middle) + remove_part_from_disk(node1, "mt0", victim_part_from_the_middle) - node1.query("DETACH TABLE mt0") + node1.query("DETACH TABLE mt0") - node1.query("ATTACH TABLE mt0") + node1.query("ATTACH TABLE mt0") - node1.query("SYSTEM START MERGES mt0") - res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt0") - print("result: ", res) - print("error: ", res) + node1.query("SYSTEM START MERGES mt0") + res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt0") + print("result: ", res) + print("error: ", res) - for i in range(10): - result = node1.query("SELECT count() FROM system.replication_queue") - if int(result) == 0: - break - time.sleep(1) - else: - assert False, "Still have something in replication queue:\n" + node1.query( - "SELECT count() FROM system.replication_queue FORMAT Vertical" - ) + for i in range(10): + result = node1.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node1.query( + "SELECT count() FROM system.replication_queue FORMAT 
Vertical" + ) - assert node1.contains_in_log( - "Created empty part" - ), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed" + assert node1.contains_in_log( + "Created empty part" + ), f"Seems like empty part {victim_part_from_the_middle} is not created or log message changed" - assert node1.query("SELECT COUNT() FROM mt0") == "4\n" + assert node1.query("SELECT COUNT() FROM mt0") == "4\n" - node2.query("SYSTEM START REPLICATION QUEUES") + node2.query("SYSTEM START REPLICATION QUEUES") - assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4") - assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("DROP TABLE IF EXISTS mt0 SYNC") - node2.query("DROP TABLE IF EXISTS mt0 SYNC") + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt0", "4") + assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") + finally: + node1.query("DROP TABLE IF EXISTS mt0 SYNC") + node2.query("DROP TABLE IF EXISTS mt0 SYNC") def test_lost_part_other_replica(start_cluster): node1.query("DROP TABLE IF EXISTS mt1 SYNC") node2.query("DROP TABLE IF EXISTS mt1 SYNC") - for node in [node1, node2]: - node.query( - f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," - "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + try: + for node in [node1, node2]: + node.query( + f"CREATE TABLE mt1 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t1', '{node.name}') ORDER BY tuple() " + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + ) + + node1.query("SYSTEM STOP MERGES mt1") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(5): + node1.query(f"INSERT INTO mt1 VALUES ({i})") + + for i in range(20): + parts_to_merge = node1.query( + "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt1' AND length(parts_to_merge) > 0" + ) + if parts_to_merge: + parts_list = list(sorted(ast.literal_eval(parts_to_merge))) + print("Got parts list", parts_list) + if len(parts_list) < 3: + raise Exception("Got too small parts list {}".format(parts_list)) + break + time.sleep(1) + + victim_part_from_the_middle = random.choice(parts_list[1:-1]) + print("Will corrupt part", victim_part_from_the_middle) + + remove_part_from_disk(node1, "mt1", victim_part_from_the_middle) + + # other way to detect broken parts + node1.query("CHECK TABLE mt1") + + node2.query("SYSTEM START REPLICATION QUEUES") + res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1") + print("result: ", res) + print("error: ", res) + + for i in range(10): + result = node2.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node2.query( + "SELECT * FROM system.replication_queue FORMAT Vertical" + ) + + assert node1.contains_in_log( + "Created empty part" + ), "Seems like empty part {} is not created or log message changed".format( + victim_part_from_the_middle ) - node1.query("SYSTEM STOP MERGES mt1") - node2.query("SYSTEM STOP REPLICATION QUEUES") + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4") + assert_eq_with_retry(node2, "SELECT COUNT() FROM 
system.replication_queue", "0") - for i in range(5): - node1.query(f"INSERT INTO mt1 VALUES ({i})") + node1.query("SYSTEM START MERGES mt1") - for i in range(20): - parts_to_merge = node1.query( - "SELECT parts_to_merge FROM system.replication_queue WHERE table='mt1' AND length(parts_to_merge) > 0" - ) - if parts_to_merge: - parts_list = list(sorted(ast.literal_eval(parts_to_merge))) - print("Got parts list", parts_list) - if len(parts_list) < 3: - raise Exception("Got too small parts list {}".format(parts_list)) - break - time.sleep(1) - - victim_part_from_the_middle = random.choice(parts_list[1:-1]) - print("Will corrupt part", victim_part_from_the_middle) - - remove_part_from_disk(node1, "mt1", victim_part_from_the_middle) - - # other way to detect broken parts - node1.query("CHECK TABLE mt1") - - node2.query("SYSTEM START REPLICATION QUEUES") - res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt1") - print("result: ", res) - print("error: ", res) - - for i in range(10): - result = node2.query("SELECT count() FROM system.replication_queue") - if int(result) == 0: - break - time.sleep(1) - else: - assert False, "Still have something in replication queue:\n" + node2.query( - "SELECT * FROM system.replication_queue FORMAT Vertical" - ) - - assert node1.contains_in_log( - "Created empty part" - ), "Seems like empty part {} is not created or log message changed".format( - victim_part_from_the_middle - ) - - assert_eq_with_retry(node2, "SELECT COUNT() FROM mt1", "4") - assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("SYSTEM START MERGES mt1") - - assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4") - assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("DROP TABLE IF EXISTS mt1 SYNC") - node2.query("DROP TABLE IF EXISTS mt1 SYNC") + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt1", "4") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") + finally: + node1.query("DROP TABLE IF EXISTS mt1 SYNC") + node2.query("DROP TABLE IF EXISTS mt1 SYNC") def test_lost_part_mutation(start_cluster): node1.query("DROP TABLE IF EXISTS mt2 SYNC") node2.query("DROP TABLE IF EXISTS mt2 SYNC") - for node in [node1, node2]: - node.query( - f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() " - "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," - "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + try: + for node in [node1, node2]: + node.query( + f"CREATE TABLE mt2 (id UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/t2', '{node.name}') ORDER BY tuple() " + "SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + ) + + node1.query("SYSTEM STOP MERGES mt2") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(2): + node1.query(f"INSERT INTO mt2 VALUES ({i})") + + node1.query( + "ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - node1.query("SYSTEM STOP MERGES mt2") - node2.query("SYSTEM STOP REPLICATION QUEUES") + for i in range(20): + parts_to_mutate = node1.query( + "SELECT count() FROM system.replication_queue WHERE table='mt2'" + ) + # two mutations for both replicas + if int(parts_to_mutate) == 4: + break + time.sleep(1) - for i in 
range(2): - node1.query(f"INSERT INTO mt2 VALUES ({i})") + remove_part_from_disk(node1, "mt2", "all_1_1_0") - node1.query( - "ALTER TABLE mt2 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} - ) + # other way to detect broken parts + node1.query("CHECK TABLE mt2") - for i in range(20): - parts_to_mutate = node1.query( - "SELECT count() FROM system.replication_queue WHERE table='mt2'" - ) - # two mutations for both replicas - if int(parts_to_mutate) == 4: - break - time.sleep(1) + node1.query("SYSTEM START MERGES mt2") + res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt2") + print("result: ", res) + print("error: ", res) - remove_part_from_disk(node1, "mt2", "all_1_1_0") + for i in range(10): + result = node1.query("SELECT count() FROM system.replication_queue") + if int(result) == 0: + break + time.sleep(1) + else: + assert False, "Still have something in replication queue:\n" + node1.query( + "SELECT * FROM system.replication_queue FORMAT Vertical" + ) - # other way to detect broken parts - node1.query("CHECK TABLE mt2") + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1") + assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") - node1.query("SYSTEM START MERGES mt2") - res, err = node1.query_and_get_answer_with_error("SYSTEM SYNC REPLICA mt2") - print("result: ", res) - print("error: ", res) + node2.query("SYSTEM START REPLICATION QUEUES") - for i in range(10): - result = node1.query("SELECT count() FROM system.replication_queue") - if int(result) == 0: - break - time.sleep(1) - else: - assert False, "Still have something in replication queue:\n" + node1.query( - "SELECT * FROM system.replication_queue FORMAT Vertical" - ) - - assert_eq_with_retry(node1, "SELECT COUNT() FROM mt2", "1") - assert_eq_with_retry(node1, "SELECT SUM(id) FROM mt2", "777") - assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") - - node2.query("SYSTEM START REPLICATION QUEUES") - - assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1") - assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777") - assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("DROP TABLE IF EXISTS mt2 SYNC") - node2.query("DROP TABLE IF EXISTS mt2 SYNC") + assert_eq_with_retry(node2, "SELECT COUNT() FROM mt2", "1") + assert_eq_with_retry(node2, "SELECT SUM(id) FROM mt2", "777") + assert_eq_with_retry(node2, "SELECT COUNT() FROM system.replication_queue", "0") + finally: + node1.query("DROP TABLE IF EXISTS mt2 SYNC") + node2.query("DROP TABLE IF EXISTS mt2 SYNC") def test_lost_last_part(start_cluster): node1.query("DROP TABLE IF EXISTS mt3 SYNC") node2.query("DROP TABLE IF EXISTS mt3 SYNC") - for node in [node1, node2]: - node.query( - f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') " - "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," - "merge_selecting_sleep_ms=100, max_merge_selecting_sleep_ms=1000" + try: + for node in [node1, node2]: + node.query( + f"CREATE TABLE mt3 (id UInt64, p String) ENGINE ReplicatedMergeTree('/clickhouse/tables/t3', '{node.name}') " + "ORDER BY tuple() PARTITION BY p SETTINGS cleanup_delay_period=1, cleanup_delay_period_random_add=1, cleanup_thread_preferred_points_per_iteration=0," + "merge_selecting_sleep_ms=100, 
max_merge_selecting_sleep_ms=1000" + ) + + node1.query("SYSTEM STOP MERGES mt3") + node2.query("SYSTEM STOP REPLICATION QUEUES") + + for i in range(1): + node1.query(f"INSERT INTO mt3 VALUES ({i}, 'x')") + + # actually not important + node1.query( + "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} ) - node1.query("SYSTEM STOP MERGES mt3") - node2.query("SYSTEM STOP REPLICATION QUEUES") + partition_id = node1.query("select partitionId('x')").strip() + remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") - for i in range(1): - node1.query(f"INSERT INTO mt3 VALUES ({i}, 'x')") + # other way to detect broken parts + node1.query("CHECK TABLE mt3") - # actually not important - node1.query( - "ALTER TABLE mt3 UPDATE id = 777 WHERE 1", settings={"mutations_sync": "0"} - ) + node1.query("SYSTEM START MERGES mt3") - partition_id = node1.query("select partitionId('x')").strip() - remove_part_from_disk(node1, "mt3", f"{partition_id}_0_0_0") + for i in range(100): + result = node1.query( + "SELECT count() FROM system.replication_queue WHERE table='mt3'" + ) + assert int(result) <= 2, "Have a lot of entries in queue {}".format( + node1.query("SELECT * FROM system.replication_queue FORMAT Vertical") + ) + if node1.contains_in_log( + "Cannot create empty part" + ) and node1.contains_in_log("DROP/DETACH PARTITION"): + break + if node1.contains_in_log( + "Created empty part 8b8f0fede53df97513a9fb4cb19dc1e4_0_0_0 " + ): + break + time.sleep(0.5) + else: + assert False, "Don't have required messages in node1 log" - # other way to detect broken parts - node1.query("CHECK TABLE mt3") + node1.query(f"ALTER TABLE mt3 DROP PARTITION ID '{partition_id}'") - node1.query("SYSTEM START MERGES mt3") - - for i in range(10): - result = node1.query( - "SELECT count() FROM system.replication_queue WHERE table='mt3'" - ) - assert int(result) <= 2, "Have a lot of entries in queue {}".format( - node1.query("SELECT * FROM system.replication_queue FORMAT Vertical") - ) - if node1.contains_in_log("Cannot create empty part") and node1.contains_in_log( - "DROP/DETACH PARTITION" - ): - break - if node1.contains_in_log( - "Created empty part 8b8f0fede53df97513a9fb4cb19dc1e4_0_0_0 " - ): - break - time.sleep(1) - else: - assert False, "Don't have required messages in node1 log" - - node1.query(f"ALTER TABLE mt3 DROP PARTITION ID '{partition_id}'") - - assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0") - assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") - - node1.query("DROP TABLE IF EXISTS mt3 SYNC") - node2.query("DROP TABLE IF EXISTS mt3 SYNC") + assert_eq_with_retry(node1, "SELECT COUNT() FROM mt3", "0") + assert_eq_with_retry(node1, "SELECT COUNT() FROM system.replication_queue", "0") + finally: + node1.query("DROP TABLE IF EXISTS mt3 SYNC") + node2.query("DROP TABLE IF EXISTS mt3 SYNC") diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 8cf9e67bf63..e64884707c1 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -13,25 +13,36 @@ from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry -def check_query(clickhouse_node, query, result_set, retry_count=10, interval_seconds=3): - lastest_result = "" +def check_query( + clickhouse_node, + query, + result_set, + retry_count=30, + 
interval_seconds=1, + on_failure=None, +): + latest_result = "" + if "/* expect: " not in query: + query = "/* expect: " + result_set.rstrip("\n") + "*/ " + query for i in range(retry_count): try: - lastest_result = clickhouse_node.query(query) - if result_set == lastest_result: + latest_result = clickhouse_node.query(query) + if result_set == latest_result: return - logging.debug(f"latest_result {lastest_result}") + logging.debug(f"latest_result {latest_result}") time.sleep(interval_seconds) except Exception as e: logging.debug(f"check_query retry {i+1} exception {e}") time.sleep(interval_seconds) else: - result_got = clickhouse_node.query(query) + latest_result = clickhouse_node.query(query) + if on_failure is not None and latest_result != result_set: + on_failure(latest_result, result_set) assert ( - result_got == result_set - ), f"Got result {result_got}, while expected result {result_set}" + latest_result == result_set + ), f"Got result '{latest_result}', expected result '{result_set}'" def dml_with_materialized_mysql_database(clickhouse_node, mysql_node, service_name): @@ -980,6 +991,89 @@ def query_event_with_empty_transaction(clickhouse_node, mysql_node, service_name mysql_node.query("DROP DATABASE test_database_event") +def text_blob_with_charset_test(clickhouse_node, mysql_node, service_name): + db = "text_blob_with_charset_test" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db} DEFAULT CHARACTER SET 'utf8'") + + mysql_node.query( + f"CREATE TABLE {db}.test_table_1 (a INT NOT NULL PRIMARY KEY, b text CHARACTER SET gbk, c tinytext CHARSET big5, d longtext, e varchar(256), f char(4)) ENGINE = InnoDB DEFAULT CHARSET=gbk" + ) + mysql_node.query( + f"CREATE TABLE {db}.test_table_2 (a INT NOT NULL PRIMARY KEY, b blob, c longblob) ENGINE = InnoDB DEFAULT CHARSET=gbk" + ) + mysql_node.query( + f"CREATE TABLE {db}.test_table_3 (a INT NOT NULL PRIMARY KEY, b text CHARACTER SET gbk, c tinytext CHARSET gbk, d tinytext CHARSET big5, e varchar(256), f char(4)) ENGINE = InnoDB" + ) + + mysql_node.query( + f"INSERT INTO {db}.test_table_1 VALUES (1, '你好', '世界', '哈罗', '您Hi您', '您Hi您')" + ) + mysql_node.query( + f"INSERT INTO {db}.test_table_2 VALUES (1, '你好', 0xFAAA00000000000DDCC)" + ) + mysql_node.query( + f"INSERT INTO {db}.test_table_3 VALUES (1, '你好', '世界', 'hello', '您Hi您', '您Hi您')" + ) + + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + assert db in clickhouse_node.query("SHOW DATABASES") + + # from full replication + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "test_table_1\ntest_table_2\ntest_table_3\n", + ) + check_query( + clickhouse_node, + f"SELECT b, c, d, e, f FROM {db}.test_table_1 WHERE a = 1 FORMAT TSV", + "你好\t世界\t哈罗\t您Hi您\t您Hi您\n", + ) + check_query( + clickhouse_node, + f"SELECT hex(b), hex(c) FROM {db}.test_table_2 WHERE a = 1 FORMAT TSV", + "E4BDA0E5A5BD\t0FAAA00000000000DDCC\n", + ) + check_query( + clickhouse_node, + f"SELECT b, c, d, e, f FROM {db}.test_table_3 WHERE a = 1 FORMAT TSV", + "你好\t世界\thello\t您Hi您\t您Hi您\n", + ) + + # from increment replication + mysql_node.query( + f"INSERT INTO {db}.test_table_1 VALUES (2, '你好', '世界', '哈罗', '您Hi您', '您Hi您')" + ) + mysql_node.query( + f"INSERT INTO {db}.test_table_2 VALUES (2, '你好', 0xFAAA00000000000DDCC)" + ) + mysql_node.query( + f"INSERT INTO {db}.test_table_3 VALUES (2, '你好', '世界', 'hello', '您Hi您', '您Hi您')" + ) + 
+ check_query( + clickhouse_node, + f"SELECT b, c, d, e, f FROM {db}.test_table_1 WHERE a = 2 FORMAT TSV", + "你好\t世界\t哈罗\t您Hi您\t您Hi您\n", + ) + check_query( + clickhouse_node, + f"SELECT hex(b), hex(c) FROM {db}.test_table_2 WHERE a = 2 FORMAT TSV", + "E4BDA0E5A5BD\t0FAAA00000000000DDCC\n", + ) + check_query( + clickhouse_node, + f"SELECT b, c, d, e, f FROM {db}.test_table_3 WHERE a = 2 FORMAT TSV", + "你好\t世界\thello\t您Hi您\t您Hi您\n", + ) + clickhouse_node.query(f"DROP DATABASE {db}") + mysql_node.query(f"DROP DATABASE {db}") + + def select_without_columns(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS db") clickhouse_node.query("DROP DATABASE IF EXISTS db") @@ -992,6 +1086,7 @@ def select_without_columns(clickhouse_node, mysql_node, service_name): ) check_query(clickhouse_node, "SHOW TABLES FROM db FORMAT TSV", "t\n") clickhouse_node.query("SYSTEM STOP MERGES db.t") + clickhouse_node.query("DROP VIEW IF EXISTS v") clickhouse_node.query("CREATE VIEW v AS SELECT * FROM db.t") mysql_node.query("INSERT INTO db.t VALUES (1, 1), (2, 2)") mysql_node.query("DELETE FROM db.t WHERE a = 2;") @@ -1154,7 +1249,7 @@ def err_sync_user_privs_with_materialized_mysql_database( ) assert "priv_err_db" in clickhouse_node.query("SHOW DATABASES") assert "test_table_1" not in clickhouse_node.query("SHOW TABLES FROM priv_err_db") - clickhouse_node.query_with_retry("DETACH DATABASE priv_err_db") + clickhouse_node.query_with_retry("DETACH DATABASE priv_err_db SYNC") mysql_node.query("REVOKE SELECT ON priv_err_db.* FROM 'test'@'%'") time.sleep(3) @@ -1347,7 +1442,7 @@ def mysql_kill_sync_thread_restore_test(clickhouse_node, mysql_node, service_nam time.sleep(sleep_time) clickhouse_node.query("SELECT * FROM test_database.test_table") - clickhouse_node.query_with_retry("DETACH DATABASE test_database") + clickhouse_node.query_with_retry("DETACH DATABASE test_database SYNC") clickhouse_node.query("ATTACH DATABASE test_database") check_query( clickhouse_node, @@ -1411,7 +1506,7 @@ def mysql_killed_while_insert(clickhouse_node, mysql_node, service_name): mysql_node.alloc_connection() - clickhouse_node.query_with_retry("DETACH DATABASE kill_mysql_while_insert") + clickhouse_node.query_with_retry("DETACH DATABASE kill_mysql_while_insert SYNC") clickhouse_node.query("ATTACH DATABASE kill_mysql_while_insert") result = mysql_node.query_and_get_data( @@ -1497,6 +1592,134 @@ def utf8mb4_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE utf8mb4_test") +def utf8mb4_column_test(clickhouse_node, mysql_node, service_name): + db = "utf8mb4_column_test" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + + # Full sync + mysql_node.query(f"CREATE TABLE {db}.unquoted (id INT primary key, 日期 DATETIME)") + mysql_node.query(f"CREATE TABLE {db}.quoted (id INT primary key, `日期` DATETIME)") + mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.quoted VALUES(1, now())") + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + + # Full sync replicated unquoted columns names since they use SHOW CREATE TABLE + # which returns quoted column names + check_query( + clickhouse_node, + f"/* expect: quoted unquoted */ SHOW TABLES FROM {db}", + "quoted\nunquoted\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted", + 
"1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted", + "1\n", + ) + + # Inc sync + mysql_node.query( + f"CREATE TABLE {db}.unquoted_new (id INT primary key, 日期 DATETIME)" + ) + mysql_node.query( + f"CREATE TABLE {db}.quoted_new (id INT primary key, `日期` DATETIME)" + ) + mysql_node.query(f"INSERT INTO {db}.unquoted_new VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.quoted_new VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(2, now())") + mysql_node.query(f"INSERT INTO {db}.quoted VALUES(2, now())") + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM {db}.quoted", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted_new", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM {db}.unquoted", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted_new", + "1\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + + +def utf8mb4_name_test(clickhouse_node, mysql_node, service_name): + db = "您Hi您" + table = "日期" + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"CREATE DATABASE `{db}`") + mysql_node.query( + f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())") + mysql_node.query(f"INSERT INTO `{db}`.`{table}`(id, `{table}`) VALUES(2, now())") + mysql_node.query( + f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO {db}.{table}_unquoted VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.{table}_unquoted(id, {table}) VALUES(2, now())") + clickhouse_node.query( + f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}`", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`", + "2\n", + ) + + # Inc sync + mysql_node.query( + f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())") + mysql_node.query(f"INSERT INTO `{db}`.`{table}2`(id, `{table}`) VALUES(2, now())") + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}2`", + "2\n", + ) + + mysql_node.query( + f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())") + mysql_node.query( + f"INSERT INTO {db}.{table}2_unquoted(id, {table}) VALUES(2, now())" + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`", + "2\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + + def system_parts_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS system_parts_test") clickhouse_node.query("DROP DATABASE IF EXISTS 
system_parts_test") @@ -1617,6 +1840,41 @@ def materialized_with_column_comments_test(clickhouse_node, mysql_node, service_ mysql_node.query("DROP DATABASE materialized_with_column_comments_test") +def double_quoted_comment(clickhouse_node, mysql_node, service_name): + db = "comment_db" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + mysql_node.query( + f'CREATE TABLE {db}.t1 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")' + ) + mysql_node.query( + f"CREATE TABLE {db}.t2 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')" + ) + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\nt2\n", + ) + + # incremental + mysql_node.query( + f'CREATE TABLE {db}.t3 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")' + ) + mysql_node.query( + f"CREATE TABLE {db}.t4 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')" + ) + check_query( + clickhouse_node, f"SHOW TABLES FROM {db} FORMAT TSV", "t1\nt2\nt3\nt4\n" + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + def materialized_with_enum8_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test") clickhouse_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test") @@ -2334,5 +2592,48 @@ def named_collections(clickhouse_node, mysql_node, service_name): f"/* expect: (1, 'a', 1), (2, 'b', 2) */ SELECT * FROM {db}.t1", "1\ta\t1\n2\tb\t2\n", ) + clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=9999") + clickhouse_node.query_with_retry(f"DETACH DATABASE {db} SYNC") + mysql_node.query(f"INSERT INTO {db}.t1 VALUES (3, 'c', 3)") + assert "ConnectionFailed:" in clickhouse_node.query_and_get_error( + f"ATTACH DATABASE {db}" + ) + clickhouse_node.query(f"ALTER NAMED COLLECTION {db} SET port=3306") + clickhouse_node.query(f"ATTACH DATABASE {db}") + check_query( + clickhouse_node, + f"/* expect: (1, 'a', 1), (2, 'b', 2), (3, 'c', 3) */ SELECT * FROM {db}.t1", + "1\ta\t1\n2\tb\t2\n3\tc\t3\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + +def create_table_as_select(clickhouse_node, mysql_node, service_name): + db = "create_table_as_select" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializeMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + mysql_node.query( + f"CREATE TABLE {db}.t1(a INT NOT NULL PRIMARY KEY) ENGINE = InnoDB" + ) + mysql_node.query(f"INSERT INTO {db}.t1 VALUES (1)") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\n", + ) + + mysql_node.query(f"CREATE TABLE {db}.t2(PRIMARY KEY(a)) AS SELECT * FROM {db}.t1") + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\nt2\n", + ) + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") mysql_node.query(f"DROP DATABASE IF EXISTS {db}") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 21316d1a474..5096e0a03b7 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ 
b/tests/integration/test_materialized_mysql_database/test.py @@ -1,17 +1,11 @@ -import os -import os.path as p import time -import pwd -import re import pymysql.cursors import pytest from helpers.cluster import ( ClickHouseCluster, ClickHouseInstance, get_docker_compose_path, - run_and_check, ) -import docker import logging from . import materialized_with_ddl @@ -52,6 +46,7 @@ def started_cluster(): cluster.start() yield cluster finally: + node_db.stop_clickhouse() # ensures that coverage report is written to disk, even if cluster.shutdown() times out. cluster.shutdown() @@ -62,8 +57,6 @@ class MySQLConnection: user="root", password="clickhouse", ip_address=None, - docker_compose=None, - project_name=cluster.project_name, ): self.user = user self.port = port @@ -86,7 +79,7 @@ class MySQLConnection: else: self.mysql_connection.ping(reconnect=True) logging.debug( - "MySQL Connection establised: {}:{}".format( + "MySQL Connection established: {}:{}".format( self.ip_address, self.port ) ) @@ -94,7 +87,7 @@ class MySQLConnection: except Exception as e: errors += [str(e)] time.sleep(1) - raise Exception("Connection not establised, {}".format(errors)) + raise Exception("Connection not established, {}".format(errors)) def query(self, execution_query): with self.alloc_connection().cursor() as cursor: @@ -118,9 +111,9 @@ class MySQLConnection: if result is not None: print(cursor.fetchall()) - def query_and_get_data(self, executio_query): + def query_and_get_data(self, execution_query): with self.alloc_connection().cursor() as cursor: - cursor.execute(executio_query) + cursor.execute(execution_query) return cursor.fetchall() def close(self): @@ -262,6 +255,12 @@ def test_materialized_database_ddl_with_empty_transaction_8_0( ) +def test_text_blob_charset(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.text_blob_with_charset_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + + def test_select_without_columns_5_7( started_cluster, started_mysql_5_7, clickhouse_node ): @@ -375,6 +374,12 @@ def test_utf8mb4( ): materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql57") materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql80") + materialized_with_ddl.utf8mb4_column_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + materialized_with_ddl.utf8mb4_name_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node): @@ -416,6 +421,12 @@ def test_materialized_with_column_comments( ) +def test_double_quoted_comment(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.double_quoted_comment( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + + def test_materialized_with_enum( started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node ): @@ -529,3 +540,9 @@ def test_named_collections(started_cluster, started_mysql_8_0, clickhouse_node): materialized_with_ddl.named_collections( clickhouse_node, started_mysql_8_0, "mysql80" ) + + +def test_create_table_as_select(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.create_table_as_select( + clickhouse_node, started_mysql_8_0, "mysql80" + ) diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/__init__.py @@ -0,0 +1 @@ 
+#!/usr/bin/env python3 diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml new file mode 100644 index 00000000000..56fc5ed34ca --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml new file mode 100644 index 00000000000..5b3e17d145f --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml @@ -0,0 +1,5 @@ + + 1 + 4096 + 8192 + diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py new file mode 100644 index 00000000000..df9439bc2bb --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -0,0 +1,40 @@ +from helpers.cluster import ClickHouseCluster +import pytest + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/memory_profiler.xml"], + user_configs=["configs/max_untracked_memory.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_trace_boundaries_work(started_cluster): + if node.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") + node.query("SYSTEM FLUSH LOGS") + + assert ( + node.query( + "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'" + ) + == "1\n" + ) + assert ( + node.query( + "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" + ) + == "0\n" + ) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 761b5257a34..121b9b294f0 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -7,6 +7,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.utility import generate_values, replace_config, SafeThread from azure.storage.blob import BlobServiceClient +from test_storage_azure_blob_storage.test import azure_query SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -38,27 +39,10 @@ def cluster(): cluster.shutdown() -# Note: use this for selects and inserts and create table queries. +# Note: use azure_query for selects and inserts and create table queries. # For inserts there is no guarantee that retries will not result in duplicates. -# But it is better to retry anyway because 'Connection was closed by the server' error +# But it is better to retry anyway because connection related errors # happens in fact only for inserts because reads already have build-in retries in code. 
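# A hedged sketch, not the shared implementation: the azure_query helper imported
# above from test_storage_azure_blob_storage.test retries transient transport
# errors in the way the note describes. The function name, signature and
# retriable-error list below are illustrative assumptions only.
import logging


def retrying_query(node, query, try_num=3, settings=None,
                   retriable_errors=("Connection was closed by the server",)):
    last_error = None
    for attempt in range(try_num):
        try:
            return node.query(query, settings=settings or {})
        except Exception as ex:
            last_error = ex
            # Give up immediately on errors we do not consider transient.
            if not any(err in str(ex) for err in retriable_errors):
                raise
            logging.info("Attempt %s failed with a retriable error: %s", attempt, ex)
    raise Exception(last_error)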
-def azure_query(node, query, try_num=3, settings={}): - for i in range(try_num): - try: - return node.query(query, settings=settings) - except Exception as ex: - retriable_errors = [ - "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response" - ] - retry = False - for error in retriable_errors: - if error in str(ex): - retry = True - logging.info(f"Try num: {i}. Having retriable error: {ex}") - break - if not retry or i == try_num - 1: - raise Exception(ex) - continue def create_table(node, table_name, **additional_settings): @@ -215,7 +199,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical): if attempt == 59: assert parts_count == "(1)" - time.sleep(1) + time.sleep(10) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( diff --git a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml index 858d77e9ea0..e11a406bcbc 100644 --- a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml @@ -28,6 +28,7 @@ 0 + 1.0 true diff --git a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml index 504280e4bed..4f0e2db9b08 100644 --- a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml @@ -152,6 +152,7 @@ 0 + 1.0 0 diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index f754bc905bf..507f25209a4 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -783,9 +783,9 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli min_key = node.query("SELECT min(key) FROM test_merge_canceled_by_s3_errors") assert int(min_key) == 0, min_key - broken_s3.setup_error_at_object_upload() + broken_s3.setup_at_object_upload() broken_s3.setup_fake_multpartuploads() - broken_s3.setup_error_at_part_upload() + broken_s3.setup_at_part_upload() node.query("SYSTEM START MERGES test_merge_canceled_by_s3_errors") @@ -828,7 +828,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): settings={"materialize_ttl_after_modify": 0}, ) - broken_s3.setup_error_at_object_upload(count=1, after=1) + broken_s3.setup_at_object_upload(count=1, after=1) node.query("SYSTEM START MERGES merge_canceled_by_s3_errors_when_move") @@ -855,6 +855,11 @@ def test_s3_engine_heavy_write_check_mem( memory = in_flight_memory[1] node = cluster.instances[node_name] + + # it's bad idea to test something related to memory with sanitizers + if node.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + node.query("DROP TABLE IF EXISTS s3_test SYNC") node.query( "CREATE TABLE s3_test" @@ -887,7 +892,7 @@ def test_s3_engine_heavy_write_check_mem( assert int(memory_usage) < 1.2 * memory assert int(memory_usage) > 0.8 * memory - assert int(wait_inflight) > 10 * 1000 * 1000 + assert int(wait_inflight) > in_flight * 1000 * 1000 check_no_objects_after_drop(cluster, node_name=node_name) diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml new file mode 100644 index 
00000000000..c58c957b596 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml @@ -0,0 +1,5 @@ + + + 1.0 + + diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml deleted file mode 100644 index 0011583a68c..00000000000 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml deleted file mode 100644 index feb537ebbce..00000000000 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml +++ /dev/null @@ -1,18 +0,0 @@ - - 9000 - 127.0.0.1 - - - - true - none - - AcceptCertificateHandler - - - - - 500 - ./clickhouse/ - users.xml - diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 90dda631924..b47d741e78e 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -67,6 +67,7 @@ def cluster(): "configs/config.d/storage_conf.xml", "configs/config.d/instant_moves.xml", "configs/config.d/part_log.xml", + "configs/config.d/merge_tree.xml", ], with_minio=True, ) diff --git a/tests/integration/test_multi_access_storage_role_management/__init__.py b/tests/integration/test_multi_access_storage_role_management/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_multi_access_storage_role_management/configs/memory.xml b/tests/integration/test_multi_access_storage_role_management/configs/memory.xml new file mode 100644 index 00000000000..c2f60c8bbc1 --- /dev/null +++ b/tests/integration/test_multi_access_storage_role_management/configs/memory.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_multi_access_storage_role_management/configs/roles.xml b/tests/integration/test_multi_access_storage_role_management/configs/roles.xml new file mode 100644 index 00000000000..faaf1ee27ae --- /dev/null +++ b/tests/integration/test_multi_access_storage_role_management/configs/roles.xml @@ -0,0 +1,9 @@ + + + + + GRANT ALL ON *.* WITH GRANT OPTION + + + + diff --git a/tests/integration/test_multi_access_storage_role_management/test.py b/tests/integration/test_multi_access_storage_role_management/test.py new file mode 100644 index 00000000000..77aea411394 --- /dev/null +++ b/tests/integration/test_multi_access_storage_role_management/test.py @@ -0,0 +1,153 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", stay_alive=True, main_configs=["configs/memory.xml"] +) + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + node.query("CREATE DATABASE mydb") + node.query("CREATE TABLE mydb.table1(x UInt32) ENGINE=Log") + + node.query("CREATE USER test_user") + node.query("CREATE USER test_user2") + yield cluster + + finally: + cluster.shutdown() + + +def execute_test_for_access_type(access_type: str, system_table_name: str): + node.query(f"CREATE {access_type} test1 IN local_directory") + node.query(f"CREATE {access_type} test2 IN local_directory") + 
node.query(f"CREATE {access_type} test3 IN local_directory") + + node.query(f"CREATE {access_type} test4 IN memory") + node.query(f"CREATE {access_type} test5 IN memory") + node.query(f"CREATE {access_type} test6 IN memory") + + # Already exists + with pytest.raises(QueryRuntimeException): + node.query(f"CREATE {access_type} test1 IN memory") + + node.query(f"MOVE {access_type} test1 TO memory") + assert node.query( + f"SELECT storage FROM system.{system_table_name} WHERE name = 'test1'" + ) == TSV(["memory"]) + + node.query(f"MOVE {access_type} test2 TO local_directory") + assert node.query( + f"SELECT storage FROM system.{system_table_name} WHERE name = 'test2'" + ) == TSV(["local_directory"]) + + node.query(f"MOVE {access_type} test2,test3 TO memory") + assert node.query( + f"SELECT storage FROM system.{system_table_name} WHERE name = 'test2'" + ) == TSV(["memory"]) + assert node.query( + f"SELECT storage FROM system.{system_table_name} WHERE name = 'test3'" + ) == TSV(["memory"]) + + node.query(f"MOVE {access_type} test4,test5 TO local_directory") + + # Different storages + with pytest.raises(QueryRuntimeException): + node.query(f"MOVE {access_type} test4,test1 TO memory") + + # Doesn't exist + with pytest.raises(QueryRuntimeException): + node.query(f"MOVE {access_type} test7 TO local_directory") + + # Storage doesn't exist + with pytest.raises(QueryRuntimeException): + node.query(f"MOVE {access_type} test6 TO non_existing_storage") + + # Unwriteable storage + with pytest.raises(QueryRuntimeException): + node.query(f"MOVE {access_type} test6 TO users_xml") + + node.query(f"DROP {access_type} test1") + node.query(f"DROP {access_type} test2") + node.query(f"DROP {access_type} test3") + node.query(f"DROP {access_type} test4") + node.query(f"DROP {access_type} test5") + node.query(f"DROP {access_type} test6") + + +def test_roles(): + execute_test_for_access_type("ROLE", "roles") + + +def test_users(): + execute_test_for_access_type("USER", "users") + + +def test_settings_profiles(): + execute_test_for_access_type("SETTINGS PROFILE", "settings_profiles") + + +def test_quotas(): + execute_test_for_access_type("QUOTA", "quotas") + + +def test_role_from_different_storages(): + node.query("CREATE ROLE default_role") + node.query("GRANT SELECT ON system.* TO default_role") + + assert node.query("SHOW GRANTS FOR default_role") == TSV( + ["GRANT SELECT ON system.* TO default_role"] + ) + assert node.query("SHOW ROLES") == TSV(["default_role"]) + + node.query("GRANT default_role TO test_user") + + node.copy_file_to_container( + os.path.join(SCRIPT_DIR, "configs/roles.xml"), + "/etc/clickhouse-server/users.d/roles.xml", + ) + + node.restart_clickhouse() + + assert node.query("SELECT name, storage FROM system.roles") == TSV( + [["default_role", "users_xml"], ["default_role", "local_directory"]] + ) + + # Role from users.xml will have priority + assert node.query("SHOW GRANTS FOR default_role") == TSV( + ["GRANT ALL ON *.* TO default_role WITH GRANT OPTION"] + ) + + node.query("GRANT default_role TO test_user") + node.query("GRANT default_role TO test_user2") + assert node.query( + "SELECT granted_role_id FROM system.role_grants WHERE user_name = 'test_user2'" + ) == TSV( + [ + "62bedbf3-7fb1-94cb-3a35-e479693223b3" + ] # roles from users.xml have deterministic ids + ) + + node.query("DROP ROLE default_role FROM local_directory") + assert node.query( + "SELECT granted_role_id FROM system.role_grants WHERE user_name = 'test_user'" + ) == TSV(["62bedbf3-7fb1-94cb-3a35-e479693223b3"]) + + # Already 
exists + with pytest.raises(QueryRuntimeException): + node.query("CREATE ROLE default_role IN memory") + + node.query("CREATE ROLE other_role IN memory") + + assert node.query( + "SELECT storage FROM system.roles WHERE name = 'other_role'" + ) == TSV(["memory"]) diff --git a/tests/integration/test_multiple_disks/configs/logs_config.xml b/tests/integration/test_multiple_disks/configs/logs_config.xml index b0643c8bdad..2ee8bb55f38 100644 --- a/tests/integration/test_multiple_disks/configs/logs_config.xml +++ b/tests/integration/test_multiple_disks/configs/logs_config.xml @@ -14,8 +14,4 @@ part_log
         <flush_interval_milliseconds>500</flush_interval_milliseconds>
-
-    <merge_tree_metadata_cache>
-        <lru_cache_size>268435456</lru_cache_size>
-        <continue_if_corrupted>true</continue_if_corrupted>
-    </merge_tree_metadata_cache>
diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 5561d63840b..30669feb6b3 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -818,9 +818,10 @@ def test_start_stop_moves(start_cluster, name, engine): node1.query(f"SYSTEM STOP MOVES {name}") node1.query(f"SYSTEM STOP MERGES {name}") + first_part = None for i in range(5): data = [] # 5MB in total - for i in range(5): + for _ in range(5): data.append(get_random_string(1024 * 1024)) # 1MB row # jbod size is 40MB, so lets insert 5MB batch 7 times node1.query_with_retry( @@ -829,7 +830,13 @@ def test_start_stop_moves(start_cluster, name, engine): ) ) - first_part = get_oldest_part(node1, name) + # we cannot rely simply on modification time of part because it can be changed + # by different background operations so we explicitly check after the first + # part is inserted + if i == 0: + first_part = get_oldest_part(node1, name) + + assert first_part is not None used_disks = get_used_disks_for_table(node1, name) @@ -882,15 +889,12 @@ def get_paths_for_partition_from_part_log(node, table, partition_id): @pytest.mark.parametrize( - "name,engine,use_metadata_cache", + "name,engine", [ - pytest.param("altering_mt", "MergeTree()", "false", id="mt"), - pytest.param("altering_mt", "MergeTree()", "true", id="mt_use_metadata_cache"), - # ("altering_replicated_mt","ReplicatedMergeTree('/clickhouse/altering_replicated_mt', '1')",), - # SYSTEM STOP MERGES doesn't disable merges assignments + pytest.param("altering_mt", "MergeTree()", id="mt"), ], ) -def test_alter_move(start_cluster, name, engine, use_metadata_cache): +def test_alter_move(start_cluster, name, engine): try: node1.query( """ @@ -900,9 +904,9 @@ def test_alter_move(start_cluster, name, engine, use_metadata_cache): ) ENGINE = {engine} ORDER BY tuple() PARTITION BY toYYYYMM(EventDate) - SETTINGS storage_policy='jbods_with_external', use_metadata_cache={use_metadata_cache} + SETTINGS storage_policy='jbods_with_external' """.format( - name=name, engine=engine, use_metadata_cache=use_metadata_cache + name=name, engine=engine ) ) @@ -1162,152 +1166,6 @@ def produce_alter_move(node, name): pass -@pytest.mark.parametrize( - "name,engine", - [ - pytest.param("concurrently_altering_mt", "MergeTree()", id="mt"), - pytest.param( - "concurrently_altering_replicated_mt", - "ReplicatedMergeTree('/clickhouse/concurrently_altering_replicated_mt', '1')", - id="replicated", - ), - ], -) -def test_concurrent_alter_move(start_cluster, name, engine): - try: - node1.query_with_retry( - """ - CREATE TABLE IF NOT EXISTS {name} ( - EventDate Date, - number UInt64 - ) ENGINE = {engine} - ORDER BY tuple() - PARTITION BY toYYYYMM(EventDate) - SETTINGS storage_policy='jbods_with_external' - """.format( - name=name, engine=engine - ) - ) - - values = list({random.randint(1, 1000000) for _ in range(0, 1000)}) - - def insert(num): - for i in range(num): - day = random.randint(11, 30) - value = values.pop() - month = "0" + str(random.choice([3, 4])) - node1.query_with_retry( - "INSERT INTO {} VALUES(toDate('2019-{m}-{d}'), {v})".format( - name, m=month, d=day, v=value - ) - ) - - def alter_move(num): - for i in range(num): - produce_alter_move(node1, name) - - def alter_update(num): - for i in range(num): - node1.query( - "ALTER TABLE {} UPDATE number = number + 1 WHERE 1".format(name) - ) - - def optimize_table(num): - for i in range(num): - node1.query_with_retry("OPTIMIZE TABLE {} FINAL".format(name)) - 
- p = Pool(15) - tasks = [] - for i in range(5): - tasks.append(p.apply_async(insert, (100,))) - tasks.append(p.apply_async(alter_move, (100,))) - tasks.append(p.apply_async(alter_update, (100,))) - tasks.append(p.apply_async(optimize_table, (100,))) - - for task in tasks: - task.get(timeout=240) - - assert node1.query("SELECT 1") == "1\n" - assert node1.query("SELECT COUNT() FROM {}".format(name)) == "500\n" - finally: - node1.query(f"DROP TABLE IF EXISTS {name} SYNC") - - -@pytest.mark.parametrize( - "name,engine", - [ - pytest.param("concurrently_dropping_mt", "MergeTree()", id="mt"), - pytest.param( - "concurrently_dropping_replicated_mt", - "ReplicatedMergeTree('/clickhouse/concurrently_dropping_replicated_mt', '1')", - id="replicated", - ), - ], -) -def test_concurrent_alter_move_and_drop(start_cluster, name, engine): - try: - node1.query( - """ - CREATE TABLE IF NOT EXISTS {name} ( - EventDate Date, - number UInt64 - ) ENGINE = {engine} - ORDER BY tuple() - PARTITION BY toYYYYMM(EventDate) - SETTINGS storage_policy='jbods_with_external' - """.format( - name=name, engine=engine - ) - ) - - values = list({random.randint(1, 1000000) for _ in range(0, 1000)}) - - def insert(num): - for i in range(num): - day = random.randint(11, 30) - value = values.pop() - month = "0" + str(random.choice([3, 4])) - node1.query_with_retry( - "INSERT INTO {} VALUES(toDate('2019-{m}-{d}'), {v})".format( - name, m=month, d=day, v=value - ) - ) - - def alter_move(num): - for i in range(num): - produce_alter_move(node1, name) - - def alter_drop(num): - for i in range(num): - partition = random.choice([201903, 201904]) - op = random.choice(["drop", "detach"]) - try: - node1.query( - "ALTER TABLE {} {} PARTITION {}".format(name, op, partition) - ) - except QueryRuntimeException as e: - if "Code: 650" in e.stderr: - pass - else: - raise e - - insert(20) - p = Pool(15) - tasks = [] - for i in range(5): - tasks.append(p.apply_async(insert, (20,))) - tasks.append(p.apply_async(alter_move, (20,))) - tasks.append(p.apply_async(alter_drop, (20,))) - - for task in tasks: - task.get(timeout=120) - - assert node1.query("SELECT 1") == "1\n" - - finally: - node1.query_with_retry(f"DROP TABLE IF EXISTS {name} SYNC") - - @pytest.mark.parametrize( "name,engine", [ @@ -1711,7 +1569,7 @@ def test_freeze(start_cluster): ) ENGINE = MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(d) - SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false + SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1 """ ) diff --git a/tests/integration/test_mutations_with_merge_tree/test.py b/tests/integration/test_mutations_with_merge_tree/test.py index e7d7abc8c7c..0b4a750b38e 100644 --- a/tests/integration/test_mutations_with_merge_tree/test.py +++ b/tests/integration/test_mutations_with_merge_tree/test.py @@ -27,111 +27,6 @@ def started_cluster(): cluster.shutdown() -@pytest.mark.skip(reason="Skipping mutations in partition does not work") -def test_mutations_in_partition_background(started_cluster): - try: - numbers = 100 - - name = "test_mutations_in_partition" - instance_test_mutations.query( - f"""CREATE TABLE {name} (date Date, a UInt64, b String) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY a""" - ) - instance_test_mutations.query( - f"""INSERT INTO {name} SELECT '2019-07-29' AS date, number, toString(number) FROM numbers({numbers})""" - ) - - for i in range(0, numbers, 3): - 
instance_test_mutations.query( - f"""ALTER TABLE {name} DELETE IN PARTITION {i} WHERE a = {i}""" - ) - - for i in range(1, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} UPDATE b = 'changed' IN PARTITION {i} WHERE a = {i} """ - ) - - def count_and_changed(): - return instance_test_mutations.query( - f"SELECT count(), countIf(b == 'changed') FROM {name} SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - all_done = False - for wait_times_for_mutation in range( - 100 - ): # wait for replication 80 seconds max - time.sleep(0.8) - - if count_and_changed() == ["66,33"]: - all_done = True - break - - print( - instance_test_mutations.query( - f"SELECT mutation_id, command, parts_to_do, is_done, latest_failed_part, latest_fail_reason, parts_to_do_names FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT TSVWithNames" - ) - ) - - assert (count_and_changed(), all_done) == (["66,33"], True) - assert instance_test_mutations.query( - f"SELECT count(), sum(is_done) FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() == ["67,67"] - - finally: - instance_test_mutations.query(f"""DROP TABLE {name}""") - - -@pytest.mark.skip(reason="Skipping mutations in partition does not work") -@pytest.mark.parametrize("sync", [("last",), ("all",)]) -def test_mutations_in_partition_sync(started_cluster, sync): - try: - numbers = 10 - - name = "test_mutations_in_partition_sync" - instance_test_mutations.query( - f"""CREATE TABLE {name} (date Date, a UInt64, b String) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY a""" - ) - instance_test_mutations.query( - f"""INSERT INTO {name} SELECT '2019-07-29' AS date, number, toString(number) FROM numbers({numbers})""" - ) - - for i in range(0, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} DELETE IN PARTITION {i} WHERE a = {i}""" - + (" SETTINGS mutations_sync = 1" if sync == "all" else "") - ) - - for reverse_index, i in reversed( - list(enumerate(reversed(range(1, numbers, 3)))) - ): - instance_test_mutations.query( - f"""ALTER TABLE {name} UPDATE b = 'changed' IN PARTITION {i} WHERE a = {i}""" - + ( - " SETTINGS mutations_sync = 1" - if not reverse_index or sync == "all" - else "" - ) - ) - - def count_and_changed(): - return instance_test_mutations.query( - f"SELECT count(), countIf(b == 'changed') FROM {name} SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - print( - instance_test_mutations.query( - f"SELECT mutation_id, command, parts_to_do, is_done, latest_failed_part, latest_fail_reason FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT TSVWithNames" - ) - ) - - assert count_and_changed() == ["6,3"] - assert instance_test_mutations.query( - f"SELECT count(), sum(is_done) FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() == ["7,7"] - - finally: - instance_test_mutations.query(f"""DROP TABLE {name}""") - - def test_mutations_with_merge_background_task(started_cluster): instance_test_mutations.query( """SYSTEM STOP MERGES test_mutations_with_ast_elements""" @@ -190,92 +85,3 @@ def test_mutations_with_truncate_table(started_cluster): ).rstrip() == "0" ) - - -@pytest.mark.skip(reason="Skipping mutations in partition does not work") -def 
test_mutations_will_not_hang_for_non_existing_parts_sync(started_cluster): - try: - numbers = 100 - - name = "test_mutations_will_not_hang_for_non_existing_parts_sync" - instance_test_mutations.query( - f"""CREATE TABLE {name} (date Date, a UInt64, b String) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY a""" - ) - instance_test_mutations.query( - f"""INSERT INTO {name} SELECT '2019-07-29' AS date, number, toString(number) FROM numbers({numbers})""" - ) - - for i in range(0, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} DELETE IN PARTITION {i+1000} WHERE a = {i} SETTINGS mutations_sync = 1""" - ) - - def count(): - return instance_test_mutations.query( - f"SELECT count() FROM {name} SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - print( - instance_test_mutations.query( - f"SELECT mutation_id, command, parts_to_do, is_done, latest_failed_part, latest_fail_reason, parts_to_do_names FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT TSVWithNames" - ) - ) - - assert count() == [f"{numbers}"] - assert instance_test_mutations.query( - f"SELECT count(), sum(is_done) FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() == [f"34,34"] - - finally: - instance_test_mutations.query(f"""DROP TABLE {name}""") - - -@pytest.mark.skip(reason="Skipping mutations in partition does not work") -def test_mutations_will_not_hang_for_non_existing_parts_async(started_cluster): - try: - numbers = 100 - - name = "test_mutations_will_not_hang_for_non_existing_parts_async" - instance_test_mutations.query( - f"""CREATE TABLE {name} (date Date, a UInt64, b String) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY a""" - ) - instance_test_mutations.query( - f"""INSERT INTO {name} SELECT '2019-07-29' AS date, number, toString(number) FROM numbers({numbers})""" - ) - - for i in range(0, numbers, 3): - instance_test_mutations.query( - f"""ALTER TABLE {name} DELETE IN PARTITION {i+1000} WHERE a = {i}""" - ) - - def count(): - return instance_test_mutations.query( - f"SELECT count() FROM {name} SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - def count_and_sum_is_done(): - return instance_test_mutations.query( - f"SELECT count(), sum(is_done) FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT CSV" - ).splitlines() - - all_done = False - for wait_times_for_mutation in range( - 100 - ): # wait for replication 80 seconds max - time.sleep(0.8) - - if count_and_sum_is_done() == ["34,34"]: - all_done = True - break - - print( - instance_test_mutations.query( - f"SELECT mutation_id, command, parts_to_do, is_done, latest_failed_part, latest_fail_reason, parts_to_do_names FROM system.mutations WHERE table = '{name}' SETTINGS force_index_by_date = 0, force_primary_key = 0 FORMAT TSVWithNames" - ) - ) - - assert count() == [f"{numbers}"] - assert count_and_sum_is_done() == ["34,34"] - - finally: - instance_test_mutations.query(f"""DROP TABLE {name}""") diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 18dde5307fd..00b5eb9e8aa 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -1021,3 +1021,24 @@ def test_memory_leak(started_cluster): clickhouse_node.query("DROP DATABASE test_database") 
clickhouse_node.restart_clickhouse() + + +def test_password_leak(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" + ) + assert "clickhouse" not in clickhouse_node.query( + "SHOW CREATE test_database.test_table" + ) diff --git a/tests/integration/test_mysql_protocol/golang.reference b/tests/integration/test_mysql_protocol/golang.reference index 4069b2a086a..7680c8c74dc 100644 --- a/tests/integration/test_mysql_protocol/golang.reference +++ b/tests/integration/test_mysql_protocol/golang.reference @@ -14,6 +14,7 @@ a UNSIGNED TINYINT Result: tables 1 tables 1 +tables 1 Columns: a b diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 5e03c4883b4..f2bffe69495 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -207,7 +207,7 @@ def test_mysql_client(started_cluster): expected_msg = "\n".join( [ "mysql: [Warning] Using a password on the command line interface can be insecure.", - "ERROR 81 (00000) at line 1: Code: 81. DB::Exception: Database system2 doesn't exist", + "ERROR 81 (00000) at line 1: Code: 81. DB::Exception: Database system2 does not exist", ] ) assert stderr[: len(expected_msg)].decode() == expected_msg @@ -621,7 +621,7 @@ def test_python_client(started_cluster): client.select_db("system2") assert exc_info.value.args[1].startswith( - "Code: 81. DB::Exception: Database system2 doesn't exist" + "Code: 81. 
DB::Exception: Database system2 does not exist" ), exc_info.value.args[1] cursor = client.cursor(pymysql.cursors.DictCursor) @@ -646,7 +646,7 @@ def test_golang_client(started_cluster, golang_container): ) assert code == 1 - assert stderr.decode() == "Error 81: Database abc doesn't exist\n" + assert stderr.decode() == "Error 81: Database abc does not exist\n" code, (stdout, stderr) = golang_container.exec_run( "./main --host {host} --port {port} --user default --password 123 --database " @@ -679,7 +679,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test_ssl.php {host} {port} default 123".format( @@ -688,7 +688,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test.php {host} {port} user_with_double_sha1 abacaba".format( @@ -697,7 +697,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" code, (stdout, stderr) = php_container.exec_run( "php -f test_ssl.php {host} {port} user_with_double_sha1 abacaba".format( @@ -706,7 +706,7 @@ def test_php_client(started_cluster, php_container): demux=True, ) assert code == 0 - assert stdout.decode() == "tables\ntables\n" + assert stdout.decode() == "tables\ntables\ntables\n" def test_mysqljs_client(started_cluster, nodejs_container): diff --git a/tests/integration/test_named_collections/test.py b/tests/integration/test_named_collections/test.py index 5f5657fad54..cbb8c94c701 100644 --- a/tests/integration/test_named_collections/test.py +++ b/tests/integration/test_named_collections/test.py @@ -344,7 +344,7 @@ def test_granular_access_create_alter_drop_query(cluster): ) assert ( - "DB::Exception: kek: Not enough privileges. To execute this query it's necessary to have grant CREATE NAMED COLLECTION" + "DB::Exception: kek: Not enough privileges. To execute this query, it's necessary to have the grant CREATE NAMED COLLECTION" in node.query_and_get_error( "CREATE NAMED COLLECTION collection2 AS key1=1, key2='value2'", user="kek" ) @@ -370,7 +370,7 @@ def test_granular_access_create_alter_drop_query(cluster): ) assert ( - "DB::Exception: kek: Not enough privileges. To execute this query it's necessary to have grant ALTER NAMED COLLECTION" + "DB::Exception: kek: Not enough privileges. To execute this query, it's necessary to have the grant ALTER NAMED COLLECTION" in node.query_and_get_error( "ALTER NAMED COLLECTION collection2 SET key1=2", user="kek" ) @@ -385,14 +385,14 @@ def test_granular_access_create_alter_drop_query(cluster): ) node.query("REVOKE alter named collection ON collection2 FROM kek") assert ( - "DB::Exception: kek: Not enough privileges. To execute this query it's necessary to have grant ALTER NAMED COLLECTION" + "DB::Exception: kek: Not enough privileges. To execute this query, it's necessary to have the grant ALTER NAMED COLLECTION" in node.query_and_get_error( "ALTER NAMED COLLECTION collection2 SET key1=3", user="kek" ) ) assert ( - "DB::Exception: kek: Not enough privileges. To execute this query it's necessary to have grant DROP NAMED COLLECTION" + "DB::Exception: kek: Not enough privileges. 
To execute this query, it's necessary to have the grant DROP NAMED COLLECTION" in node.query_and_get_error("DROP NAMED COLLECTION collection2", user="kek") ) node.query("GRANT drop named collection ON collection2 TO kek") diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/__init__.py b/tests/integration/test_named_collections_if_exists_on_cluster/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml b/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml new file mode 100644 index 00000000000..95336350c1e --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/configs/config.d/cluster.xml @@ -0,0 +1,22 @@ + + + + + true + + clickhouse1 + 9000 + + + clickhouse2 + 9000 + + + clickhouse3 + 9000 + + + true + + + diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml b/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml new file mode 100644 index 00000000000..15da914f666 --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/configs/users.d/default.xml @@ -0,0 +1,12 @@ + + + + + default + default + 1 + 1 + 1 + + + diff --git a/tests/integration/test_named_collections_if_exists_on_cluster/test.py b/tests/integration/test_named_collections_if_exists_on_cluster/test.py new file mode 100644 index 00000000000..5f5be9156b9 --- /dev/null +++ b/tests/integration/test_named_collections_if_exists_on_cluster/test.py @@ -0,0 +1,148 @@ +""" +Test cases: + +--- execute on the first node +create named collection foobar as a=1, b=2; +create named collection if not exists foobar on cluster '{cluster}' as a=1, b=2, c=3; +create named collection collection_present_on_first_node as a=1, b=2, s='string', x=0, y=-1; + +--- execute on any other node +alter named collection foobar on cluster '{cluster}' set a=2, c=3; +alter named collection foobar on cluster '{cluster}' delete b; +alter named collection foobar on cluster '{cluster}' set a=3 delete c; +alter named collection if exists collection_absent_ewerywhere on cluster '{cluster}' delete b; +alter named collection if exists collection_present_on_first_node on cluster '{cluster}' delete b; + +--- execute on every node +select * from system.named_collections; + +--- execute on any node +drop named collection foobar on cluster '{cluster}'; +drop named collection if exists collection_absent_ewerywhere on cluster '{cluster}'; +drop named collection if exists collection_present_on_first_node on cluster '{cluster}'; + +--- execute on every node +select * from system.named_collections; +""" + +import logging +from json import dumps, loads +from functools import partial +import pytest +from helpers.cluster import ClickHouseCluster + +dumps = partial(dumps, ensure_ascii=False) + +NODE01, NODE02, NODE03 = "clickhouse1", "clickhouse2", "clickhouse3" + +CHECK_STRING_VALUE = "Some ~`$tr!ng-_+=123@#%^&&()|?[]{}<🤡>.,\t\n:;" + +STMT_CREATE = "CREATE NAMED COLLECTION" +STMT_ALTER = "ALTER NAMED COLLECTION" +STMT_DROP = "DROP NAMED COLLECTION" + +SYSTEM_TABLE = "system.named_collections" + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + common_kwargs = dict( + main_configs=[ + "configs/config.d/cluster.xml", + ], + user_configs=[ + "configs/users.d/default.xml", + ], + with_zookeeper=True, + stay_alive=True, + ) + for name 
in [NODE01, NODE02, NODE03]: + cluster.add_instance(name, **common_kwargs) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_create_alter_drop_on_cluster(cluster): + """ + Executes the set of queries and checks the final named collections state. + """ + q_count_collections = f"select count() from {SYSTEM_TABLE}" + + def check_collections_empty(): + for name, node in list(cluster.instances.items()): + assert ( + "0" == node.query(q_count_collections).strip() + ), f"{SYSTEM_TABLE} is not empty on {name}" + + foobar_final_state = {"name": "foobar", "collection": {"a": "3"}} + collection_present_on_first_node_final_state = { + "name": "collection_present_on_first_node", + "collection": {"a": "1", "s": CHECK_STRING_VALUE, "x": "0", "y": "-1"}, + } + expected_state = { + NODE01: [foobar_final_state, collection_present_on_first_node_final_state], + NODE02: [foobar_final_state], + NODE03: [foobar_final_state], + } + + q_get_collections = f"select * from {SYSTEM_TABLE} order by name desc format JSON" + + def check_state(): + for name, node in list(cluster.instances.items()): + result = loads(node.query(q_get_collections))["data"] + logging.debug("%s ?= %s", dumps(result), dumps(expected_state[name])) + assert ( + expected_state[name] == result + ), f"invalid {SYSTEM_TABLE} content on {name}: {result}" + + check_collections_empty() + + # create executed on the first node + node = cluster.instances[NODE01] + node.query(f"{STMT_CREATE} foobar AS a=1, b=2") + node.query( + f"{STMT_CREATE} IF NOT EXISTS foobar ON CLUSTER 'cluster' AS a=1, b=2, c=3" + ) + node.query( + f"{STMT_CREATE} collection_present_on_first_node AS a=1, b=2, s='{CHECK_STRING_VALUE}', x=0, y=-1" + ) + + # alter executed on the second node + node = cluster.instances[NODE02] + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' SET a=2, c=3") + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' DELETE b") + node.query(f"{STMT_ALTER} foobar ON CLUSTER 'cluster' SET a=3 DELETE c") + node.query( + f"{STMT_ALTER} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster' DELETE b" + ) + node.query( + f"{STMT_ALTER} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster' DELETE b" + ) + + check_state() + for node in list(cluster.instances.values()): + node.restart_clickhouse() + check_state() + + # drop executed on the third node + node = cluster.instances[NODE03] + node.query(f"{STMT_DROP} foobar ON CLUSTER 'cluster'") + node.query( + f"{STMT_DROP} IF EXISTS collection_absent_ewerywhere ON CLUSTER 'cluster'" + ) + node.query( + f"{STMT_DROP} IF EXISTS collection_present_on_first_node ON CLUSTER 'cluster'" + ) + + check_collections_empty() + for node in list(cluster.instances.values()): + node.restart_clickhouse() + check_collections_empty() diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 14f5de17870..e8b3ba3fcf3 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -582,75 +582,83 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() - cursor.execute( - "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" - ) - node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") - node1.exec_in_container( - ["ss", 
"-K", "dport", "postgresql"], privileged=True, user="root" - ) - node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))", - "hello", - ) - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))", - "world", - ) - cursor.execute("truncate table clickhouse.test_table") + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute( + "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" + ) + node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") + node1.exec_in_container( + ["ss", "-K", "dport", "postgresql"], privileged=True, user="root" + ) + node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))", + "hello", + ) + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))", + "world", + ) + finally: + cursor.execute("truncate table clickhouse.test_table") def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() - cursor.execute("insert into clickhouse.test_table values(3, 3, 'xxx')") - for i in range(100): - try: - node1.query("system reload dictionary postgres_odbc_hashed", timeout=15) - except Exception as ex: - assert False, "Exception occured -- odbc-bridge hangs: " + str(ex) + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute("insert into clickhouse.test_table values(3, 3, 'xxx')") + for i in range(100): + try: + node1.query("system reload dictionary postgres_odbc_hashed", timeout=15) + except Exception as ex: + assert False, "Exception occured -- odbc-bridge hangs: " + str(ex) - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(3))", - "xxx", - ) - cursor.execute("truncate table clickhouse.test_table") + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(3))", + "xxx", + ) + finally: + cursor.execute("truncate table clickhouse.test_table") def test_no_connection_pooling(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() - cursor.execute( - "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" - ) - node1.exec_in_container(["ss", "-K", "dport", "5432"], privileged=True, user="root") - node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_nopool") - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(1))", - "hello", - ) - assert_eq_with_retry( - node1, - "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(2))", - "world", - ) + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute( + "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" + ) + node1.exec_in_container( + ["ss", "-K", "dport", "5432"], privileged=True, user="root" + ) + node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_nopool") + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(1))", + "hello", + ) + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(2))", + "world", + ) - # No open connections should be left because we 
don't use connection pooling. - assert "" == node1.exec_in_container( - ["ss", "-H", "dport", "5432"], privileged=True, user="root" - ) - cursor.execute("truncate table clickhouse.test_table") + # No open connections should be left because we don't use connection pooling. + assert "" == node1.exec_in_container( + ["ss", "-H", "dport", "5432"], privileged=True, user="root" + ) + finally: + cursor.execute("truncate table clickhouse.test_table") def test_postgres_insert(started_cluster): @@ -662,112 +670,119 @@ def test_postgres_insert(started_cluster): # postgres .yml file). This is needed to check parsing, validation and # reconstruction of connection string. - node1.query( - "create table pg_insert (id UInt64, column1 UInt8, column2 String) engine=ODBC('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')" - ) - node1.query("insert into pg_insert values (1, 1, 'hello'), (2, 2, 'world')") - assert node1.query("select * from pg_insert") == "1\t1\thello\n2\t2\tworld\n" - node1.query( - "insert into table function odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table') format CSV 3,3,test" - ) - node1.query( - "insert into table function odbc('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')" - " select number, number, 's' || toString(number) from numbers (4, 7)" - ) - assert ( - node1.query("select sum(column1), count(column1) from pg_insert") == "55\t10\n" - ) - assert ( + try: node1.query( - "select sum(n), count(n) from (select (*,).1 as n from (select * from odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table')))" + "create table pg_insert (id UInt64, column1 UInt8, column2 String) engine=ODBC('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')" ) - == "55\t10\n" - ) - node1.query("DROP TABLE pg_insert") - conn.cursor().execute("truncate table clickhouse.test_table") + node1.query("insert into pg_insert values (1, 1, 'hello'), (2, 2, 'world')") + assert node1.query("select * from pg_insert") == "1\t1\thello\n2\t2\tworld\n" + node1.query( + "insert into table function odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table') format CSV 3,3,test" + ) + node1.query( + "insert into table function odbc('DSN=postgresql_odbc;Servername=postgre-sql.local', 'clickhouse', 'test_table')" + " select number, number, 's' || toString(number) from numbers (4, 7)" + ) + assert ( + node1.query("select sum(column1), count(column1) from pg_insert") + == "55\t10\n" + ) + assert ( + node1.query( + "select sum(n), count(n) from (select (*,).1 as n from (select * from odbc('DSN=postgresql_odbc', 'clickhouse', 'test_table')))" + ) + == "55\t10\n" + ) + finally: + node1.query("DROP TABLE IF EXISTS pg_insert") + conn.cursor().execute("truncate table clickhouse.test_table") def test_odbc_postgres_date_data_type(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() - cursor.execute( - "CREATE TABLE clickhouse.test_date (id integer, column1 integer, column2 date)" - ) + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute( + "CREATE TABLE clickhouse.test_date (id integer, column1 integer, column2 date)" + ) - cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, 1, '2020-12-01')") - cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, 2, '2020-12-02')") - cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, 3, '2020-12-03')") - conn.commit() + cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, 1, '2020-12-01')") + 
cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, 2, '2020-12-02')") + cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, 3, '2020-12-03')") + conn.commit() - node1.query( - """ - CREATE TABLE test_date (id UInt64, column1 UInt64, column2 Date) - ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')""" - ) + node1.query( + """ + CREATE TABLE test_date (id UInt64, column1 UInt64, column2 Date) + ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')""" + ) - expected = "1\t1\t2020-12-01\n2\t2\t2020-12-02\n3\t3\t2020-12-03\n" - result = node1.query("SELECT * FROM test_date") - assert result == expected - cursor.execute("DROP TABLE clickhouse.test_date") - node1.query("DROP TABLE test_date") + expected = "1\t1\t2020-12-01\n2\t2\t2020-12-02\n3\t3\t2020-12-03\n" + result = node1.query("SELECT * FROM test_date") + assert result == expected + finally: + cursor.execute("DROP TABLE clickhouse.test_date") + node1.query("DROP TABLE IF EXISTS test_date") def test_odbc_postgres_conversions(started_cluster): skip_test_msan(node1) - conn = get_postgres_conn(started_cluster) - cursor = conn.cursor() + try: + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() - cursor.execute( - """CREATE TABLE clickhouse.test_types ( - a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, - h timestamp)""" - ) + cursor.execute( + """CREATE TABLE clickhouse.test_types ( + a smallint, b integer, c bigint, d real, e double precision, f serial, g bigserial, + h timestamp)""" + ) - node1.query( - """ - INSERT INTO TABLE FUNCTION - odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') - VALUES (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12')""" - ) + node1.query( + """ + INSERT INTO TABLE FUNCTION + odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') + VALUES (-32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12')""" + ) - result = node1.query( - """ - SELECT a, b, c, d, e, f, g, h - FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') - """ - ) + result = node1.query( + """ + SELECT a, b, c, d, e, f, g, h + FROM odbc('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types') + """ + ) - assert ( - result - == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\n" - ) - cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") + assert ( + result + == "-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12\n" + ) + cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") - cursor.execute( - """CREATE TABLE clickhouse.test_types (column1 Timestamp, column2 Numeric)""" - ) + cursor.execute( + """CREATE TABLE clickhouse.test_types (column1 Timestamp, column2 Numeric)""" + ) - node1.query( - """ - CREATE TABLE test_types (column1 DateTime64, column2 Decimal(5, 1)) - ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')""" - ) + node1.query( + """ + CREATE TABLE test_types (column1 DateTime64, column2 Decimal(5, 1)) + ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_types')""" + ) - node1.query( - """INSERT INTO test_types - 
SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)""" - ) + node1.query( + """INSERT INTO test_types + SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)""" + ) - expected = node1.query( - "SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)" - ) - result = node1.query("SELECT * FROM test_types") - cursor.execute("DROP TABLE clickhouse.test_types") - node1.query("DROP TABLE test_types") - assert result == expected + expected = node1.query( + "SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Etc/UTC'), toDecimal32(1.1, 1)" + ) + result = node1.query("SELECT * FROM test_types") + assert result == expected + finally: + cursor.execute("DROP TABLE IF EXISTS clickhouse.test_types") + node1.query("DROP TABLE IF EXISTS test_types") def test_odbc_cyrillic_with_varchar(started_cluster): diff --git a/tests/integration/test_old_versions/test.py b/tests/integration/test_old_versions/test.py index beef294b792..aff07c53114 100644 --- a/tests/integration/test_old_versions/test.py +++ b/tests/integration/test_old_versions/test.py @@ -10,6 +10,7 @@ node18_14 = cluster.add_instance( tag="18.14.19", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_1 = cluster.add_instance( "node19_1", @@ -17,6 +18,7 @@ node19_1 = cluster.add_instance( tag="19.1.16", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_4 = cluster.add_instance( "node19_4", @@ -24,6 +26,7 @@ node19_4 = cluster.add_instance( tag="19.4.5.35", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_8 = cluster.add_instance( "node19_8", @@ -31,6 +34,7 @@ node19_8 = cluster.add_instance( tag="19.8.3.8", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_11 = cluster.add_instance( "node19_11", @@ -38,6 +42,7 @@ node19_11 = cluster.add_instance( tag="19.11.13.74", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_13 = cluster.add_instance( "node19_13", @@ -45,6 +50,7 @@ node19_13 = cluster.add_instance( tag="19.13.7.57", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) node19_16 = cluster.add_instance( "node19_16", @@ -52,6 +58,7 @@ node19_16 = cluster.add_instance( tag="19.16.2.2", with_installed_binary=True, main_configs=["configs/config.d/test_cluster.xml"], + allow_analyzer=False, ) old_nodes = [node18_14, node19_1, node19_4, node19_8, node19_11, node19_13, node19_16] new_node = cluster.add_instance("node_new") diff --git a/tests/integration/test_on_cluster_timeouts/configs/users_config.xml b/tests/integration/test_on_cluster_timeouts/configs/users_config.xml index 76f01279ce4..85bd9b7215a 100644 --- a/tests/integration/test_on_cluster_timeouts/configs/users_config.xml +++ b/tests/integration/test_on_cluster_timeouts/configs/users_config.xml @@ -2,6 +2,7 @@ 2 + 0 diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index 93f03f4420e..2517b2d1ae6 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -38,7 +38,7 @@ def partition_table_simple(started_cluster): q( "CREATE TABLE test.partition_simple (date MATERIALIZED toDate(0), x UInt64, sample_key MATERIALIZED intHash64(x)) " "ENGINE=MergeTree PARTITION BY date SAMPLE BY 
sample_key ORDER BY (date,x,sample_key) " - "SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false" + "SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1" ) q("INSERT INTO test.partition_simple ( x ) VALUES ( now() )") q("INSERT INTO test.partition_simple ( x ) VALUES ( now()+1 )") @@ -150,7 +150,7 @@ def partition_table_complex(started_cluster): q("DROP TABLE IF EXISTS test.partition_complex") q( "CREATE TABLE test.partition_complex (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) " - "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false" + "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1" ) q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(31), 1)") q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(1), 2)") @@ -188,7 +188,7 @@ def test_partition_complex(partition_table_complex): def cannot_attach_active_part_table(started_cluster): q("DROP TABLE IF EXISTS test.attach_active") q( - "CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false" + "CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1" ) q("INSERT INTO test.attach_active SELECT number FROM system.numbers LIMIT 16") @@ -217,7 +217,7 @@ def attach_check_all_parts_table(started_cluster): q("DROP TABLE IF EXISTS test.attach_partition") q( "CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n " - "SETTINGS compress_marks=false, compress_primary_key=false, old_parts_lifetime=0" + "SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1, old_parts_lifetime=0" ) q( "INSERT INTO test.attach_partition SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8" @@ -299,7 +299,7 @@ def drop_detached_parts_table(started_cluster): q("SYSTEM STOP MERGES") q("DROP TABLE IF EXISTS test.drop_detached") q( - "CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false" + "CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1" ) q( "INSERT INTO test.drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8" @@ -370,13 +370,13 @@ def test_drop_detached_parts(drop_detached_parts_table): def test_system_detached_parts(drop_detached_parts_table): q( - "create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false" + "create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1" ) q( - "create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false" + "create table sdp_1 (n int, x int) 
engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1" ) q( - "create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false" + "create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1" ) q( "create table sdp_3 (n int, x Enum('broken' = 0, 'all' = 1)) engine=MergeTree order by n partition by x" @@ -497,7 +497,7 @@ def test_system_detached_parts(drop_detached_parts_table): def test_detached_part_dir_exists(started_cluster): q( "create table detached_part_dir_exists (n int) engine=MergeTree order by n " - "SETTINGS compress_marks=false, compress_primary_key=false, old_parts_lifetime=0" + "SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1, old_parts_lifetime=0" ) q("insert into detached_part_dir_exists select 1") # will create all_1_1_0 q( @@ -549,7 +549,7 @@ def test_detached_part_dir_exists(started_cluster): def test_make_clone_in_detached(started_cluster): q( - "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false" + "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1" ) path = path_to_data + "data/default/clone_in_detached/" diff --git a/tests/integration/test_peak_memory_usage/__init__.py b/tests/integration/test_peak_memory_usage/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_peak_memory_usage/configs/remote_servers.xml b/tests/integration/test_peak_memory_usage/configs/remote_servers.xml new file mode 100644 index 00000000000..b137758489e --- /dev/null +++ b/tests/integration/test_peak_memory_usage/configs/remote_servers.xml @@ -0,0 +1,20 @@ + + + + + 1 + + shard_1 + 9000 + + + + 3 + + shard_2 + 9000 + + + + + diff --git a/tests/integration/test_peak_memory_usage/test.py b/tests/integration/test_peak_memory_usage/test.py new file mode 100644 index 00000000000..a1313461482 --- /dev/null +++ b/tests/integration/test_peak_memory_usage/test.py @@ -0,0 +1,122 @@ +import pytest +import tempfile +import re + +from helpers.cluster import ClickHouseCluster +from helpers.uclient import client, prompt + +cluster = ClickHouseCluster(__file__) + +shard_1 = cluster.add_instance( + "shard_1", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={ + "shard": "shard_1", + }, +) +shard_2 = cluster.add_instance( + "shard_2", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, + macros={ + "shard": "shard_2", + }, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + shard_1.query( + "CREATE TABLE fixed_numbers ON CLUSTER 'cluster' (" + "number UInt64" + ") ENGINE=MergeTree()" + "ORDER BY number" + ) + + shard_1.query( + "CREATE TABLE fixed_numbers_2 ON CLUSTER 'cluster' (" + "number UInt64" + ") ENGINE=Memory ()" + ) + + shard_1.query( + "CREATE TABLE distributed_fixed_numbers (number UInt64) ENGINE=Distributed('cluster', 'default', 'fixed_numbers')" + ) + shard_1.query("INSERT INTO fixed_numbers SELECT number FROM numbers(0, 100)") + + 
shard_2.query("INSERT INTO fixed_numbers SELECT number FROM numbers(100, 200)") + + shard_1.query("INSERT INTO fixed_numbers_2 SELECT number FROM numbers(0, 10)") + + shard_2.query( + "INSERT INTO fixed_numbers_2 SELECT number FROM numbers(0, 120000)" + ) + + yield cluster + finally: + cluster.shutdown() + + +def get_memory_usage_from_client_output_and_close(client_output): + client_output.seek(0) + peek_memory_usage_str_found = False + for line in client_output: + print(f"'{line}'\n") + if not peek_memory_usage_str_found: + peek_memory_usage_str_found = "Peak memory usage" in line + + if peek_memory_usage_str_found: + search_obj = re.search(r"[+-]?[0-9]+\.[0-9]+", line) + if search_obj: + client_output.close() + print(f"peak_memory_usage {search_obj.group()}") + return search_obj.group() + + print(f"peak_memory_usage not found") + client_output.close() + return "" + + +def test_clickhouse_client_max_peak_memory_usage_distributed(started_cluster): + client_output = tempfile.TemporaryFile(mode="w+t") + command_text = ( + f"{started_cluster.get_client_cmd()} --host {shard_1.ip_address} --port 9000" + ) + with client(name="client1>", log=client_output, command=command_text) as client1: + client1.expect(prompt) + client1.send( + "SELECT COUNT(*) FROM distributed_fixed_numbers JOIN fixed_numbers_2 ON distributed_fixed_numbers.number=fixed_numbers_2.number", + ) + client1.expect("Peak memory usage", timeout=60) + client1.expect(prompt) + + peak_memory_usage = get_memory_usage_from_client_output_and_close(client_output) + assert peak_memory_usage + assert shard_2.contains_in_log( + f"Peak memory usage (for query): {peak_memory_usage}" + ) + + +def test_clickhouse_client_max_peak_memory_single_node(started_cluster): + client_output = tempfile.TemporaryFile(mode="w+t") + + command_text = ( + f"{started_cluster.get_client_cmd()} --host {shard_1.ip_address} --port 9000" + ) + with client(name="client1>", log=client_output, command=command_text) as client1: + client1.expect(prompt) + client1.send( + "SELECT COUNT(*) FROM (SELECT number FROM numbers(1,300000) INTERSECT SELECT number FROM numbers(10000,1200000))" + ) + client1.expect("Peak memory usage", timeout=60) + client1.expect(prompt) + + peak_memory_usage = get_memory_usage_from_client_output_and_close(client_output) + assert peak_memory_usage + assert shard_1.contains_in_log( + f"Peak memory usage (for query): {peak_memory_usage}" + ) diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index fb1f363b825..debb509de90 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -363,6 +363,7 @@ node7 = cluster.add_instance( tag="19.17.8.54", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node8 = cluster.add_instance( "node8", @@ -413,54 +414,6 @@ def start_cluster_diff_versions(): cluster.shutdown() -@pytest.mark.skip(reason="compatability is temporary broken") -def test_polymorphic_parts_diff_versions(start_cluster_diff_versions): - # Check that replication with Wide parts works between different versions. 
- - node_old = node7 - node_new = node8 - - insert_random_data("polymorphic_table", node7, 100) - node8.query("SYSTEM SYNC REPLICA polymorphic_table", timeout=20) - - assert node8.query("SELECT count() FROM polymorphic_table") == "100\n" - assert ( - node8.query( - "SELECT DISTINCT part_type FROM system.parts WHERE table = 'polymorphic_table' and active" - ) - == "Wide\n" - ) - - -@pytest.mark.skip(reason="compatability is temporary broken") -def test_polymorphic_parts_diff_versions_2(start_cluster_diff_versions): - # Replication doesn't work on old version if part is created in compact format, because - # this version doesn't know anything about it. It's considered to be ok. - - node_old = node7 - node_new = node8 - - insert_random_data("polymorphic_table_2", node_new, 100) - - assert node_new.query("SELECT count() FROM polymorphic_table_2") == "100\n" - assert node_old.query("SELECT count() FROM polymorphic_table_2") == "0\n" - with pytest.raises(Exception): - node_old.query("SYSTEM SYNC REPLICA polymorphic_table_2", timeout=3) - - node_old.restart_with_latest_version(fix_metadata=True) - - node_old.query("SYSTEM SYNC REPLICA polymorphic_table_2", timeout=20) - - # Works after update - assert node_old.query("SELECT count() FROM polymorphic_table_2") == "100\n" - assert ( - node_old.query( - "SELECT DISTINCT part_type FROM system.parts WHERE table = 'polymorphic_table_2' and active" - ) - == "Compact\n" - ) - - def test_polymorphic_parts_non_adaptive(start_cluster): node1.query("SYSTEM STOP MERGES") node2.query("SYSTEM STOP MERGES") @@ -498,7 +451,7 @@ def test_polymorphic_parts_index(start_cluster): """ CREATE TABLE test_index.index_compact(a UInt32, s String) ENGINE = MergeTree ORDER BY a - SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false""" + SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization=1""" ) node1.query( diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index 59a464f9020..bdf1056765e 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -400,6 +400,39 @@ def test_datetime(started_cluster): assert "DateTime64(6)" in node1.query("show create table pg.test") +def test_postgresql_password_leak(started_cluster): + conn = get_postgres_conn( + started_cluster.postgres_ip, started_cluster.postgres_port, database=True + ) + cursor = conn.cursor() + + cursor.execute("DROP SCHEMA IF EXISTS test_schema CASCADE") + cursor.execute("CREATE SCHEMA test_schema") + cursor.execute("CREATE TABLE test_schema.table1 (a integer)") + cursor.execute("CREATE TABLE table2 (a integer)") + + node1.query("DROP DATABASE IF EXISTS postgres_database") + node1.query( + "CREATE DATABASE postgres_database ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword', 'test_schema')" + ) + + node1.query("DROP DATABASE IF EXISTS postgres_database2") + node1.query( + "CREATE DATABASE postgres_database2 ENGINE = PostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')" + ) + + assert "mysecretpassword" not in node1.query("SHOW CREATE postgres_database.table1") + assert "mysecretpassword" not in node1.query( + "SHOW CREATE postgres_database2.table2" + ) + + node1.query("DROP 
DATABASE postgres_database") + node1.query("DROP DATABASE postgres_database2") + + cursor.execute("DROP SCHEMA test_schema CASCADE") + cursor.execute("DROP TABLE table2") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py index 377b1c89efc..57ee920d49c 100644 --- a/tests/integration/test_postgresql_replica_database_engine_1/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py @@ -46,7 +46,12 @@ pg_manager = PostgresManager() def started_cluster(): try: cluster.start() - pg_manager.init(instance, cluster.postgres_ip, cluster.postgres_port) + pg_manager.init( + instance, + cluster.postgres_ip, + cluster.postgres_port, + default_database="postgres_database", + ) yield cluster finally: @@ -74,16 +79,10 @@ def test_load_and_sync_all_database_tables(started_cluster): def test_replicating_dml(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() NUM_TABLES = 5 for i in range(NUM_TABLES): - create_postgres_table(cursor, "postgresql_replica_{}".format(i)) + pg_manager.create_postgres_table(f"postgresql_replica_{i}") instance.query( "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(50)".format( i, i @@ -96,39 +95,29 @@ def test_replicating_dml(started_cluster): for i in range(NUM_TABLES): instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT 50 + number, {} from numbers(1000)".format( - i, i - ) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT 50 + number, {i} from numbers(1000)" ) check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): - cursor.execute( - "UPDATE postgresql_replica_{} SET value = {} * {} WHERE key < 50;".format( - i, i, i - ) + pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = {i} * {i} WHERE key < 50;" ) - cursor.execute( - "UPDATE postgresql_replica_{} SET value = {} * {} * {} WHERE key >= 50;".format( - i, i, i, i - ) + pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = {i} * {i} * {i} WHERE key >= 50;" ) + check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): - cursor.execute( - "DELETE FROM postgresql_replica_{} WHERE (value*value + {}) % 2 = 0;".format( - i, i - ) + pg_manager.execute( + f"DELETE FROM postgresql_replica_{i} WHERE (value*value + {i}) % 2 = 0;" ) - cursor.execute( - "UPDATE postgresql_replica_{} SET value = value - (value % 7) WHERE key > 128 AND key < 512;".format( - i - ) - ) - cursor.execute( - "DELETE FROM postgresql_replica_{} WHERE key % 7 = 1;".format(i, i) + pg_manager.execute( + f"UPDATE postgresql_replica_{i} SET value = value - (value % 7) WHERE key > 128 AND key < 512;" ) + pg_manager.execute(f"DELETE FROM postgresql_replica_{i} WHERE key % 7 = 1;") + check_several_tables_are_synchronized(instance, NUM_TABLES) @@ -288,13 +277,7 @@ def test_load_and_sync_subset_of_database_tables(started_cluster): def test_changing_replica_identity_value(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") + pg_manager.create_postgres_table("postgresql_replica") instance.query( "INSERT INTO 
postgres_database.postgresql_replica SELECT 50 + number, number from numbers(50)" ) @@ -307,7 +290,7 @@ def test_changing_replica_identity_value(started_cluster): "INSERT INTO postgres_database.postgresql_replica SELECT 100 + number, number from numbers(50)" ) check_tables_are_synchronized(instance, "postgresql_replica") - cursor.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") + pg_manager.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") check_tables_are_synchronized(instance, "postgresql_replica") @@ -331,18 +314,13 @@ def test_clickhouse_restart(started_cluster): def test_replica_identity_index(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, + pg_manager.create_postgres_table( + "postgresql_replica", template=postgres_table_template_3 ) - cursor = conn.cursor() - - create_postgres_table( - cursor, "postgresql_replica", template=postgres_table_template_3 + pg_manager.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);") + pg_manager.execute( + "ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx" ) - cursor.execute("CREATE unique INDEX idx on postgresql_replica(key1, key2);") - cursor.execute("ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx") instance.query( "INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(50, 10)" ) @@ -355,35 +333,29 @@ def test_replica_identity_index(started_cluster): ) check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") - cursor.execute("UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ") - cursor.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") - cursor.execute("UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 ") - cursor.execute("UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 ") + pg_manager.execute("UPDATE postgresql_replica SET key1=key1-25 WHERE key1<100 ") + pg_manager.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") + pg_manager.execute( + "UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 " + ) + pg_manager.execute( + "UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 " + ) check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") - cursor.execute("DELETE FROM postgresql_replica WHERE key2<75;") + pg_manager.execute("DELETE FROM postgresql_replica WHERE key2<75;") check_tables_are_synchronized(instance, "postgresql_replica", order_by="key1") def test_table_schema_changes(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() NUM_TABLES = 5 for i in range(NUM_TABLES): - create_postgres_table( - cursor, - "postgresql_replica_{}".format(i), - template=postgres_table_template_2, + pg_manager.create_postgres_table( + f"postgresql_replica_{i}", template=postgres_table_template_2 ) instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {}, {}, {} from numbers(25)".format( - i, i, i, i - ) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT number, {i}, {i}, {i} from numbers(25)" ) pg_manager.create_materialized_db( @@ -393,9 +365,7 @@ def test_table_schema_changes(started_cluster): for i in range(NUM_TABLES): instance.query( - "INSERT INTO postgres_database.postgresql_replica_{} SELECT 25 + number, {}, {}, {} from numbers(25)".format( - i, i, i, i - 
) + f"INSERT INTO postgres_database.postgresql_replica_{i} SELECT 25 + number, {i}, {i}, {i} from numbers(25)" ) check_several_tables_are_synchronized(instance, NUM_TABLES) @@ -410,9 +380,9 @@ def test_table_schema_changes(started_cluster): instance.query(f"SELECT count() FROM test_database.{altered_table}") ) - cursor.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2") + pg_manager.execute(f"ALTER TABLE {altered_table} DROP COLUMN value2") for i in range(NUM_TABLES): - cursor.execute(f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})") + pg_manager.execute(f"INSERT INTO postgresql_replica_{i} VALUES (50, {i}, {i})") assert instance.wait_for_log_line( f"Table postgresql_replica_{altered_idx} is skipped from replication stream" @@ -444,10 +414,7 @@ def test_many_concurrent_queries(started_cluster): port=started_cluster.postgres_port, database=True, ) - cursor = conn.cursor() - pg_manager.create_and_fill_postgres_tables_from_cursor( - cursor, NUM_TABLES, numbers=10000 - ) + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=10000) def attack(thread_id): print("thread {}".format(thread_id)) @@ -589,9 +556,8 @@ def test_multiple_databases(started_cluster): port=started_cluster.postgres_port, database=False, ) - cursor = conn.cursor() - pg_manager.create_postgres_db(cursor, "postgres_database_1") - pg_manager.create_postgres_db(cursor, "postgres_database_2") + pg_manager.create_postgres_db("postgres_database_1") + pg_manager.create_postgres_db("postgres_database_2") conn1 = get_postgres_conn( ip=started_cluster.postgres_ip, @@ -610,15 +576,13 @@ def test_multiple_databases(started_cluster): cursor2 = conn2.cursor() pg_manager.create_clickhouse_postgres_db( - cluster.postgres_ip, - cluster.postgres_port, "postgres_database_1", + "", "postgres_database_1", ) pg_manager.create_clickhouse_postgres_db( - cluster.postgres_ip, - cluster.postgres_port, "postgres_database_2", + "", "postgres_database_2", ) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml b/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml new file mode 100644 index 00000000000..4bc63453f55 --- /dev/null +++ b/tests/integration/test_postgresql_replica_database_engine_2/configs/merge_tree_too_many_parts.xml @@ -0,0 +1,5 @@ + + + 5 + + diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 90d19e9532c..3f2ec74180b 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -46,14 +46,32 @@ instance = cluster.add_instance( stay_alive=True, ) +instance2 = cluster.add_instance( + "instance2", + main_configs=["configs/log_conf.xml", "configs/merge_tree_too_many_parts.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, + stay_alive=True, +) + + pg_manager = PostgresManager() +pg_manager2 = PostgresManager() @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - pg_manager.init(instance, cluster.postgres_ip, cluster.postgres_port) + pg_manager.init( + instance, + cluster.postgres_ip, + cluster.postgres_port, + default_database="postgres_database", + ) + pg_manager2.init( + instance2, cluster.postgres_ip, cluster.postgres_port, "postgres_database2" + ) yield cluster finally: @@ -68,11 +86,9 @@ def setup_teardown(): def test_add_new_table_to_replication(started_cluster): - cursor 
= pg_manager.get_db_cursor() - cursor.execute("DROP TABLE IF EXISTS test_table") NUM_TABLES = 5 - pg_manager.create_and_fill_postgres_tables_from_cursor(cursor, NUM_TABLES, 10000) + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 10000) pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port ) @@ -85,7 +101,7 @@ def test_add_new_table_to_replication(started_cluster): ) table_name = "postgresql_replica_5" - pg_manager.create_and_fill_postgres_table_from_cursor(cursor, table_name) + pg_manager.create_and_fill_postgres_table(table_name) result = instance.query("SHOW CREATE DATABASE test_database") assert ( @@ -138,7 +154,7 @@ def test_add_new_table_to_replication(started_cluster): ) table_name = "postgresql_replica_6" - create_postgres_table(cursor, table_name) + pg_manager.create_postgres_table(table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format( table_name @@ -149,7 +165,7 @@ def test_add_new_table_to_replication(started_cluster): instance.restart_clickhouse() table_name = "postgresql_replica_7" - create_postgres_table(cursor, table_name) + pg_manager.create_postgres_table(table_name) instance.query( "INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format( table_name @@ -251,8 +267,7 @@ def test_remove_table_from_replication(started_cluster): == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n" ) - cursor = pg_manager.get_db_cursor() - cursor.execute(f"drop table if exists postgresql_replica_0;") + pg_manager.execute(f"drop table if exists postgresql_replica_0;") # Removing from replication table which does not exist in PostgreSQL must be ok. 
instance.query("DETACH TABLE test_database.postgresql_replica_0 PERMANENTLY") @@ -262,10 +277,11 @@ def test_remove_table_from_replication(started_cluster): def test_predefined_connection_configuration(started_cluster): - cursor = pg_manager.get_db_cursor() - cursor.execute(f"DROP TABLE IF EXISTS test_table") - cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)") - cursor.execute(f"INSERT INTO test_table SELECT 1, 2") + pg_manager.execute(f"DROP TABLE IF EXISTS test_table") + pg_manager.execute( + f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)" + ) + pg_manager.execute(f"INSERT INTO test_table SELECT 1, 2") instance.query( "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'" ) @@ -312,10 +328,9 @@ def test_database_with_single_non_default_schema(started_cluster): create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, + database_name=clickhouse_postgres_db, schema_name=schema_name, + postgres_database="postgres_database", ) for i in range(NUM_TABLES): @@ -347,7 +362,7 @@ def test_database_with_single_non_default_schema(started_cluster): check_all_tables_are_synchronized() altered_table = random.randint(0, NUM_TABLES - 1) - cursor.execute( + pg_manager.execute( "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer".format( altered_table ) @@ -414,10 +429,9 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, + database_name=clickhouse_postgres_db, schema_name=schema_name, + postgres_database="postgres_database", ) for i in range(NUM_TABLES): @@ -452,7 +466,7 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): check_all_tables_are_synchronized() altered_table = random.randint(0, NUM_TABLES - 1) - cursor.execute( + pg_manager.execute( "ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer".format( altered_table ) @@ -530,10 +544,9 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): clickhouse_postgres_db = f"clickhouse_postgres_db{i}" create_postgres_schema(cursor, schema_name) pg_manager.create_clickhouse_postgres_db( - ip=cluster.postgres_ip, - port=cluster.postgres_port, - name=clickhouse_postgres_db, + database_name=clickhouse_postgres_db, schema_name=schema_name, + postgres_database="postgres_database", ) for ti in range(NUM_TABLES): table_name = f"postgresql_replica_{ti}" @@ -566,7 +579,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): altered_schema = random.randint(0, schemas_num - 1) altered_table = random.randint(0, NUM_TABLES - 1) clickhouse_postgres_db = f"clickhouse_postgres_db{altered_schema}" - cursor.execute( + pg_manager.execute( f"ALTER TABLE schema{altered_schema}.postgresql_replica_{altered_table} ADD COLUMN value2 integer" ) @@ -599,10 +612,9 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): def test_table_override(started_cluster): - cursor = pg_manager.get_db_cursor() table_name = "table_override" materialized_database = "test_database" - create_postgres_table(cursor, table_name, template=postgres_table_template_5) + pg_manager.create_postgres_table(table_name, template=postgres_table_template_5) 
instance.query( f"create table {table_name}(key Int32, value UUID) engine = PostgreSQL (postgres1, table={table_name})" ) @@ -629,10 +641,11 @@ def test_table_override(started_cluster): def test_materialized_view(started_cluster): - cursor = pg_manager.get_db_cursor() - cursor.execute(f"DROP TABLE IF EXISTS test_table") - cursor.execute(f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)") - cursor.execute(f"INSERT INTO test_table SELECT 1, 2") + pg_manager.execute(f"DROP TABLE IF EXISTS test_table") + pg_manager.execute( + f"CREATE TABLE test_table (key integer PRIMARY KEY, value integer)" + ) + pg_manager.execute(f"INSERT INTO test_table SELECT 1, 2") instance.query("DROP DATABASE IF EXISTS test_database") instance.query( "CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'" @@ -643,12 +656,69 @@ def test_materialized_view(started_cluster): "CREATE MATERIALIZED VIEW mv ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT * FROM test_database.test_table" ) assert "1\t2" == instance.query("SELECT * FROM mv").strip() - cursor.execute(f"INSERT INTO test_table SELECT 3, 4") + pg_manager.execute(f"INSERT INTO test_table SELECT 3, 4") check_tables_are_synchronized(instance, "test_table") assert "1\t2\n3\t4" == instance.query("SELECT * FROM mv ORDER BY 1, 2").strip() pg_manager.drop_materialized_db() +def test_too_many_parts(started_cluster): + table = "test_table" + pg_manager2.create_and_fill_postgres_table(table) + pg_manager2.create_materialized_db( + ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=[ + f"materialized_postgresql_tables_list = 'test_table', materialized_postgresql_backoff_min_ms = 100, materialized_postgresql_backoff_max_ms = 100" + ], + ) + check_tables_are_synchronized( + instance2, "test_table", postgres_database=pg_manager2.get_default_database() + ) + assert ( + "50" == instance2.query("SELECT count() FROM test_database.test_table").strip() + ) + + instance2.query("SYSTEM STOP MERGES") + num = 50 + for i in range(10): + instance2.query( + f""" + INSERT INTO {pg_manager2.get_default_database()}.test_table SELECT {num}, {num}; + """ + ) + num = num + 1 + for i in range(30): + if num == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) or instance2.contains_in_log("DB::Exception: Too many parts"): + break + time.sleep(1) + print(f"wait sync try {i}") + instance2.query("SYSTEM FLUSH LOGS") + if instance2.contains_in_log("DB::Exception: Too many parts"): + break + assert num == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) or num - 1 == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) + + assert instance2.contains_in_log("DB::Exception: Too many parts") + print(num) + assert num == int( + instance2.query("SELECT count() FROM test_database.test_table") + ) or num - 1 == int(instance2.query("SELECT count() FROM test_database.test_table")) + + instance2.query("SYSTEM START MERGES") + check_tables_are_synchronized( + instance2, "test_table", postgres_database=pg_manager2.get_default_database() + ) + + # assert "200" == instance.query("SELECT count FROM test_database.test_table").strip() + pg_manager2.drop_materialized_db() + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_profile_max_sessions_for_user/__init__.py b/tests/integration/test_profile_max_sessions_for_user/__init__.py new file mode 
100755 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem b/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem new file mode 100755 index 00000000000..2e6cee0798d --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEAua92DDli13gJ+//ZXyGaggjIuidqB0crXfhUlsrBk9BV1hH3i7fR +XGP9rUdk2ubnB3k2ejBStL5oBrkHm9SzUFSQHqfDjLZjKoUpOEmuDc4cHvX1XTR5 +Pr1vf5cd0yEncJWG5W4zyUB8k++SUdL2qaeslSs+f491HBLDYn/h8zCgRbBvxhxb +9qeho1xcbnWeqkN6Kc9bgGozA16P9NLuuLttNnOblkH+lMBf42BSne/TWt3AlGZf +slKmmZcySUhF8aKfJnLKbkBCFqOtFRh8zBA9a7g+BT/lSANATCDPaAk1YVih2EKb +dpc3briTDbRsiqg2JKMI7+VdULY9bh3EawIBAg== +-----END DH PARAMETERS----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/log.xml b/tests/integration/test_profile_max_sessions_for_user/configs/log.xml new file mode 100644 index 00000000000..22f95a8bd5d --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/log.xml @@ -0,0 +1,9 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml b/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml new file mode 100644 index 00000000000..3123c4a3d9c --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/ports.xml @@ -0,0 +1,9 @@ + + 5433 + 9001 + 9100 + + + false + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/server.crt b/tests/integration/test_profile_max_sessions_for_user/configs/server.crt new file mode 100755 index 00000000000..070d37f3b77 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJANhP897Se2gmMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0yMDA0MTgyMTE2NDBaFw0yMTA0MTgyMTE2NDBaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAM92kcojQoMsjZ9YGhPMY6h/fDUsZeSKHLxgqE6wbmfU1oZKCPWqnvl+4n0J +pnT5h1ETxxYZLepimKq0DEVPUTmCl0xmcKbtUNiaTUKYKsdita6b2vZCX9wUPN9p +2Kjnm41l+aZNqIEBhIgHNWg9qowi20y0EIXR79jQLwwaInHAaJLZxVsqY2zjQ/D7 +1Zh82MXud7iqxBQiEfw9Cz35UFA239R8QTlPkVQfsN1gfLxnLk24QUX3o+hbUI1g +nlSpyYDHYQlOmwz8doDs6THHAZNJ4bPE9xHNFpw6dGZdbtH+IKQ/qRZIiOaiNuzJ +IOHl6XQDRDkW2LMTiCQ6fjC7Pz8CAwEAAaNQME4wHQYDVR0OBBYEFFvhaA/Eguyf +BXkMj8BkNLBqMnz2MB8GA1UdIwQYMBaAFFvhaA/EguyfBXkMj8BkNLBqMnz2MAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBACeU/oL48eVAKH7NQntHhRaJ +ZGeQzKIjrSBjFo8BGXD1nJZhUeFsylLrhCkC8/5/3grE3BNVX9bxcGjO81C9Mn4U +t0z13d6ovJjCZSQArtLwgeJGlpH7gNdD3DyT8DQmrqYVnmnB7UmBu45XH1LWGQZr +FAOhGRVs6s6mNj8QlLMgdmsOeOQnsGCMdoss8zV9vO2dc4A5SDSSL2mqGGY4Yjtt +X+XlEhXXnksGyx8NGVOZX4wcj8WeCAj/lihQ7Zh6XYwZH9i+E46ompUwoziZnNPu +2RH63tLNCxkOY2HF5VMlbMmzer3FkhlM6TAZZRPcvSphKPwXK4A33yqc6wnWvpc= +-----END CERTIFICATE----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/server.key b/tests/integration/test_profile_max_sessions_for_user/configs/server.key new file mode 100755 index 00000000000..b3dee82dcda --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDPdpHKI0KDLI2f +WBoTzGOof3w1LGXkihy8YKhOsG5n1NaGSgj1qp75fuJ9CaZ0+YdRE8cWGS3qYpiq 
+tAxFT1E5gpdMZnCm7VDYmk1CmCrHYrWum9r2Ql/cFDzfadio55uNZfmmTaiBAYSI +BzVoPaqMIttMtBCF0e/Y0C8MGiJxwGiS2cVbKmNs40Pw+9WYfNjF7ne4qsQUIhH8 +PQs9+VBQNt/UfEE5T5FUH7DdYHy8Zy5NuEFF96PoW1CNYJ5UqcmAx2EJTpsM/HaA +7OkxxwGTSeGzxPcRzRacOnRmXW7R/iCkP6kWSIjmojbsySDh5el0A0Q5FtizE4gk +On4wuz8/AgMBAAECggEAJ54J2yL+mZQRe2NUn4FBarTloDXZQ1pIgISov1Ybz0Iq +sTxEF728XAKp95y3J9Fa0NXJB+RJC2BGrRpy2W17IlNY1yMc0hOxg5t7s4LhcG/e +J/jlSG+GZL2MnlFVKXQJFWhq0yIzUmdayqstvLlB7z7cx/n+yb88YRfoVBRNjZEL +Tdrsw+087igDjrIxZJ3eMN5Wi434n9s4yAoRQC1bP5wcWx0gD4MzdmL8ip6suiRc +LRuBAhV/Op812xlxUhrF5dInUM9OLlGTXpUzexAS8Cyy7S4bfkW2BaCxTF7I7TFw +Whx28CKn/G49tIuU0m6AlxWbXpLVePTFyMb7RJz5cQKBgQD7VQd2u3HM6eE3PcXD +p6ObdLTUk8OAJ5BMmADFc71W0Epyo26/e8KXKGYGxE2W3fr13y+9b0fl5fxZPuhS +MgvXEO7rItAVsLcp0IzaqY0WUee2b4XWPAU0XuPqvjYMpx8H5OEHqFK6lhZysAqM +X7Ot3/Hux9X0MC4v5a/HNbDUOQKBgQDTUPaP3ADRrmpmE2sWuzWEnCSEz5f0tCLO +wTqhV/UraWUNlAbgK5NB790IjH/gotBSqqNPLJwJh0LUfClKM4LiaHsEag0OArOF +GhPMK1Ohps8c2RRsiG8+hxX2HEHeAVbkouEDPDiHdIW/92pBViDoETXL6qxDKbm9 +LkOcVeDfNwKBgQChh1xsqrvQ/t+IKWNZA/zahH9TwEP9sW/ESkz0mhYuHWA7nV4o +ItpFW+l2n+Nd+vy32OFN1p9W2iD9GrklWpTRfEiRRqaFyjVt4mMkhaPvnGRXlAVo +Utrldbb1v5ntN9txr2ARE9VXpe53dzzQSxGnxi4vUK/paK3GitAWMCOdwQKBgQCi +hmGsUXQb0P6qVYMGr6PAw2re7t8baLRguoMCdqjs45nCMLh9D2apzvb8TTtJJU/+ +VJlYGqJEPdDrpjcHh8jBo8QBqCM0RGWYGG9jl2syKB6hPGCV/PU6bSE58Y/DVNpk +7NUM7PM5UyhPddY2PC0A78Ole29UFLJzSzLa+b4DTwKBgH9Wh2k4YPnPcRrX89UL +eSwWa1CGq6HWX8Kd5qyz256aeHWuG5nv15+rBt+D7nwajUsqeVkAXz5H/dHuG1xz +jb7RW+pEjx0GVAmIbkM9vOLqEUfHHHPuk4AXCGGZ5sarPiKg4BHKBBsY1dpoO5UH +0j71fRA6zurHnTXDaCLWlUpZ +-----END PRIVATE KEY----- diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml b/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml new file mode 100644 index 00000000000..778d327c460 --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/ssl_conf.xml @@ -0,0 +1,17 @@ + + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + + /etc/clickhouse-server/config.d/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + diff --git a/tests/integration/test_profile_max_sessions_for_user/configs/users.xml b/tests/integration/test_profile_max_sessions_for_user/configs/users.xml new file mode 100644 index 00000000000..3bed673b2ca --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/configs/users.xml @@ -0,0 +1,16 @@ + + + + 2 + 0 + + + + + + + + 123 + + + diff --git a/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto b/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py new file mode 100755 index 00000000000..5eaef09bf6d --- /dev/null +++ b/tests/integration/test_profile_max_sessions_for_user/test.py @@ -0,0 +1,242 @@ +import os + +import grpc +import pymysql.connections +import psycopg2 as py_psql +import pytest +import sys +import threading + +from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.test_tools import assert_logs_contain_with_retry + +from helpers.uclient import client, prompt + +MAX_SESSIONS_FOR_USER = 2 +POSTGRES_SERVER_PORT = 5433 +MYSQL_SERVER_PORT = 9001 
+GRPC_PORT = 9100 + +TEST_USER = "test_user" +TEST_PASSWORD = "123" + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_ENCODING = "utf-8" + +# Use grpcio-tools to generate *pb2.py files from *.proto. +proto_dir = os.path.join(SCRIPT_DIR, "./protos") +gen_dir = os.path.join(SCRIPT_DIR, "./_gen") +os.makedirs(gen_dir, exist_ok=True) +run_and_check( + f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto", + shell=True, +) + +sys.path.append(gen_dir) + +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ports.xml", + "configs/log.xml", + "configs/ssl_conf.xml", + "configs/dhparam.pem", + "configs/server.crt", + "configs/server.key", + ], + user_configs=["configs/users.xml"], + env_variables={ + "UBSAN_OPTIONS": "print_stacktrace=1", + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + "TSAN_OPTIONS": "report_atomic_races=0 " + + os.getenv("TSAN_OPTIONS", default=""), + }, +) + + +def get_query(name, id): + return f"SElECT '{name}', {id}, number from system.numbers" + + +def grpc_get_url(): + return f"{instance.ip_address}:{GRPC_PORT}" + + +def grpc_create_insecure_channel(): + channel = grpc.insecure_channel(grpc_get_url()) + grpc.channel_ready_future(channel).result(timeout=2) + return channel + + +def grpc_query(query_text, channel, session_id_): + query_info = clickhouse_grpc_pb2.QueryInfo( + query=query_text, + session_id=session_id_, + user_name=TEST_USER, + password=TEST_PASSWORD, + ) + + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + return result.output.decode(DEFAULT_ENCODING) + + +def threaded_run_test(sessions): + instance.rotate_logs() + thread_list = [] + for i in range(len(sessions)): + thread = ThreadWithException(target=sessions[i], args=(i,)) + thread_list.append(thread) + thread.start() + + if len(sessions) > MAX_SESSIONS_FOR_USER: + assert_logs_contain_with_retry(instance, "overflown session count") + + instance.query(f"KILL QUERY WHERE user='{TEST_USER}' SYNC") + + for thread in thread_list: + thread.join() + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +class ThreadWithException(threading.Thread): + def run(self): + try: + super().run() + except: + pass + + +def postgres_session(id): + ch = py_psql.connect( + host=instance.ip_address, + port=POSTGRES_SERVER_PORT, + user=TEST_USER, + password=TEST_PASSWORD, + database="default", + ) + cur = ch.cursor() + cur.execute(get_query("postgres_session", id)) + cur.fetchall() + + +def mysql_session(id): + client = pymysql.connections.Connection( + host=instance.ip_address, + user=TEST_USER, + password=TEST_PASSWORD, + database="default", + port=MYSQL_SERVER_PORT, + ) + cursor = client.cursor(pymysql.cursors.DictCursor) + cursor.execute(get_query("mysql_session", id)) + cursor.fetchall() + + +def tcp_session(id): + instance.query(get_query("tcp_session", id), user=TEST_USER, password=TEST_PASSWORD) + + +def http_session(id): + instance.http_query( + get_query("http_session", id), user=TEST_USER, password=TEST_PASSWORD + ) + + +def http_named_session(id): + instance.http_query( + get_query("http_named_session", id), + user=TEST_USER, 
+ password=TEST_PASSWORD, + params={"session_id": id}, + ) + + +def grpc_session(id): + grpc_query( + get_query("grpc_session", id), grpc_create_insecure_channel(), f"session_{id}" + ) + + +def test_profile_max_sessions_for_user_tcp(started_cluster): + threaded_run_test([tcp_session] * 3) + + +def test_profile_max_sessions_for_user_postgres(started_cluster): + threaded_run_test([postgres_session] * 3) + + +def test_profile_max_sessions_for_user_mysql(started_cluster): + threaded_run_test([mysql_session] * 3) + + +def test_profile_max_sessions_for_user_http(started_cluster): + threaded_run_test([http_session] * 3) + + +def test_profile_max_sessions_for_user_http_named_session(started_cluster): + threaded_run_test([http_named_session] * 3) + + +def test_profile_max_sessions_for_user_grpc(started_cluster): + threaded_run_test([grpc_session] * 3) + + +def test_profile_max_sessions_for_user_tcp_and_others(started_cluster): + threaded_run_test([tcp_session, grpc_session, grpc_session]) + threaded_run_test([tcp_session, http_session, http_session]) + threaded_run_test([tcp_session, mysql_session, mysql_session]) + threaded_run_test([tcp_session, postgres_session, postgres_session]) + threaded_run_test([tcp_session, http_session, postgres_session]) + threaded_run_test([tcp_session, postgres_session, http_session]) + + +def test_profile_max_sessions_for_user_setting_in_query(started_cluster): + instance.query_and_get_error("SET max_sessions_for_user = 10") + + +def test_profile_max_sessions_for_user_client_suggestions_connection(started_cluster): + command_text = f"{started_cluster.get_client_cmd()} --host {instance.ip_address} --port 9000 -u {TEST_USER} --password {TEST_PASSWORD}" + command_text_without_suggestions = command_text + " --disable_suggestion" + + # Launch client1 without suggestions to avoid a race condition: + # Client1 opens a session. + # Client1 opens a session for suggestion connection. + # Client2 fails to open a session and gets the USER_SESSION_LIMIT_EXCEEDED error. + # + # Expected order: + # Client1 opens a session. + # Client2 opens a session. + # Client2 fails to open a session for suggestions and with USER_SESSION_LIMIT_EXCEEDED (No error printed). + # Client3 fails to open a session. + # Client1 executes the query. + # Client2 loads suggestions from the server using the main connection and executes a query. 
+ with client( + name="client1>", log=None, command=command_text_without_suggestions + ) as client1: + client1.expect(prompt) + with client(name="client2>", log=None, command=command_text) as client2: + client2.expect(prompt) + with client(name="client3>", log=None, command=command_text) as client3: + client3.expect("USER_SESSION_LIMIT_EXCEEDED") + + client1.send("SELECT 'CLIENT_1_SELECT' FORMAT CSV") + client1.expect("CLIENT_1_SELECT") + client1.expect(prompt) + client2.send("SELECT 'CLIENT_2_SELECT' FORMAT CSV") + client2.expect("CLIENT_2_SELECT") + client2.expect(prompt) diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 4dbd530dd17..1276a6079f0 100644 --- a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -147,12 +147,16 @@ def test_drop_replica_and_achieve_quorum(started_cluster): @pytest.mark.parametrize(("add_new_data"), [False, True]) def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): - zero.query( - "DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition ON CLUSTER cluster" + # use different table names for easier disambiguation in logs between runs (you may also check uuid though, but not always convenient) + table_name = ( + "test_quorum_insert_with_drop_partition_new_data" + if add_new_data + else "test_quorum_insert_with_drop_partition" ) + zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster") create_query = ( - "CREATE TABLE test_quorum_insert_with_drop_partition ON CLUSTER cluster " + f"CREATE TABLE {table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " @@ -161,78 +165,74 @@ def test_insert_quorum_with_drop_partition(started_cluster, add_new_data): print("Create Replicated table with three replicas") zero.query(create_query) - print("Stop fetches for test_quorum_insert_with_drop_partition at first replica.") - first.query("SYSTEM STOP FETCHES test_quorum_insert_with_drop_partition") + print(f"Stop fetches for {table_name} at first replica.") + first.query(f"SYSTEM STOP FETCHES {table_name}") print("Insert with quorum. 
(zero and second)") - zero.query( - "INSERT INTO test_quorum_insert_with_drop_partition(a,d) VALUES(1, '2011-01-01')" - ) + zero.query(f"INSERT INTO {table_name}(a,d) VALUES(1, '2011-01-01')") print("Drop partition.") - zero.query( - "ALTER TABLE test_quorum_insert_with_drop_partition DROP PARTITION '2011-01-01'" - ) + zero.query(f"ALTER TABLE {table_name} DROP PARTITION '2011-01-01'") if add_new_data: print("Insert to deleted partition") - zero.query( - "INSERT INTO test_quorum_insert_with_drop_partition(a,d) VALUES(2, '2011-01-01')" - ) + zero.query(f"INSERT INTO {table_name}(a,d) VALUES(2, '2011-01-01')") - print("Resume fetches for test_quorum_insert_with_drop_partition at first replica.") - first.query("SYSTEM START FETCHES test_quorum_insert_with_drop_partition") + print(f"Resume fetches for {table_name} at first replica.") + first.query(f"SYSTEM START FETCHES {table_name}") print("Sync first replica with others.") - first.query("SYSTEM SYNC REPLICA test_quorum_insert_with_drop_partition") + first.query(f"SYSTEM SYNC REPLICA {table_name}") assert "20110101" not in first.query( - """ - WITH (SELECT toString(uuid) FROM system.tables WHERE name = 'test_quorum_insert_with_drop_partition') AS uuid, + f""" + WITH (SELECT toString(uuid) FROM system.tables WHERE name = '{table_name}') AS uuid, '/clickhouse/tables/' || uuid || '/0/quorum/last_part' AS p SELECT * FROM system.zookeeper WHERE path = p FORMAT Vertical """ ) + # Sync second replica not to have `REPLICA_IS_NOT_IN_QUORUM` error + second.query(f"SYSTEM SYNC REPLICA {table_name}") + print("Select from updated partition.") if add_new_data: + assert TSV("2\t2011-01-01\n") == TSV(zero.query(f"SELECT * FROM {table_name}")) assert TSV("2\t2011-01-01\n") == TSV( - zero.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) - assert TSV("2\t2011-01-01\n") == TSV( - second.query("SELECT * FROM test_quorum_insert_with_drop_partition") + second.query(f"SELECT * FROM {table_name}") ) else: - assert TSV("") == TSV( - zero.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) - assert TSV("") == TSV( - second.query("SELECT * FROM test_quorum_insert_with_drop_partition") - ) + assert TSV("") == TSV(zero.query(f"SELECT * FROM {table_name}")) + assert TSV("") == TSV(second.query(f"SELECT * FROM {table_name}")) - zero.query( - "DROP TABLE IF EXISTS test_quorum_insert_with_drop_partition ON CLUSTER cluster" - ) + zero.query(f"DROP TABLE IF EXISTS {table_name} ON CLUSTER cluster") @pytest.mark.parametrize(("add_new_data"), [False, True]) def test_insert_quorum_with_move_partition(started_cluster, add_new_data): - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_source ON CLUSTER cluster" + # use different table names for easier disambiguation in logs between runs (you may also check uuid though, but not always convenient) + source_table_name = ( + "test_insert_quorum_with_move_partition_source_new_data" + if add_new_data + else "test_insert_quorum_with_move_partition_source" ) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_destination ON CLUSTER cluster" + destination_table_name = ( + "test_insert_quorum_with_move_partition_destination_new_data" + if add_new_data + else "test_insert_quorum_with_move_partition_destination" ) + zero.query(f"DROP TABLE IF EXISTS {source_table_name} ON CLUSTER cluster") + zero.query(f"DROP TABLE IF EXISTS {destination_table_name} ON CLUSTER cluster") create_source = ( - "CREATE TABLE test_insert_quorum_with_move_partition_source ON CLUSTER 
cluster " + f"CREATE TABLE {source_table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " ) create_destination = ( - "CREATE TABLE test_insert_quorum_with_move_partition_destination ON CLUSTER cluster " + f"CREATE TABLE {destination_table_name} ON CLUSTER cluster " "(a Int8, d Date) " "Engine = ReplicatedMergeTree " "PARTITION BY d ORDER BY a " @@ -244,65 +244,52 @@ def test_insert_quorum_with_move_partition(started_cluster, add_new_data): print("Create destination Replicated table with three replicas") zero.query(create_destination) - print( - "Stop fetches for test_insert_quorum_with_move_partition_source at first replica." - ) - first.query("SYSTEM STOP FETCHES test_insert_quorum_with_move_partition_source") + print(f"Stop fetches for {source_table_name} at first replica.") + first.query(f"SYSTEM STOP FETCHES {source_table_name}") print("Insert with quorum. (zero and second)") - zero.query( - "INSERT INTO test_insert_quorum_with_move_partition_source(a,d) VALUES(1, '2011-01-01')" - ) + zero.query(f"INSERT INTO {source_table_name}(a,d) VALUES(1, '2011-01-01')") print("Drop partition.") zero.query( - "ALTER TABLE test_insert_quorum_with_move_partition_source MOVE PARTITION '2011-01-01' TO TABLE test_insert_quorum_with_move_partition_destination" + f"ALTER TABLE {source_table_name} MOVE PARTITION '2011-01-01' TO TABLE {destination_table_name}" ) if add_new_data: print("Insert to deleted partition") - zero.query( - "INSERT INTO test_insert_quorum_with_move_partition_source(a,d) VALUES(2, '2011-01-01')" - ) + zero.query(f"INSERT INTO {source_table_name}(a,d) VALUES(2, '2011-01-01')") - print( - "Resume fetches for test_insert_quorum_with_move_partition_source at first replica." - ) - first.query("SYSTEM START FETCHES test_insert_quorum_with_move_partition_source") + print(f"Resume fetches for {source_table_name} at first replica.") + first.query(f"SYSTEM START FETCHES {source_table_name}") print("Sync first replica with others.") - first.query("SYSTEM SYNC REPLICA test_insert_quorum_with_move_partition_source") + first.query(f"SYSTEM SYNC REPLICA {source_table_name}") assert "20110101" not in first.query( - """ - WITH (SELECT toString(uuid) FROM system.tables WHERE name = 'test_insert_quorum_with_move_partition_source') AS uuid, + f""" + WITH (SELECT toString(uuid) FROM system.tables WHERE name = '{source_table_name}') AS uuid, '/clickhouse/tables/' || uuid || '/0/quorum/last_part' AS p SELECT * FROM system.zookeeper WHERE path = p FORMAT Vertical """ ) + # Sync second replica not to have `REPLICA_IS_NOT_IN_QUORUM` error + second.query(f"SYSTEM SYNC REPLICA {source_table_name}") + print("Select from updated partition.") if add_new_data: assert TSV("2\t2011-01-01\n") == TSV( - zero.query("SELECT * FROM test_insert_quorum_with_move_partition_source") + zero.query(f"SELECT * FROM {source_table_name}") ) assert TSV("2\t2011-01-01\n") == TSV( - second.query("SELECT * FROM test_insert_quorum_with_move_partition_source") + second.query(f"SELECT * FROM {source_table_name}") ) else: - assert TSV("") == TSV( - zero.query("SELECT * FROM test_insert_quorum_with_move_partition_source") - ) - assert TSV("") == TSV( - second.query("SELECT * FROM test_insert_quorum_with_move_partition_source") - ) + assert TSV("") == TSV(zero.query(f"SELECT * FROM {source_table_name}")) + assert TSV("") == TSV(second.query(f"SELECT * FROM {source_table_name}")) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_source ON CLUSTER cluster" - 
) - zero.query( - "DROP TABLE IF EXISTS test_insert_quorum_with_move_partition_destination ON CLUSTER cluster" - ) + zero.query(f"DROP TABLE IF EXISTS {source_table_name} ON CLUSTER cluster") + zero.query(f"DROP TABLE IF EXISTS {destination_table_name} ON CLUSTER cluster") def test_insert_quorum_with_ttl(started_cluster): diff --git a/tests/integration/test_quorum_inserts_parallel/test.py b/tests/integration/test_quorum_inserts_parallel/test.py index 99548e37a54..7f8784d822c 100644 --- a/tests/integration/test_quorum_inserts_parallel/test.py +++ b/tests/integration/test_quorum_inserts_parallel/test.py @@ -27,7 +27,11 @@ def started_cluster(): def test_parallel_quorum_actually_parallel(started_cluster): - settings = {"insert_quorum": "3", "insert_quorum_parallel": "1"} + settings = { + "insert_quorum": "3", + "insert_quorum_parallel": "1", + "function_sleep_max_microseconds_per_block": "0", + } for i, node in enumerate([node1, node2, node3]): node.query( "CREATE TABLE r (a UInt64, b String) ENGINE=ReplicatedMergeTree('/test/r', '{num}') ORDER BY tuple()".format( diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 4f98b9a0d0d..cec14b0af73 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -105,7 +105,7 @@ def test_quota_from_users_xml(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -252,7 +252,7 @@ def test_simpliest_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[]", 0, @@ -326,7 +326,7 @@ def test_tracking_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -446,7 +446,7 @@ def test_exceed_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -523,7 +523,7 @@ def test_exceed_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -613,7 +613,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -675,7 +675,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952,63113904]", 0, @@ -824,7 +824,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -914,7 +914,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[]", 0, @@ -986,7 +986,7 @@ def test_add_remove_interval(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -1048,7 +1048,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, @@ -1111,7 +1111,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1121,7 +1121,7 @@ def test_add_remove_quota(): [ "myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", - "users.xml", + "users_xml", "['client_key','user_name']", "[3600,2629746]", 0, @@ -1214,7 +1214,7 @@ def test_add_remove_quota(): [ "myQuota", 
"e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1283,7 +1283,7 @@ def test_add_remove_quota(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1346,7 +1346,7 @@ def test_reload_users_xml_by_timer(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", "[31556952]", 0, @@ -1385,7 +1385,7 @@ def test_reload_users_xml_by_timer(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", ["user_name"], "[31556952]", 0, @@ -1554,7 +1554,7 @@ def test_query_inserts(): [ "myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", - "users.xml", + "users_xml", "['user_name']", [31556952], 0, diff --git a/tests/integration/test_read_temporary_tables_on_failure/test.py b/tests/integration/test_read_temporary_tables_on_failure/test.py index fd1d92eff92..77c8f3cf26b 100644 --- a/tests/integration/test_read_temporary_tables_on_failure/test.py +++ b/tests/integration/test_read_temporary_tables_on_failure/test.py @@ -19,7 +19,10 @@ def start_cluster(): def test_different_versions(start_cluster): with pytest.raises(QueryTimeoutExceedException): - node.query("SELECT sleepEachRow(3) FROM numbers(10)", timeout=5) + node.query( + "SELECT sleepEachRow(3) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 0", + timeout=5, + ) with pytest.raises(QueryRuntimeException): node.query("SELECT 1", settings={"max_concurrent_queries_for_user": 1}) assert node.contains_in_log("Too many simultaneous queries for user") diff --git a/tests/integration/test_rename_column/test.py b/tests/integration/test_rename_column/test.py index 8dc57cf08ff..1c87b101b11 100644 --- a/tests/integration/test_rename_column/test.py +++ b/tests/integration/test_rename_column/test.py @@ -159,7 +159,7 @@ def insert( ) elif slow: query.append( - "INSERT INTO {table_name} ({col0}, {col1}) SELECT number + sleepEachRow(0.001) AS {col0}, number + 1 AS {col1} FROM numbers_mt({chunk})".format( + "INSERT INTO {table_name} ({col0}, {col1}) SELECT number + sleepEachRow(0.001) AS {col0}, number + 1 AS {col1} FROM numbers_mt({chunk}) SETTINGS function_sleep_max_microseconds_per_block = 0".format( table_name=table_name, chunk=chunk, col0=col_names[0], @@ -198,7 +198,7 @@ def select( try: if slow: r = node.query( - "SELECT count() FROM (SELECT num2, sleepEachRow(0.5) FROM {} WHERE {} % 1000 > 0)".format( + "SELECT count() FROM (SELECT num2, sleepEachRow(0.5) FROM {} WHERE {} % 1000 > 0) SETTINGS function_sleep_max_microseconds_per_block = 0".format( table_name, col_name ) ) diff --git a/tests/integration/test_render_log_file_name_templates/__init__.py b/tests/integration/test_render_log_file_name_templates/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml b/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml new file mode 100644 index 00000000000..ba408eb9823 --- /dev/null +++ b/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml @@ -0,0 +1,6 @@ + + + /var/log/clickhouse-server/clickhouse-server-%Y-%m.log + /var/log/clickhouse-server/clickhouse-server-%Y-%m.err.log + + diff --git a/tests/integration/test_render_log_file_name_templates/test.py b/tests/integration/test_render_log_file_name_templates/test.py new file mode 100644 index 00000000000..58df32b823e --- /dev/null 
+++ b/tests/integration/test_render_log_file_name_templates/test.py @@ -0,0 +1,54 @@ +import pytest +import logging +from helpers.cluster import ClickHouseCluster +from datetime import datetime + + +log_dir = "/var/log/clickhouse-server/" +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def started_cluster(): + cluster.add_instance( + "file-names-from-config", + main_configs=["configs/config-file-template.xml"], + clickhouse_log_file=None, + clickhouse_error_log_file=None, + ) + cluster.add_instance( + "file-names-from-params", + clickhouse_log_file=log_dir + "clickhouse-server-%Y-%m.log", + clickhouse_error_log_file=log_dir + "clickhouse-server-%Y-%m.err.log", + ) + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_check_file_names(started_cluster): + now = datetime.now() + log_file = log_dir + f"clickhouse-server-{now.strftime('%Y-%m')}.log" + err_log_file = log_dir + f"clickhouse-server-{now.strftime('%Y-%m')}.err.log" + logging.debug(f"log_file {log_file} err_log_file {err_log_file}") + + for name, instance in started_cluster.instances.items(): + files = instance.exec_in_container( + ["bash", "-c", f"ls -lh {log_dir}"], nothrow=True + ) + + logging.debug(f"check instance '{name}': {log_dir} contains: {files}") + + assert ( + instance.exec_in_container(["bash", "-c", f"ls {log_file}"], nothrow=True) + == log_file + "\n" + ) + + assert ( + instance.exec_in_container( + ["bash", "-c", f"ls {err_log_file}"], nothrow=True + ) + == err_log_file + "\n" + ) diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml index 16cd942e975..e598cc28d5d 100644 --- a/tests/integration/test_replicated_database/configs/config.xml +++ b/tests/integration/test_replicated_database/configs/config.xml @@ -1,5 +1,6 @@ 10 + 1 10 diff --git a/tests/integration/test_replicated_database/configs/settings.xml b/tests/integration/test_replicated_database/configs/settings.xml index 5666ffeace8..7c0e60a044e 100644 --- a/tests/integration/test_replicated_database/configs/settings.xml +++ b/tests/integration/test_replicated_database/configs/settings.xml @@ -4,6 +4,8 @@ 1 1 1 + 0 + 0 diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index b3ba8d4737f..f45841124d9 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -132,14 +132,15 @@ def test_create_replicated_table(started_cluster): @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_simple_alter_table(started_cluster, engine): + database = f"test_simple_alter_table_{engine}" main_node.query( - "CREATE DATABASE test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) # test_simple_alter_table - name = "test_simple_alter_table.alter_test_{}".format(engine) + name = f"{database}.alter_test" main_node.query( "CREATE TABLE {} " "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " @@ -187,10 +188,9 @@ def 
test_simple_alter_table(started_cluster, engine): # test_create_replica_after_delay competing_node.query( - "CREATE DATABASE IF NOT EXISTS test_simple_alter_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica3');" + f"CREATE DATABASE IF NOT EXISTS {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica3');" ) - name = "test_simple_alter_table.alter_test_{}".format(engine) main_node.query("ALTER TABLE {} ADD COLUMN Added3 UInt32;".format(name)) main_node.query("ALTER TABLE {} DROP COLUMN AddedNested1;".format(name)) main_node.query("ALTER TABLE {} RENAME COLUMN Added1 TO AddedNested1;".format(name)) @@ -210,21 +210,23 @@ def test_simple_alter_table(started_cluster, engine): ) assert_create_query([main_node, dummy_node, competing_node], name, expected) - main_node.query("DROP DATABASE test_simple_alter_table SYNC") - dummy_node.query("DROP DATABASE test_simple_alter_table SYNC") - competing_node.query("DROP DATABASE test_simple_alter_table SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") + competing_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_delete_from_table(started_cluster, engine): + database = f"delete_from_table_{engine}" + main_node.query( - "CREATE DATABASE delete_from_table ENGINE = Replicated('/test/simple_alter_table', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE delete_from_table ENGINE = Replicated('/test/simple_alter_table', 'shard2', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard2', 'replica1');" ) - name = "delete_from_table.delete_test_{}".format(engine) + name = f"{database}.delete_test" main_node.query( "CREATE TABLE {} " "(id UInt64, value String) " @@ -241,7 +243,7 @@ def test_delete_from_table(started_cluster, engine): table_for_select = name if not "Replicated" in engine: - table_for_select = "cluster('delete_from_table', {})".format(name) + table_for_select = f"cluster('{database}', {name})" for node in [main_node, dummy_node]: assert_eq_with_retry( node, @@ -249,8 +251,8 @@ def test_delete_from_table(started_cluster, engine): expected, ) - main_node.query("DROP DATABASE delete_from_table SYNC") - dummy_node.query("DROP DATABASE delete_from_table SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") def get_table_uuid(database, name): @@ -278,18 +280,18 @@ def fixture_attachable_part(started_cluster): @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_attach(started_cluster, attachable_part, engine): + database = f"alter_attach_{engine}" main_node.query( - "CREATE DATABASE alter_attach ENGINE = Replicated('/test/alter_attach', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_attach ENGINE = Replicated('/test/alter_attach', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - name = "alter_attach_test_{}".format(engine) main_node.query( - f"CREATE TABLE alter_attach.{name} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_attach_test (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - table_uuid = 
get_table_uuid("alter_attach", name) + table_uuid = get_table_uuid(database, "alter_attach_test") # Provide and attach a part to the main node shutil.copytree( attachable_part, @@ -298,146 +300,157 @@ def test_alter_attach(started_cluster, attachable_part, engine): f"database/store/{table_uuid[:3]}/{table_uuid}/detached/all_1_1_0", ), ) - main_node.query(f"ALTER TABLE alter_attach.{name} ATTACH PART 'all_1_1_0'") + main_node.query(f"ALTER TABLE {database}.alter_attach_test ATTACH PART 'all_1_1_0'") # On the main node, data is attached - assert main_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "123\n" + assert ( + main_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "123\n" + ) # On the other node, data is replicated only if using a Replicated table engine if engine == "ReplicatedMergeTree": - assert dummy_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "123\n" + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "123\n" + ) else: - assert dummy_node.query(f"SELECT CounterID FROM alter_attach.{name}") == "" - main_node.query("DROP DATABASE alter_attach SYNC") - dummy_node.query("DROP DATABASE alter_attach SYNC") + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_attach_test") + == "" + ) + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_part(started_cluster, engine): + database = f"alter_drop_part_{engine}" main_node.query( - "CREATE DATABASE alter_drop_part ENGINE = Replicated('/test/alter_drop_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_part ENGINE = Replicated('/test/alter_drop_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_drop_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_drop_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop_part (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_drop_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_drop_part.{table} VALUES (456)") - main_node.query(f"ALTER TABLE alter_drop_part.{table} DROP PART '{part_name}'") - assert main_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" + dummy_node.query(f"INSERT INTO {database}.alter_drop_part VALUES (456)") + main_node.query(f"ALTER TABLE {database}.alter_drop_part DROP PART '{part_name}'") + assert main_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" if engine == "ReplicatedMergeTree": # The DROP operation is still replicated at the table engine level - assert dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") == "" + assert ( + dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "" + ) else: assert ( - dummy_node.query(f"SELECT CounterID FROM alter_drop_part.{table}") + dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop_part") == "456\n" ) - main_node.query("DROP DATABASE alter_drop_part SYNC") - dummy_node.query("DROP DATABASE alter_drop_part 
SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_detach_part(started_cluster, engine): + database = f"alter_detach_part_{engine}" main_node.query( - "CREATE DATABASE alter_detach_part ENGINE = Replicated('/test/alter_detach_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_detach_part ENGINE = Replicated('/test/alter_detach_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_detach_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_detach_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_detach (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_detach_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_detach VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_detach_part.{table} VALUES (456)") - main_node.query(f"ALTER TABLE alter_detach_part.{table} DETACH PART '{part_name}'") - detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_detach_part' AND table='{table}'" + dummy_node.query(f"INSERT INTO {database}.alter_detach VALUES (456)") + main_node.query(f"ALTER TABLE {database}.alter_detach DETACH PART '{part_name}'") + detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='{database}' AND table='alter_detach'" assert main_node.query(detached_parts_query) == f"{part_name}\n" if engine == "ReplicatedMergeTree": # The detach operation is still replicated at the table engine level assert dummy_node.query(detached_parts_query) == f"{part_name}\n" else: assert dummy_node.query(detached_parts_query) == "" - main_node.query("DROP DATABASE alter_detach_part SYNC") - dummy_node.query("DROP DATABASE alter_detach_part SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_detached_part(started_cluster, engine): + database = f"alter_drop_detached_part_{engine}" main_node.query( - "CREATE DATABASE alter_drop_detached_part ENGINE = Replicated('/test/alter_drop_detached_part', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_detached_part ENGINE = Replicated('/test/alter_drop_detached_part', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) - table = f"alter_drop_detached_{engine}" part_name = "all_0_0_0" if engine == "ReplicatedMergeTree" else "all_1_1_0" main_node.query( - f"CREATE TABLE alter_drop_detached_part.{table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop_detached (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop_detached VALUES (123)") main_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART 
'{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DETACH PART '{part_name}'" ) if engine == "MergeTree": - dummy_node.query(f"INSERT INTO alter_drop_detached_part.{table} VALUES (456)") + dummy_node.query(f"INSERT INTO {database}.alter_drop_detached VALUES (456)") dummy_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DETACH PART '{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DETACH PART '{part_name}'" ) main_node.query( - f"ALTER TABLE alter_drop_detached_part.{table} DROP DETACHED PART '{part_name}'" + f"ALTER TABLE {database}.alter_drop_detached DROP DETACHED PART '{part_name}'" ) - detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='alter_drop_detached_part' AND table='{table}'" + detached_parts_query = f"SELECT name FROM system.detached_parts WHERE database='{database}' AND table='alter_drop_detached'" assert main_node.query(detached_parts_query) == "" assert dummy_node.query(detached_parts_query) == f"{part_name}\n" - main_node.query("DROP DATABASE alter_drop_detached_part SYNC") - dummy_node.query("DROP DATABASE alter_drop_detached_part SYNC") + main_node.query(f"DROP DATABASE {database} SYNC") + dummy_node.query(f"DROP DATABASE {database} SYNC") @pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) def test_alter_drop_partition(started_cluster, engine): + database = f"alter_drop_partition_{engine}" main_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard1', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica1');" ) dummy_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard1', 'replica2');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard1', 'replica2');" ) snapshotting_node.query( - "CREATE DATABASE alter_drop_partition ENGINE = Replicated('/test/alter_drop_partition', 'shard2', 'replica1');" + f"CREATE DATABASE {database} ENGINE = Replicated('/test/{database}', 'shard2', 'replica1');" ) - table = f"alter_drop_partition.alter_drop_{engine}" main_node.query( - f"CREATE TABLE {table} (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" + f"CREATE TABLE {database}.alter_drop (CounterID UInt32) ENGINE = {engine} ORDER BY (CounterID)" ) - main_node.query(f"INSERT INTO {table} VALUES (123)") + main_node.query(f"INSERT INTO {database}.alter_drop VALUES (123)") if engine == "MergeTree": - dummy_node.query(f"INSERT INTO {table} VALUES (456)") - snapshotting_node.query(f"INSERT INTO {table} VALUES (789)") + dummy_node.query(f"INSERT INTO {database}.alter_drop VALUES (456)") + snapshotting_node.query(f"INSERT INTO {database}.alter_drop VALUES (789)") main_node.query( - f"ALTER TABLE {table} ON CLUSTER alter_drop_partition DROP PARTITION ID 'all'", + f"ALTER TABLE {database}.alter_drop ON CLUSTER {database} DROP PARTITION ID 'all'", settings={"replication_alter_partitions_sync": 2}, ) assert ( main_node.query( - f"SELECT CounterID FROM clusterAllReplicas('alter_drop_partition', {table})" + f"SELECT CounterID FROM clusterAllReplicas('{database}', {database}.alter_drop)" ) == "" ) - assert dummy_node.query(f"SELECT CounterID FROM {table}") == "" - main_node.query("DROP DATABASE alter_drop_partition") - dummy_node.query("DROP DATABASE alter_drop_partition") - snapshotting_node.query("DROP DATABASE alter_drop_partition") + assert dummy_node.query(f"SELECT CounterID FROM {database}.alter_drop") == "" + 
main_node.query(f"DROP DATABASE {database}") + dummy_node.query(f"DROP DATABASE {database}") + snapshotting_node.query(f"DROP DATABASE {database}") def test_alter_fetch(started_cluster): @@ -659,7 +672,11 @@ def test_alters_from_different_replicas(started_cluster): def create_some_tables(db): - settings = {"distributed_ddl_task_timeout": 0} + settings = { + "distributed_ddl_task_timeout": 0, + "allow_experimental_object_type": 1, + "allow_suspicious_codecs": 1, + } main_node.query(f"CREATE TABLE {db}.t1 (n int) ENGINE=Memory", settings=settings) dummy_node.query( f"CREATE TABLE {db}.t2 (s String) ENGINE=Memory", settings=settings @@ -677,11 +694,11 @@ def create_some_tables(db): settings=settings, ) dummy_node.query( - f"CREATE TABLE {db}.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n", + f"CREATE TABLE {db}.rmt2 (n int CODEC(ZSTD, ZSTD, ZSTD(12), LZ4HC(12))) ENGINE=ReplicatedMergeTree order by n", settings=settings, ) main_node.query( - f"CREATE TABLE {db}.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n", + f"CREATE TABLE {db}.rmt3 (n int, json Object('json') materialized '') ENGINE=ReplicatedMergeTree order by n", settings=settings, ) dummy_node.query( @@ -855,7 +872,10 @@ def test_recover_staled_replica(started_cluster): ]: assert main_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n" for table in ["t2", "rmt1", "rmt2", "rmt4", "d1", "d2", "mt2", "mv1", "mv3"]: - assert dummy_node.query(f"SELECT (*,).1 FROM recover.{table}") == "42\n" + assert ( + dummy_node.query(f"SELECT '{table}', (*,).1 FROM recover.{table}") + == f"{table}\t42\n" + ) for table in ["m1", "mt1"]: assert dummy_node.query(f"SELECT count() FROM recover.{table}") == "0\n" global test_recover_staled_replica_run @@ -1206,7 +1226,7 @@ def test_force_synchronous_settings(started_cluster): def select_func(): dummy_node.query( - "SELECT sleepEachRow(1) FROM test_force_synchronous_settings.t" + "SELECT sleepEachRow(1) FROM test_force_synchronous_settings.t SETTINGS function_sleep_max_microseconds_per_block = 0" ) select_thread = threading.Thread(target=select_func) @@ -1242,7 +1262,7 @@ def test_recover_digest_mismatch(started_cluster): "mv /var/lib/clickhouse/metadata/recover_digest_mismatch/t1.sql /var/lib/clickhouse/metadata/recover_digest_mismatch/m1.sql", "sed --follow-symlinks -i 's/Int32/String/' /var/lib/clickhouse/metadata/recover_digest_mismatch/mv1.sql", "rm -f /var/lib/clickhouse/metadata/recover_digest_mismatch/d1.sql", - # f"rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Directory already exists + "rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Will trigger "Directory already exists" "rm -rf /var/lib/clickhouse/store", ] diff --git a/tests/integration/test_replicated_merge_tree_compatibility/test.py b/tests/integration/test_replicated_merge_tree_compatibility/test.py index 68f2776e955..c30a0d86c98 100644 --- a/tests/integration/test_replicated_merge_tree_compatibility/test.py +++ b/tests/integration/test_replicated_merge_tree_compatibility/test.py @@ -9,6 +9,7 @@ node1 = cluster.add_instance( tag="20.12.4.5", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -17,6 +18,7 @@ node2 = cluster.add_instance( tag="20.12.4.5", stay_alive=True, with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py index 05d7bbb7282..25d30eb9c82 100644 --- 
a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py @@ -67,6 +67,8 @@ def optimize_table(): def check_table(): expected = [[1, "str1"], [2, "str2"]] + node1.query("SYSTEM SYNC REPLICA tbl LIGHTWEIGHT") + node2.query("SYSTEM SYNC REPLICA tbl LIGHTWEIGHT") assert node1.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) assert node2.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) assert node1.query("CHECK TABLE tbl") == "1\n" diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml deleted file mode 100644 index bb4aba94e0b..00000000000 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/configs/config.d/storage_conf.xml +++ /dev/null @@ -1,93 +0,0 @@
-<clickhouse>
-    <storage_configuration>
-        <disks>
-            <hdfs1>
-                <type>hdfs</type>
-                <endpoint>hdfs://hdfs1:9000/clickhouse1/</endpoint>
-                <skip_access_check>true</skip_access_check>
-            </hdfs1>
-            <hdfs1_again>
-                <type>hdfs</type>
-                <endpoint>hdfs://hdfs1:9000/clickhouse1/</endpoint>
-                <skip_access_check>true</skip_access_check>
-            </hdfs1_again>
-            <hdfs2>
-                <type>hdfs</type>
-                <endpoint>hdfs://hdfs1:9000/clickhouse2/</endpoint>
-                <skip_access_check>true</skip_access_check>
-            </hdfs2>
-        </disks>
-        <policies>
-            <hdfs_only>
-                <volumes>
-                    <main>
-                        <disk>hdfs1</disk>
-                    </main>
-                </volumes>
-            </hdfs_only>
-            <hybrid>
-                <volumes>
-                    <main>
-                        <disk>default</disk>
-                    </main>
-                    <external>
-                        <disk>hdfs1</disk>
-                    </external>
-                </volumes>
-                <move_factor>0.0</move_factor>
-            </hybrid>
-            <tiered>
-                <volumes>
-                    <main>
-                        <disk>hdfs2</disk>
-                    </main>
-                    <external>
-                        <disk>hdfs1</disk>
-                    </external>
-                </volumes>
-            </tiered>
-            <tiered_copy>
-                <volumes>
-                    <main>
-                        <disk>hdfs1_again</disk>
-                    </main>
-                    <external>
-                        <disk>hdfs1</disk>
-                    </external>
-                </volumes>
-            </tiered_copy>
-        </policies>
-    </storage_configuration>
-
-    <merge_tree>
-        <min_bytes_for_wide_part>1024000</min_bytes_for_wide_part>
-        <old_parts_lifetime>1</old_parts_lifetime>
-        <allow_remote_fs_zero_copy_replication>true</allow_remote_fs_zero_copy_replication>
-    </merge_tree>
-
-    <remote_servers>
-        <test_cluster>
-            <shard>
-                <replica>
-                    <host>node1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>node2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster>
-    </remote_servers>
-
-    <macros>
-        <cluster>test_cluster</cluster>
-        <shard>1</shard>
-    </macros>
-
-    <allow_remove_stale_moving_parts>true</allow_remove_stale_moving_parts>
-</clickhouse>
diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py deleted file mode 100644 index eb3d62eb718..00000000000 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ /dev/null @@ -1,322 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/42561 - -pytestmark = pytest.mark.skip - -import logging -from string import Template -import time - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry - -from pyhdfs import HdfsClient - -SHARDS = 2 -FILES_OVERHEAD_PER_TABLE = 1 # format_version.txt -FILES_OVERHEAD_PER_PART_COMPACT = 7 - - -def wait_for_hdfs_objects(cluster, fp, expected, num_tries=30): - fs = HdfsClient(hosts=cluster.hdfs_ip) - while num_tries > 0: - num_hdfs_objects = len(fs.listdir(fp)) - if num_hdfs_objects == expected: - break - num_tries -= 1 - time.sleep(1) - assert len(fs.listdir(fp)) == expected - - -@pytest.fixture(scope="module") -def cluster(): - try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance( - "node1", - main_configs=["configs/config.d/storage_conf.xml"], - macros={"replica": "node1"}, - with_zookeeper=True, - with_hdfs=True, - ) - cluster.add_instance( - "node2", - main_configs=["configs/config.d/storage_conf.xml"], - macros={"replica": "node2"}, - with_zookeeper=True, - with_hdfs=True, - ) - logging.info("Starting cluster...") - cluster.start() - if cluster.instances["node1"].is_debug_build(): - # https://github.com/ClickHouse/ClickHouse/issues/27814 - pytest.skip( - "libhdfs3 calls rand function which does not pass harmful check in debug build" - ) - logging.info("Cluster started") - - fs = HdfsClient(hosts=cluster.hdfs_ip) - fs.mkdirs("/clickhouse1") - fs.mkdirs("/clickhouse2") - logging.info("Created HDFS directory") - - yield cluster - finally: - cluster.shutdown() - - -def test_hdfs_zero_copy_replication_insert(cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - """ - CREATE TABLE hdfs_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hdfs_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='hdfs_only' - """ - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", SHARDS * FILES_OVERHEAD_PER_TABLE - ) - - node1.query("INSERT INTO hdfs_test VALUES (now() - INTERVAL 3 DAY, 10)") - node2.query("SYSTEM SYNC REPLICA hdfs_test", timeout=30) - assert node1.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" - assert node2.query("SELECT count() FROM hdfs_test FORMAT Values") == "(1)" - assert ( - node1.query("SELECT id FROM hdfs_test ORDER BY dt FORMAT Values") == "(10)" - ) - assert ( - node2.query("SELECT id FROM hdfs_test ORDER BY dt FORMAT Values") == "(10)" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hdfs_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hdfs_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - wait_for_hdfs_objects( - cluster, - "/clickhouse1", - SHARDS * FILES_OVERHEAD_PER_TABLE + FILES_OVERHEAD_PER_PART_COMPACT, - ) - finally: - node1.query("DROP TABLE IF EXISTS hdfs_test SYNC") - node2.query("DROP TABLE IF EXISTS hdfs_test SYNC") - - -@pytest.mark.parametrize( - ("storage_policy", "init_objects"), - [("hybrid", 0), 
("tiered", 0), ("tiered_copy", FILES_OVERHEAD_PER_TABLE)], -) -def test_hdfs_zero_copy_replication_single_move(cluster, storage_policy, init_objects): - node1 = cluster.instances["node1"] - try: - node1.query( - Template( - """ - CREATE TABLE single_node_move_test (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/single_node_move_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='$policy',temporary_directories_lifetime=1 - """ - ).substitute(policy=storage_policy) - ) - wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects) - - node1.query( - "INSERT INTO single_node_move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - - node1.query( - "ALTER TABLE single_node_move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='single_node_move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT - ) - - node1.query( - "ALTER TABLE single_node_move_test MOVE PARTITION ID 'all' TO VOLUME 'main'" - ) - assert ( - node1.query( - "SELECT id FROM single_node_move_test ORDER BY dt FORMAT Values" - ) - == "(10),(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS single_node_move_test SYNC") - - -@pytest.mark.parametrize( - ("storage_policy", "init_objects"), - [("hybrid", 0), ("tiered", 0), ("tiered_copy", SHARDS * FILES_OVERHEAD_PER_TABLE)], -) -def test_hdfs_zero_copy_replication_move(cluster, storage_policy, init_objects): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - Template( - """ - CREATE TABLE move_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/move_test', '{replica}') - ORDER BY (dt, id) - SETTINGS storage_policy='$policy' - """ - ).substitute(policy=storage_policy) - ) - wait_for_hdfs_objects(cluster, "/clickhouse1", init_objects) - - node1.query( - "INSERT INTO move_test VALUES (now() - INTERVAL 3 DAY, 10), (now() - INTERVAL 1 DAY, 11)" - ) - node2.query("SYSTEM SYNC REPLICA move_test", timeout=30) - - assert ( - node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - - node1.query( - "ALTER TABLE move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT - ) - - node2.query( - "ALTER TABLE move_test MOVE PARTITION ID 'all' TO VOLUME 'external'" - ) - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='move_test' FORMAT Values" - ) - == "('all','hdfs1')" - ) - assert ( - node1.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM move_test ORDER BY dt FORMAT Values") - == "(10),(11)" - ) - wait_for_hdfs_objects( - cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT 
- ) - finally: - node1.query("DROP TABLE IF EXISTS move_test SYNC") - node2.query("DROP TABLE IF EXISTS move_test SYNC") - - -@pytest.mark.parametrize(("storage_policy"), ["hybrid", "tiered", "tiered_copy"]) -def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - Template( - """ - CREATE TABLE ttl_move_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/ttl_move_test', '{replica}') - ORDER BY (dt, id) - TTL dt + INTERVAL 2 DAY TO VOLUME 'external' - SETTINGS storage_policy='$policy' - """ - ).substitute(policy=storage_policy) - ) - - node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 3 DAY, 10)") - node1.query("INSERT INTO ttl_move_test VALUES (now() - INTERVAL 1 DAY, 11)") - - node1.query("OPTIMIZE TABLE ttl_move_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_move_test", timeout=30) - - assert_eq_with_retry(node1, "SELECT count() FROM ttl_move_test", "2") - assert_eq_with_retry(node2, "SELECT count() FROM ttl_move_test", "2") - assert ( - node1.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") - == "(10),(11)" - ) - assert ( - node2.query("SELECT id FROM ttl_move_test ORDER BY id FORMAT Values") - == "(10),(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS ttl_move_test SYNC") - node2.query("DROP TABLE IF EXISTS ttl_move_test SYNC") - - -def test_hdfs_zero_copy_with_ttl_delete(cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - try: - node1.query( - """ - CREATE TABLE ttl_delete_test ON CLUSTER test_cluster (dt DateTime, id Int64) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/ttl_delete_test', '{replica}') - ORDER BY (dt, id) - TTL dt + INTERVAL 2 DAY - SETTINGS storage_policy='tiered' - """ - ) - - node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 3 DAY, 10)") - node1.query("INSERT INTO ttl_delete_test VALUES (now() - INTERVAL 1 DAY, 11)") - - node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") - node2.query("SYSTEM SYNC REPLICA ttl_delete_test", timeout=30) - - assert_eq_with_retry(node1, "SELECT count() FROM ttl_delete_test", "1") - assert_eq_with_retry(node2, "SELECT count() FROM ttl_delete_test", "1") - - assert ( - node1.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") - == "(11)" - ) - assert ( - node2.query("SELECT id FROM ttl_delete_test ORDER BY id FORMAT Values") - == "(11)" - ) - finally: - node1.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") - node2.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") diff --git a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml index 00aa03b1a92..829bf16fdfb 100644 --- a/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_s3/configs/config.d/storage_conf.xml @@ -35,6 +35,7 @@ 0 0 + 1.0 diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml index 96d59d5633e..f78256bdb26 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml @@ -29,6 +29,7 @@ 0 true + 1.0 diff 
--git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml new file mode 100644 index 00000000000..8ff3bdf9a2f --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/config/merge_tree_conf.xml @@ -0,0 +1,5 @@ + + + 30000 + + diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py new file mode 100644 index 00000000000..67dd03098e9 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +from multiprocessing.dummy import Pool +import time + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", + main_configs=["config/merge_tree_conf.xml"], + with_zookeeper=True, + stay_alive=True, +) + +node2 = cluster.add_instance( + "node2", + main_configs=["config/merge_tree_conf.xml"], + with_zookeeper=True, + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def test_shutdown_and_wait(start_cluster): + for i, node in enumerate([node1, node2]): + node.query( + f"CREATE TABLE test_table (value UInt64) ENGINE=ReplicatedMergeTree('/test/table', 'r{i}') ORDER BY tuple()" + ) + + node1.query("INSERT INTO test_table VALUES (0)") + node2.query("SYSTEM SYNC REPLICA test_table") + + assert node1.query("SELECT * FROM test_table") == "0\n" + assert node2.query("SELECT * FROM test_table") == "0\n" + + def soft_shutdown(node): + node.stop_clickhouse(kill=False, stop_wait_sec=60) + + p = Pool(50) + + def insert(value): + node1.query(f"INSERT INTO test_table VALUES ({value})") + + with PartitionManager() as pm: + pm.partition_instances(node1, node2) + p.map(insert, range(1, 50)) + + # Start shutdown async + waiter = p.apply_async(soft_shutdown, (node1,)) + # to be sure that shutdown started + time.sleep(5) + + # node2 is partitioned and doesn't see any data + assert node2.query("SELECT * FROM test_table") == "0\n" + + # Restore network + pm.heal_all() + + # wait for shutdown to finish + waiter.get() + + node2.query("SYSTEM SYNC REPLICA test_table", timeout=5) + + # check second replica has all data + assert node2.query("SELECT sum(value) FROM test_table") == "1225\n" + # and nothing in queue + assert node2.query("SELECT count() FROM system.replication_queue") == "0\n" + + # It can happen that the second replica is superfast + assert node1.contains_in_log( + "Successfully waited all the parts" + ) or node1.contains_in_log("All parts found on replica") diff --git a/tests/integration/test_replicated_mutations/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_replicated_mutations/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git 
a/tests/integration/test_replicated_mutations/test.py b/tests/integration/test_replicated_mutations/test.py index 7479f082b06..e20bcf367e3 100644 --- a/tests/integration/test_replicated_mutations/test.py +++ b/tests/integration/test_replicated_mutations/test.py @@ -15,6 +15,7 @@ node2 = cluster.add_instance( "node2", macros={"cluster": "test1"}, main_configs=["configs/merge_tree.xml"], + user_configs=["configs/users.xml"], with_zookeeper=True, ) @@ -22,12 +23,14 @@ node3 = cluster.add_instance( "node3", macros={"cluster": "test2"}, main_configs=["configs/merge_tree_max_parts.xml"], + user_configs=["configs/users.xml"], with_zookeeper=True, ) node4 = cluster.add_instance( "node4", macros={"cluster": "test2"}, main_configs=["configs/merge_tree_max_parts.xml"], + user_configs=["configs/users.xml"], with_zookeeper=True, ) @@ -35,6 +38,7 @@ node5 = cluster.add_instance( "node5", macros={"cluster": "test3"}, main_configs=["configs/merge_tree_max_parts.xml"], + user_configs=["configs/users.xml"], ) all_nodes = [node1, node2, node3, node4, node5] diff --git a/tests/integration/test_replicating_constants/test.py b/tests/integration/test_replicating_constants/test.py index 82cc5e757f1..00781e473c7 100644 --- a/tests/integration/test_replicating_constants/test.py +++ b/tests/integration/test_replicating_constants/test.py @@ -11,6 +11,7 @@ node2 = cluster.add_instance( image="yandex/clickhouse-server", tag="19.1.14", with_installed_binary=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_restore_replica/test.py b/tests/integration/test_restore_replica/test.py index 31c503f6184..3d0baa87c21 100644 --- a/tests/integration/test_restore_replica/test.py +++ b/tests/integration/test_restore_replica/test.py @@ -44,6 +44,18 @@ def fill_table(): check_data(499500, 1000) +# kazoo.delete may throw NotEmptyError on concurrent modifications of the path +def zk_rmr_with_retries(zk, path): + for i in range(1, 10): + try: + zk.delete(path, recursive=True) + return + except Exception as ex: + print(ex) + time.sleep(0.5) + assert False + + @pytest.fixture(scope="module") def start_cluster(): try: @@ -84,7 +96,7 @@ def test_restore_replica_sequential(start_cluster): fill_table() print("Deleting root ZK path metadata") - zk.delete("/clickhouse/tables/test", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test") assert zk.exists("/clickhouse/tables/test") is None node_1.query("SYSTEM RESTART REPLICA test") @@ -119,7 +131,7 @@ def test_restore_replica_parallel(start_cluster): fill_table() print("Deleting root ZK path metadata") - zk.delete("/clickhouse/tables/test", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test") assert zk.exists("/clickhouse/tables/test") is None node_1.query("SYSTEM RESTART REPLICA test") @@ -147,12 +159,12 @@ def test_restore_replica_alive_replicas(start_cluster): fill_table() print("Deleting replica2 path, trying to restore replica1") - zk.delete("/clickhouse/tables/test/replicas/replica2", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test/replicas/replica2") assert zk.exists("/clickhouse/tables/test/replicas/replica2") is None node_1.query_and_get_error("SYSTEM RESTORE REPLICA test") print("Deleting replica1 path, trying to restore replica1") - zk.delete("/clickhouse/tables/test/replicas/replica1", recursive=True) + zk_rmr_with_retries(zk, "/clickhouse/tables/test/replicas/replica1") assert zk.exists("/clickhouse/tables/test/replicas/replica1") is None node_1.query("SYSTEM RESTART REPLICA test") diff --git 
a/tests/integration/test_reverse_dns_query/configs/listen_host.xml b/tests/integration/test_reverse_dns_query/configs/listen_host.xml index 58ef55cd3f3..9c27c612f63 100644 --- a/tests/integration/test_reverse_dns_query/configs/listen_host.xml +++ b/tests/integration/test_reverse_dns_query/configs/listen_host.xml @@ -1,5 +1,5 @@ - + :: 0.0.0.0 1 - +
diff --git a/tests/integration/test_reverse_dns_query/coredns_config/Corefile b/tests/integration/test_reverse_dns_query/coredns_config/Corefile index 84d297f7cdf..3edf37dafa5 100644 --- a/tests/integration/test_reverse_dns_query/coredns_config/Corefile +++ b/tests/integration/test_reverse_dns_query/coredns_config/Corefile @@ -1,4 +1,8 @@ . { + hosts /example.com { + reload "20ms" + fallthrough + } forward . 127.0.0.11 log } diff --git a/tests/integration/test_reverse_dns_query/coredns_config/example.com b/tests/integration/test_reverse_dns_query/coredns_config/example.com new file mode 100644 index 00000000000..6c6e4cbee2e --- /dev/null +++ b/tests/integration/test_reverse_dns_query/coredns_config/example.com @@ -0,0 +1 @@ +filled in runtime, but needs to exist in order to be volume mapped in docker diff --git a/tests/integration/test_reverse_dns_query/test.py b/tests/integration/test_reverse_dns_query/test.py index c5d4304a5d2..00c3956f74f 100644 --- a/tests/integration/test_reverse_dns_query/test.py +++ b/tests/integration/test_reverse_dns_query/test.py @@ -1,4 +1,5 @@ import pytest +import socket from helpers.cluster import ClickHouseCluster, get_docker_compose_path, run_and_check from time import sleep import os @@ -30,6 +31,28 @@ def started_cluster(): cluster.shutdown() +def check_ptr_record(ip, hostname): + try: + host, aliaslist, ipaddrlist = socket.gethostbyaddr(ip) + if hostname.lower() == host.lower(): + return True + except socket.herror: + pass + return False + + +def setup_dns_server(ip): + domains_string = "test.example.com" + example_file_path = f'{ch_server.env_variables["COREDNS_CONFIG_DIR"]}/example.com' + run_and_check(f"echo '{ip} {domains_string}' > {example_file_path}", shell=True) + + # DNS server takes time to reload the configuration. 
+ for try_num in range(10): + if all(check_ptr_record(ip, host) for host in domains_string.split()): + break + sleep(1) + + def setup_ch_server(dns_server_ip): ch_server.exec_in_container( (["bash", "-c", f"echo 'nameserver {dns_server_ip}' > /etc/resolv.conf"]) @@ -42,9 +65,10 @@ def setup_ch_server(dns_server_ip): def test_reverse_dns_query(started_cluster): dns_server_ip = cluster.get_instance_ip(cluster.coredns_host) - + random_ipv6 = "4ae8:fa0f:ee1d:68c5:0b76:1b79:7ae6:1549" # https://commentpicker.com/ip-address-generator.php + setup_dns_server(random_ipv6) setup_ch_server(dns_server_ip) for _ in range(0, 200): - response = ch_server.query("select reverseDNSQuery('2001:4860:4860::8888')") - assert response == "['dns.google']\n" + response = ch_server.query(f"select reverseDNSQuery('{random_ipv6}')") + assert response == "['test.example.com']\n" diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 9a352f43eab..0e1bbea9cea 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -287,7 +287,7 @@ def test_introspection(): assert instance.query( "SELECT name, storage from system.roles WHERE name IN ('R1', 'R2') ORDER BY name" - ) == TSV([["R1", "local directory"], ["R2", "local directory"]]) + ) == TSV([["R1", "local_directory"], ["R2", "local_directory"]]) assert instance.query( "SELECT * from system.grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, access_type, database, table, column, is_partial_revoke, grant_option" @@ -301,7 +301,7 @@ def test_introspection(): ) assert instance.query( - "SELECT * from system.role_grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, granted_role_name" + "SELECT user_name, role_name, granted_role_name, granted_role_is_default, with_admin_option from system.role_grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, granted_role_name" ) == TSV([["A", "\\N", "R1", 1, 0], ["B", "\\N", "R2", 1, 1]]) assert instance.query( diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 1933823f5d2..d88a8f2a243 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -420,7 +420,7 @@ def test_introspection(): "mydb", "local", "5b23c389-7e18-06bf-a6bc-dd1afbbc0a97", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -433,7 +433,7 @@ def test_introspection(): "mydb", "filtered_table1", "9e8a8f62-4965-2b5e-8599-57c7b99b3549", - "users.xml", + "users_xml", "a = 1", 0, 0, @@ -446,7 +446,7 @@ def test_introspection(): "mydb", "filtered_table2", "cffae79d-b9bf-a2ef-b798-019c18470b25", - "users.xml", + "users_xml", "a + b < 1 or c - d > 5", 0, 0, @@ -459,7 +459,7 @@ def test_introspection(): "mydb", "filtered_table3", "12fc5cef-e3da-3940-ec79-d8be3911f42b", - "users.xml", + "users_xml", "c = 1", 0, 0, @@ -636,7 +636,9 @@ def test_grant_create_row_policy(): assert node.query("SHOW POLICIES") == "" node.query("CREATE USER X") - expected_error = "necessary to have grant CREATE ROW POLICY ON mydb.filtered_table1" + expected_error = ( + "necessary to have the grant CREATE ROW POLICY ON mydb.filtered_table1" + ) assert expected_error in node.query_and_get_error( "CREATE POLICY pA ON mydb.filtered_table1 FOR SELECT USING a1024 1 true + 1.0 diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index 
2a4e0eece08..0ca81a27802 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -153,96 +153,6 @@ def test_s3_zero_copy_replication(started_cluster, policy): node2.query("DROP TABLE IF EXISTS s3_test SYNC") -@pytest.mark.skip(reason="Test is flaky (and never was stable)") -def test_s3_zero_copy_on_hybrid_storage(started_cluster): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - - node1.query( - """ - CREATE TABLE hybrid_test ON CLUSTER test_cluster (id UInt32, value String) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/hybrid_test', '{}') - ORDER BY id - SETTINGS storage_policy='hybrid',temporary_directories_lifetime=1 - """.format( - "{replica}" - ) - ) - - node1.query("INSERT INTO hybrid_test VALUES (0,'data'),(1,'data')") - node2.query("SYSTEM SYNC REPLICA hybrid_test", timeout=30) - - assert ( - node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - assert ( - node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - - node1.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'") - - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','default')" - ) - - # Total objects in S3 - s3_objects = get_large_objects_count(cluster, size=0) - - node2.query("ALTER TABLE hybrid_test MOVE PARTITION ID 'all' TO DISK 's31'") - - assert ( - node1.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - assert ( - node2.query( - "SELECT partition_id,disk_name FROM system.parts WHERE table='hybrid_test' FORMAT Values" - ) - == "('all','s31')" - ) - - # Check that after moving partition on node2 no new obects on s3 - wait_for_large_objects_count(cluster, s3_objects, size=0) - - assert ( - node1.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - assert ( - node2.query("SELECT * FROM hybrid_test ORDER BY id FORMAT Values") - == "(0,'data'),(1,'data')" - ) - - node1.query("DROP TABLE IF EXISTS hybrid_test SYNC") - node2.query("DROP TABLE IF EXISTS hybrid_test SYNC") - - def insert_data_time(node, table, number_of_mb, time, start=0): values = ",".join( f"({x},{time})" diff --git a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml index e179c848be1..7bb7fa875e4 100644 --- a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml +++ b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml @@ -32,6 +32,7 @@ true + 1.0 true diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py deleted file mode 100644 index 5fbe426074f..00000000000 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -import time - -import pytest - -# FIXME 
This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/45887 - -pytestmark = pytest.mark.skip - -from helpers.cluster import ClickHouseCluster - - -single_node_cluster = ClickHouseCluster(__file__) -small_node = single_node_cluster.add_instance( - "small_node", main_configs=["configs/s3.xml"], with_minio=True -) - - -@pytest.fixture(scope="module") -def started_single_node_cluster(): - try: - single_node_cluster.start() - - yield single_node_cluster - finally: - single_node_cluster.shutdown() - - -def test_move_and_s3_memory_usage(started_single_node_cluster): - if small_node.is_built_with_sanitizer() or small_node.is_debug_build(): - pytest.skip("Disabled for debug and sanitizers. Too slow.") - - small_node.query( - "CREATE TABLE s3_test_with_ttl (x UInt32, a String codec(NONE), b String codec(NONE), c String codec(NONE), d String codec(NONE), e String codec(NONE)) engine = MergeTree order by x partition by x SETTINGS storage_policy='s3_and_default'" - ) - - for _ in range(10): - small_node.query( - "insert into s3_test_with_ttl select 0, repeat('a', 100), repeat('b', 100), repeat('c', 100), repeat('d', 100), repeat('e', 100) from zeros(400000) settings max_block_size = 8192, max_insert_block_size=10000000, min_insert_block_size_rows=10000000" - ) - - # After this, we should have 5 columns per 10 * 100 * 400000 ~ 400 MB; total ~2G data in partition - small_node.query( - "optimize table s3_test_with_ttl final", - settings={ - "send_logs_level": "error", - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - - small_node.query("system flush logs") - # Will take memory usage from metric_log. - # It is easier then specifying total memory limit (insert queries can hit this limit). - small_node.query("truncate table system.metric_log") - - small_node.query( - "alter table s3_test_with_ttl move partition 0 to volume 'external'", - settings={ - "send_logs_level": "error", - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - small_node.query("system flush logs") - max_usage = small_node.query( - """ - select max(m.val - am.val * 4096) from - (select toStartOfMinute(event_time) as time, max(CurrentMetric_MemoryTracking) as val from system.metric_log group by time) as m join - (select toStartOfMinute(event_time) as time, min(value) as val from system.asynchronous_metric_log where metric='jemalloc.arenas.all.pdirty' group by time) as am using time;""" - ) - # 3G limit is a big one. However, we can hit it anyway with parallel s3 writes enabled. - # Also actual value can be bigger because of memory drift. - # Increase it a little bit if test fails. 
- assert int(max_usage) < 3e9 - res = small_node.query( - "select * from system.errors where last_error_message like '%Memory limit%' limit 1", - settings={ - "allow_prefetched_read_pool_for_remote_filesystem": 0, - }, - ) - assert res == "" diff --git a/tests/integration/test_select_access_rights/test_from_system_tables.py b/tests/integration/test_select_access_rights/test_from_system_tables.py index ac938a9694a..3cd6b90109c 100644 --- a/tests/integration/test_select_access_rights/test_from_system_tables.py +++ b/tests/integration/test_select_access_rights/test_from_system_tables.py @@ -44,14 +44,14 @@ def test_system_db(): assert node.query("SELECT count()>0 FROM system.settings", user="another") == "1\n" expected_error = ( - "necessary to have grant SELECT for at least one column on system.users" + "necessary to have the grant SELECT for at least one column on system.users" ) assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="another" ) expected_error = ( - "necessary to have grant SELECT for at least one column on system.clusters" + "necessary to have the grant SELECT for at least one column on system.clusters" ) assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.clusters", user="another" @@ -72,14 +72,14 @@ def test_system_db(): assert node.query("SELECT count()>0 FROM system.settings", user="sqluser") == "1\n" expected_error = ( - "necessary to have grant SELECT for at least one column on system.users" + "necessary to have the grant SELECT for at least one column on system.users" ) assert expected_error in node.query_and_get_error( "SELECT count()>0 FROM system.users", user="sqluser" ) expected_error = ( - "necessary to have grant SELECT for at least one column on system.clusters" + "necessary to have the grant SELECT for at least one column on system.clusters" ) assert node.query_and_get_error( "SELECT count()>0 FROM system.clusters", user="sqluser" @@ -138,7 +138,7 @@ def test_information_schema(): ) expected_error = ( - "necessary to have grant SELECT(table_name) ON information_schema.tables" + "necessary to have the grant SELECT(table_name) ON information_schema.tables" ) assert expected_error in node.query_and_get_error( "SELECT count() FROM information_schema.tables WHERE table_name='table1'", diff --git a/tests/integration/test_select_access_rights/test_main.py b/tests/integration/test_select_access_rights/test_main.py index 76940cdadb4..eedecc2d30c 100644 --- a/tests/integration/test_select_access_rights/test_main.py +++ b/tests/integration/test_select_access_rights/test_main.py @@ -30,7 +30,7 @@ def test_select_single_column(): select_query = "SELECT a FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -39,7 +39,7 @@ def test_select_single_column(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -51,7 +51,7 @@ def test_select_single_column_with_table_grant(): select_query = "SELECT a FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -60,7 +60,7 @@ def 
test_select_single_column_with_table_grant(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -72,19 +72,19 @@ def test_select_all_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(d) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(a) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -99,7 +99,7 @@ def test_select_all_columns_with_table_grant(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -114,7 +114,7 @@ def test_alias(): select_query = "SELECT x, y, x + y AS s FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -129,7 +129,7 @@ def test_alias_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -138,7 +138,7 @@ def test_alias_columns(): select_query = "SELECT s FROM table1" assert ( - "it's necessary to have grant SELECT(s) ON default.table1" + "it's necessary to have the grant SELECT(s) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -156,7 +156,7 @@ def test_materialized_columns(): select_query = "SELECT * FROM table1" assert ( - "it's necessary to have grant SELECT(x, y) ON default.table1" + "it's necessary to have the grant SELECT(x, y) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -165,7 +165,7 @@ def test_materialized_columns(): select_query = "SELECT p FROM table1" assert ( - "it's necessary to have grant SELECT(p) ON default.table1" + "it's necessary to have the grant SELECT(p) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -186,13 +186,13 @@ def test_select_join(): select_query = "SELECT * FROM table1 JOIN table2 USING(d)" assert ( - "it's necessary to have grant SELECT(d, x, y) ON default.table2" + "it's necessary to have the grant SELECT(d, x, y) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(d, x, y) ON default.table2 TO A") assert ( - "it's necessary to have grant SELECT(d, a, b) ON default.table1" + "it's necessary to have the grant SELECT(d, a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -201,7 +201,7 @@ def test_select_join(): instance.query("REVOKE SELECT ON default.table2 FROM A") assert ( - "it's 
necessary to have grant SELECT(d, x, y) ON default.table2" + "it's necessary to have the grant SELECT(d, x, y) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) @@ -216,13 +216,13 @@ def test_select_union(): select_query = "SELECT * FROM table1 UNION ALL SELECT * FROM table2" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) instance.query("GRANT SELECT(a, b) ON default.table1 TO A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table2" + "it's necessary to have the grant SELECT(a, b) ON default.table2" in instance.query_and_get_error(select_query, user="A") ) @@ -231,7 +231,7 @@ def test_select_union(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -243,7 +243,7 @@ def test_select_count(): select_query = "SELECT count() FROM table1" assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -252,7 +252,7 @@ def test_select_count(): instance.query("REVOKE SELECT(x) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -261,7 +261,7 @@ def test_select_count(): instance.query("REVOKE SELECT(y) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -279,7 +279,7 @@ def test_select_where(): select_query = "SELECT a FROM table1 WHERE b = 0" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -288,7 +288,7 @@ def test_select_where(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -306,7 +306,7 @@ def test_select_prewhere(): select_query = "SELECT a FROM table1 PREWHERE b = 0" assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -315,7 +315,7 @@ def test_select_prewhere(): instance.query("REVOKE SELECT ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a, b) ON default.table1" + "it's necessary to have the grant SELECT(a, b) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) @@ -334,11 +334,11 @@ def test_select_with_row_policy(): select_query = "SELECT a FROM table1" select_query2 = "SELECT count() FROM table1" assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in 
instance.query_and_get_error(select_query, user="A") ) assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query2, user="A") ) @@ -348,10 +348,10 @@ def test_select_with_row_policy(): instance.query("REVOKE SELECT(a) ON default.table1 FROM A") assert ( - "it's necessary to have grant SELECT(a) ON default.table1" + "it's necessary to have the grant SELECT(a) ON default.table1" in instance.query_and_get_error(select_query, user="A") ) assert ( - "it's necessary to have grant SELECT for at least one column on default.table1" + "it's necessary to have the grant SELECT for at least one column on default.table1" in instance.query_and_get_error(select_query2, user="A") ) diff --git a/tests/integration/test_session_log/.gitignore b/tests/integration/test_session_log/.gitignore new file mode 100644 index 00000000000..edf565ec632 --- /dev/null +++ b/tests/integration/test_session_log/.gitignore @@ -0,0 +1 @@ +_gen diff --git a/tests/integration/test_session_log/__init__.py b/tests/integration/test_session_log/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_session_log/configs/log.xml b/tests/integration/test_session_log/configs/log.xml new file mode 100644 index 00000000000..7a079b81e69 --- /dev/null +++ b/tests/integration/test_session_log/configs/log.xml @@ -0,0 +1,9 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + \ No newline at end of file diff --git a/tests/integration/test_session_log/configs/ports.xml b/tests/integration/test_session_log/configs/ports.xml new file mode 100644 index 00000000000..fbaefc16b3a --- /dev/null +++ b/tests/integration/test_session_log/configs/ports.xml @@ -0,0 +1,9 @@ + + 5433 + 9001 + 9100 + + + false + + \ No newline at end of file diff --git a/tests/integration/test_session_log/configs/session_log.xml b/tests/integration/test_session_log/configs/session_log.xml new file mode 100644 index 00000000000..a0e4e3e2216 --- /dev/null +++ b/tests/integration/test_session_log/configs/session_log.xml @@ -0,0 +1,9 @@ + + + system + session_log
+
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </session_log>
+</clickhouse>
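The new tests/integration/test_session_log/test.py added below verifies that every successful login over gRPC, MySQL and PostgreSQL leaves a matching Logout record in system.session_log. The pairing is done by intersecting the LoginSuccess and Logout rows on a fixed list of fields; a condensed sketch of that check, assuming a started cluster instance such as the one the test builds with helpers.cluster (the helper name count_matched_logins_and_logouts is illustrative, not part of the test):

# Fields on which a LoginSuccess row must coincide with its Logout row.
SESSION_LOG_MATCHING_FIELDS = (
    "auth_id, auth_type, client_version_major, client_version_minor, "
    "client_version_patch, interface"
)

def count_matched_logins_and_logouts(node, user):
    # node is assumed to be a running ClickHouse instance from helpers.cluster;
    # the query mirrors the one used by the test below.
    return node.query(
        f"SELECT COUNT(*) FROM ("
        f"SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log "
        f"WHERE user = '{user}' AND type = 'LoginSuccess' "
        f"INTERSECT "
        f"SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log "
        f"WHERE user = '{user}' AND type = 'Logout')"
    )

With one successful session per protocol the test expects this count to be "1\n" per user, and "30\n" for the parallel_user case with ten threads per protocol.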
diff --git a/tests/integration/test_session_log/configs/users.xml b/tests/integration/test_session_log/configs/users.xml new file mode 100644 index 00000000000..0416dfadc8a --- /dev/null +++ b/tests/integration/test_session_log/configs/users.xml @@ -0,0 +1,23 @@ + + + + 0 + + + + + + + pass + + + pass + + + pass + + + pass + + + \ No newline at end of file diff --git a/tests/integration/test_session_log/protos/clickhouse_grpc.proto b/tests/integration/test_session_log/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_session_log/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_session_log/test.py b/tests/integration/test_session_log/test.py new file mode 100644 index 00000000000..bb7cafa4ee6 --- /dev/null +++ b/tests/integration/test_session_log/test.py @@ -0,0 +1,292 @@ +import os + +import grpc +import pymysql.connections +import pytest +import random +import sys +import threading + +from helpers.cluster import ClickHouseCluster, run_and_check + +POSTGRES_SERVER_PORT = 5433 +MYSQL_SERVER_PORT = 9001 +GRPC_PORT = 9100 +SESSION_LOG_MATCHING_FIELDS = "auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_ENCODING = "utf-8" + +# Use grpcio-tools to generate *pb2.py files from *.proto. +proto_dir = os.path.join(SCRIPT_DIR, "./protos") +gen_dir = os.path.join(SCRIPT_DIR, "./_gen") +os.makedirs(gen_dir, exist_ok=True) +run_and_check( + f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto", + shell=True, +) + +sys.path.append(gen_dir) + +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ports.xml", + "configs/log.xml", + "configs/session_log.xml", + ], + user_configs=["configs/users.xml"], + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + env_variables={ + "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") + }, + with_postgres=True, +) + + +def grpc_get_url(): + return f"{instance.ip_address}:{GRPC_PORT}" + + +def grpc_create_insecure_channel(): + channel = grpc.insecure_channel(grpc_get_url()) + grpc.channel_ready_future(channel).result(timeout=2) + return channel + + +session_id_counter = 0 + + +def next_session_id(): + global session_id_counter + session_id = session_id_counter + session_id_counter += 1 + return str(session_id) + + +def grpc_query(query, user_, pass_, raise_exception): + try: + query_info = clickhouse_grpc_pb2.QueryInfo( + query=query, + session_id=next_session_id(), + user_name=user_, + password=pass_, + ) + channel = grpc_create_insecure_channel() + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + + return result.output.decode(DEFAULT_ENCODING) + except Exception: + assert raise_exception + + +def postgres_query(query, user_, pass_, raise_exception): + try: + connection_string = f"host={instance.hostname} port={POSTGRES_SERVER_PORT} dbname=default user={user_} password={pass_}" + cluster.exec_in_container( + cluster.postgres_id, + [ + "/usr/bin/psql", + 
connection_string, + "--no-align", + "--field-separator=' '", + "-c", + query, + ], + shell=True, + ) + except Exception: + assert raise_exception + + +def mysql_query(query, user_, pass_, raise_exception): + try: + client = pymysql.connections.Connection( + host=instance.ip_address, + user=user_, + password=pass_, + database="default", + port=MYSQL_SERVER_PORT, + ) + cursor = client.cursor(pymysql.cursors.DictCursor) + if raise_exception: + with pytest.raises(Exception): + cursor.execute(query) + else: + cursor.execute(query) + cursor.fetchall() + except Exception: + assert raise_exception + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_grpc_session(started_cluster): + grpc_query("SELECT 1", "grpc_user", "pass", False) + grpc_query("SELECT 2", "grpc_user", "wrong_pass", True) + grpc_query("SELECT 3", "wrong_grpc_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "grpc_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'Logout'" + ) + assert logout_records == "grpc_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "grpc_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_mysql_session(started_cluster): + mysql_query("SELECT 1", "mysql_user", "pass", False) + mysql_query("SELECT 2", "mysql_user", "wrong_pass", True) + mysql_query("SELECT 3", "wrong_mysql_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "mysql_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'Logout'" + ) + assert logout_records == "mysql_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "mysql_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_postgres_session(started_cluster): + postgres_query("SELECT 1", "postgres_user", "pass", False) + postgres_query("SELECT 2", "postgres_user", "wrong_pass", True) + postgres_query("SELECT 3", "wrong_postgres_user", 
"pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "postgres_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'Logout'" + ) + assert logout_records == "postgres_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "postgres_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_parallel_sessions(started_cluster): + thread_list = [] + for _ in range(10): + # Sleep time does not significantly matter here, + # test should pass even without sleeping. + for function in [postgres_query, grpc_query, mysql_query]: + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "parallel_user", + "pass", + False, + ), + ) + thread.start() + thread_list.append(thread) + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "parallel_user", + "wrong_pass", + True, + ), + ) + thread.start() + thread_list.append(thread) + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "wrong_parallel_user", + "pass", + True, + ), + ) + thread.start() + thread_list.append(thread) + + for thread in thread_list: + thread.join() + + instance.query("SYSTEM FLUSH LOGS") + port_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user'" + ) + assert port_0_sessions == "90\n" + + port_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_port = 0" + ) + assert port_0_sessions == "0\n" + + address_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_address = toIPv6('::')" + ) + assert address_0_sessions == "0\n" + + grpc_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'gRPC'" + ) + assert grpc_sessions == "30\n" + + mysql_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'MySQL'" + ) + assert mysql_sessions == "30\n" + + postgres_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'PostgreSQL'" + ) + assert postgres_sessions == "30\n" + + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "30\n" + + logout_failure_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 
'parallel_user' AND type = 'LoginFailure'" + ) + assert logout_failure_sessions == "30\n" diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 13acc3f1042..5e40b534cee 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -88,7 +88,7 @@ def test_smoke(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "['robin']", "[]"] + ["xyz", "local_directory", 1, 0, "['robin']", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -120,7 +120,7 @@ def test_smoke(): instance.query("SET max_memory_usage = 80000000", user="robin") instance.query("SET max_memory_usage = 120000000", user="robin") assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "[]", "[]"] + ["xyz", "local_directory", 1, 0, "[]", "[]"] ] assert system_settings_profile_elements(user_name="robin") == [] @@ -201,7 +201,7 @@ def test_settings_from_granted_role(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "[]", "[]"] + ["xyz", "local_directory", 2, 0, "[]", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -276,7 +276,7 @@ def test_settings_from_granted_role(): ) ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "['worker']", "[]"] + ["xyz", "local_directory", 2, 0, "['worker']", "[]"] ] instance.query("ALTER SETTINGS PROFILE xyz TO NONE") @@ -293,7 +293,7 @@ def test_settings_from_granted_role(): ) instance.query("SET max_memory_usage = 120000000", user="robin") assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 2, 0, "[]", "[]"] + ["xyz", "local_directory", 2, 0, "[]", "[]"] ] @@ -323,7 +323,7 @@ def test_inheritance(): ) assert system_settings_profile("xyz") == [ - ["xyz", "local directory", 1, 0, "[]", "[]"] + ["xyz", "local_directory", 1, 0, "[]", "[]"] ] assert system_settings_profile_elements(profile_name="xyz") == [ [ @@ -340,7 +340,7 @@ def test_inheritance(): ] ] assert system_settings_profile("alpha") == [ - ["alpha", "local directory", 1, 0, "['robin']", "[]"] + ["alpha", "local_directory", 1, 0, "['robin']", "[]"] ] assert system_settings_profile_elements(profile_name="alpha") == [ ["alpha", "\\N", "\\N", 0, "\\N", "\\N", "\\N", "\\N", "\\N", "xyz"] @@ -589,10 +589,10 @@ def test_function_current_profiles(): def test_allow_ddl(): - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin" ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "GRANT CREATE ON tbl TO robin", user="robin" ) assert "DDL queries are prohibited" in instance.query_and_get_error( @@ -615,10 +615,10 @@ def test_allow_introspection(): assert "Introspection functions are disabled" in instance.query_and_get_error( "SELECT demangle('a')" ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin" ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin", settings={"allow_introspection_functions": 1}, @@ -659,7 +659,7 @@ def 
test_allow_introspection(): "REVOKE demangle ON *.* FROM robin", settings={"allow_introspection_functions": 1}, ) - assert "it's necessary to have grant" in instance.query_and_get_error( + assert "it's necessary to have the grant" in instance.query_and_get_error( "SELECT demangle('a')", user="robin" ) diff --git a/tests/integration/test_shutdown_wait_unfinished_queries/configs/users.xml b/tests/integration/test_shutdown_wait_unfinished_queries/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_shutdown_wait_unfinished_queries/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_shutdown_wait_unfinished_queries/test.py b/tests/integration/test_shutdown_wait_unfinished_queries/test.py index ae0710149de..074667fc92f 100644 --- a/tests/integration/test_shutdown_wait_unfinished_queries/test.py +++ b/tests/integration/test_shutdown_wait_unfinished_queries/test.py @@ -6,10 +6,16 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node_wait_queries = cluster.add_instance( - "node_wait_queries", main_configs=["configs/config_wait.xml"], stay_alive=True + "node_wait_queries", + main_configs=["configs/config_wait.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, ) node_kill_queries = cluster.add_instance( - "node_kill_queries", main_configs=["configs/config_kill.xml"], stay_alive=True + "node_kill_queries", + main_configs=["configs/config_kill.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, ) global result diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index ff2de7491e1..d1ae39ca378 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -208,7 +208,9 @@ def test_https_wrong_cert(): with pytest.raises(Exception) as err: execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning(f"Failed attempt with wrong cert, err: {err_str}") continue @@ -314,7 +316,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: peter, err: {err_str}" @@ -334,7 +338,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: jane, err: {err_str}" diff --git a/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml b/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml new file mode 100644 index 00000000000..ffa4673c9ee --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/configs/cluster.xml @@ -0,0 +1,39 @@ + + + + + + node_0 + 9000 + + + node_1 + 9000 + + + node_2 + 9000 + + + + + + + + node_0 + 9000 + + + + + node_1 + 19000 + + + + + + + simple_cluster + + \ No newline at end of file diff --git a/tests/integration/test_storage_azure_blob_storage/test.py 
b/tests/integration/test_storage_azure_blob_storage/test.py index 21f57a67495..37cdc9b609b 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -35,14 +35,19 @@ def cluster(): cluster.shutdown() -def azure_query(node, query, try_num=10, settings={}): +def azure_query(node, query, expect_error="false", try_num=10, settings={}): for i in range(try_num): try: - return node.query(query, settings=settings) + if expect_error == "true": + return node.query_and_get_error(query, settings=settings) + else: + return node.query(query, settings=settings) except Exception as ex: retriable_errors = [ "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", ] retry = False for error in retriable_errors: @@ -611,3 +616,99 @@ def test_filter_using_file(cluster): query = f"select count(*) from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_partition_tf_*.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') WHERE _file='test_partition_tf_3.csv'" assert azure_query(node, query) == "1\n" + + +def test_read_subcolumns(cluster): + node = cluster.instances["node"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + ) + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.tsv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\tcont/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c 
UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\tcont/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', 'test_subcolumns.jsonl', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert res == "42\tcont/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + + +def test_read_from_not_existing_container(cluster): + node = cluster.instances["node"] + query = f"select * from azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont_not_exists', 'test_table.csv', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" + expected_err_msg = "container does not exist" + assert expected_err_msg in azure_query(node, query, expect_error="true") + + +def test_function_signatures(cluster): + node = cluster.instances["node"] + connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1;" + storage_account_url = "http://azurite1:10000/devstoreaccount1" + account_name = "devstoreaccount1" + account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3)", + ) + + # " - connection_string, container_name, blobpath\n" + query_1 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv')" + assert azure_query(node, query_1) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, structure \n" + query_2 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'column1 UInt32')" + assert azure_query(node, query_2) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format \n" + query_3 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV')" + assert azure_query(node, query_3) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format, compression \n" + query_4 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto')" + assert azure_query(node, query_4) == "1\n2\n3\n" + + # " - connection_string, container_name, blobpath, format, compression, structure \n" + query_5 = f"select * from azureBlobStorage('{connection_string}', 'cont', 'test_signature.csv', 'CSV', 'auto', 'column1 UInt32')" + assert azure_query(node, query_5) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key\n" + query_6 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}')" + assert azure_query(node, query_6) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + query_7 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'column1 UInt32')" + assert azure_query(node, query_7) == "1\n2\n3\n" + + # " - 
storage_account_url, container_name, blobpath, account_name, account_key, format\n" + query_8 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV')" + assert azure_query(node, query_8) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + query_9 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto')" + assert azure_query(node, query_9) == "1\n2\n3\n" + + # " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n" + query_10 = f"select * from azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32')" + assert azure_query(node, query_10) == "1\n2\n3\n" diff --git a/tests/integration/test_storage_azure_blob_storage/test_cluster.py b/tests/integration/test_storage_azure_blob_storage/test_cluster.py new file mode 100644 index 00000000000..454f1cdf294 --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/test_cluster.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 + +import gzip +import json +import logging +import os +import io +import random +import threading +import time + +from azure.storage.blob import BlobServiceClient +import helpers.client +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.test_tools import TSV +from helpers.network import PartitionManager +from helpers.mock_servers import start_mock_servers +from helpers.test_tools import exec_query_with_retry +from test_storage_azure_blob_storage.test import azure_query + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node_0", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.add_instance( + "node_1", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.add_instance( + "node_2", + main_configs=["configs/named_collections.xml", "configs/cluster.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], + with_azurite=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def get_azure_file_content(filename): + container_name = "cont" + connection_string = "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + container_client = blob_service_client.get_container_client(container_name) + blob_client = container_client.get_blob_client(filename) + download_stream = blob_client.download_blob() + return download_stream.readall().decode("utf-8") + + +def test_select_all(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " + "'auto', 'key 
UInt64, data String') VALUES (1, 'a'), (2, 'b')", + ) + print(get_azure_file_content("test_cluster_select_all.csv")) + + pure_azure = azure_query( + node, + """ + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto')""", + ) + print(pure_azure) + distributed_azure = azure_query( + node, + """ + SELECT * from azureBlobStorageCluster( + 'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto')""", + ) + print(distributed_azure) + assert TSV(pure_azure) == TSV(distributed_azure) + + +def test_count(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " + "'auto', 'key UInt64') VALUES (1), (2)", + ) + print(get_azure_file_content("test_cluster_count.csv")) + + pure_azure = azure_query( + node, + """ + SELECT count(*) from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64')""", + ) + print(pure_azure) + distributed_azure = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'simple_cluster', 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64')""", + ) + print(distributed_azure) + assert TSV(pure_azure) == TSV(distributed_azure) + + +def test_union_all(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', " + "'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')", + ) + + pure_azure = azure_query( + node, + """ + SELECT * FROM + ( + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + UNION ALL + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + ) + ORDER BY (a) + """, + ) + azure_distributed = azure_query( + node, + """ + SELECT * FROM + ( + SELECT * from azureBlobStorageCluster( + 'simple_cluster', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + UNION ALL + SELECT * from azureBlobStorageCluster( + 
'simple_cluster', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_parquet_union_all', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', + 'auto', 'a Int32, b String') + ) + ORDER BY (a) + """, + ) + + assert TSV(pure_azure) == TSV(azure_distributed) + + +def test_skip_unavailable_shards(cluster): + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + result = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'cluster_non_existent_port', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + SETTINGS skip_unavailable_shards = 1 + """, + ) + + assert result == "2\n" + + +def test_unset_skip_unavailable_shards(cluster): + # Although skip_unavailable_shards is not set, cluster table functions should always skip unavailable shards. + node = cluster.instances["node_0"] + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + result = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'cluster_non_existent_port', + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + """, + ) + + assert result == "2\n" + + +def test_cluster_with_named_collection(cluster): + node = cluster.instances["node_0"] + + azure_query( + node, + "INSERT INTO TABLE FUNCTION azureBlobStorage(" + "'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', " + "'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " + "'auto', 'a UInt64') VALUES (1), (2)", + ) + + pure_azure = azure_query( + node, + """ + SELECT * from azureBlobStorage( + 'http://azurite1:10000/devstoreaccount1', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==') + """, + ) + + azure_cluster = azure_query( + node, + """ + SELECT * from azureBlobStorageCluster( + 'simple_cluster', azure_conf2, container='cont', blob_path='test_cluster_with_named_collection.csv') + """, + ) + + assert TSV(pure_azure) == TSV(azure_cluster) + + +def test_partition_parallel_readig_withcluster(cluster): + node = cluster.instances["node_0"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + partition_by = "column3" + values = "(1, 2, 3), (3, 2, 1), (78, 43, 45)" + filename = "test_tf_{_partition_id}.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage('http://azurite1:10000/devstoreaccount1', 'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', 
'{table_format}') PARTITION BY {partition_by} VALUES {values}", + ) + + assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv") + assert "3,2,1\n" == get_azure_file_content("test_tf_1.csv") + assert "78,43,45\n" == get_azure_file_content("test_tf_45.csv") + + azure_cluster = azure_query( + node, + """ + SELECT count(*) from azureBlobStorageCluster( + 'simple_cluster', + azure_conf2, container='cont', blob_path='test_tf_*.csv', format='CSV', compression='auto', structure='column1 UInt32, column2 UInt32, column3 UInt32') + """, + ) + + assert azure_cluster == "3\n" diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 5ac1d3bea6f..0824a7b5f0f 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -85,6 +85,32 @@ def test_read_write_storage_with_globs(started_cluster): assert "in readonly mode" in str(ex) +def test_storage_with_multidirectory_glob(started_cluster): + hdfs_api = started_cluster.hdfs_api + for i in ["1", "2"]: + hdfs_api.write_data( + f"/multiglob/p{i}/path{i}/postfix/data{i}", f"File{i}\t{i}{i}\n" + ) + assert ( + hdfs_api.read_data(f"/multiglob/p{i}/path{i}/postfix/data{i}") + == f"File{i}\t{i}{i}\n" + ) + + r = node1.query( + "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p1/path1,p2/path2}/postfix/data{1,2}', TSV)" + ) + assert (r == f"File1\t11\nFile2\t22\n") or (r == f"File2\t22\nFile1\t11\n") + + try: + node1.query( + "SELECT * FROM hdfs('hdfs://hdfs1:9000/multiglob/{p4/path1,p2/path3}/postfix/data{1,2}.nonexist', TSV)" + ) + assert False, "Exception have to be thrown" + except Exception as ex: + print(ex) + assert "no files" in str(ex) + + def test_read_write_table(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -866,6 +892,54 @@ def test_skip_empty_files(started_cluster): assert int(res) == 0 +def test_read_subcolumns(started_cluster): + node = started_cluster.instances["node1"] + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + node.query( + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "2\thdfs://hdfs1:9000/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + ) + + res = node.query( + f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "2\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + ) + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert ( + res + == "0\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + ) + + res = node.query( + f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert ( + res + == "42\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + ) + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") 
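The subcolumn tests added above for azureBlobStorage and hdfs also cover the case where the requested top-level column is absent from the file: subcolumns of the missing Tuple come back as type defaults (zeros) unless the column declaration carries an explicit DEFAULT expression, in which case that expression is used. A condensed sketch of the two HDFS queries involved, assuming node is the started cluster instance used in the test:

def read_missing_tuple_column(node):
    # test_subcolumns.jsonl was written with a column named a, so asking for x
    # yields the Tuple type's defaults: 0, (0,0) and 0.
    zeros = node.query(
        "select x.b.d, x.b, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', "
        "auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
    )
    # With an explicit DEFAULT, the declared expression fills the column instead,
    # so the same query returns 42, (42,42) and 42.
    defaults = node.query(
        "select x.b.d, x.b, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', "
        "auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')"
    )
    return zeros, defaults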
diff --git a/tests/integration/test_storage_kafka/configs/users.xml b/tests/integration/test_storage_kafka/configs/users.xml index 3168de649f8..d13651d5f92 100644 --- a/tests/integration/test_storage_kafka/configs/users.xml +++ b/tests/integration/test_storage_kafka/configs/users.xml @@ -4,6 +4,7 @@ 1 0 + 0 diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 9a6d3e0513c..28919cc685e 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -444,6 +444,34 @@ def test_kafka_settings_new_syntax(kafka_cluster): assert members[0]["client_id"] == "instance test 1234" +def test_kafka_settings_predefined_macros(kafka_cluster): + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = '{kafka_broker}:19092', + kafka_topic_list = '{database}_{table}_topic', + kafka_group_name = '{database}_{table}_group', + kafka_format = '{kafka_format_json_each_row}', + kafka_row_delimiter = '\\n', + kafka_commit_on_select = 1, + kafka_client_id = '{database}_{table} test 1234', + kafka_skip_broken_messages = 1; + """ + ) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, "test_kafka_topic", messages) + + result = instance.query("SELECT * FROM test.kafka", ignore_error=True) + kafka_check_result(result, True) + + members = describe_consumer_group(kafka_cluster, "test_kafka_group") + assert members[0]["client_id"] == "test_kafka test 1234" + + def test_kafka_json_as_string(kafka_cluster): kafka_produce( kafka_cluster, @@ -762,7 +790,7 @@ def test_kafka_formats(kafka_cluster): ), ], "extra_settings": ", format_avro_schema_registry_url='http://{}:{}'".format( - kafka_cluster.schema_registry_host, 8081 + kafka_cluster.schema_registry_host, kafka_cluster.schema_registry_port ), "supports_empty_value": True, }, @@ -843,24 +871,7 @@ def test_kafka_formats(kafka_cluster): extra_settings=format_opts.get("extra_settings") or "", ) ) - - instance.wait_for_log_line( - "kafka.*Committed offset [0-9]+.*format_tests_", - repetitions=len(all_formats.keys()), - look_behind_lines=12000, - ) - - for format_name, format_opts in list(all_formats.items()): - logging.debug(("Checking {}".format(format_name))) - topic_name = f"format_tests_{format_name}" - # shift offsets by 1 if format supports empty value - offsets = ( - [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] - ) - result = instance.query( - "SELECT * FROM test.kafka_{format_name}_mv;".format(format_name=format_name) - ) - expected = """\ + raw_expected = """\ 0 0 AM 0.5 1 {topic_name} 0 {offset_0} 1 0 AM 0.5 1 {topic_name} 0 {offset_1} 2 0 AM 0.5 1 {topic_name} 0 {offset_1} @@ -878,7 +889,27 @@ def test_kafka_formats(kafka_cluster): 14 0 AM 0.5 1 {topic_name} 0 {offset_1} 15 0 AM 0.5 1 {topic_name} 0 {offset_1} 0 0 AM 0.5 1 {topic_name} 0 {offset_2} -""".format( +""" + + expected_rows_count = raw_expected.count("\n") + instance.query_with_retry( + f"SELECT * FROM test.kafka_{list(all_formats.keys())[-1]}_mv;", + retry_count=30, + sleep_time=1, + check_callback=lambda res: res.count("\n") == expected_rows_count, + ) + + for format_name, format_opts in list(all_formats.items()): + logging.debug(("Checking {}".format(format_name))) + topic_name = f"format_tests_{format_name}" + # shift offsets by 1 if format supports empty value + offsets = ( + [1, 2, 3] if format_opts.get("supports_empty_value", False) 
else [0, 1, 2] + ) + result = instance.query( + "SELECT * FROM test.kafka_{format_name}_mv;".format(format_name=format_name) + ) + expected = raw_expected.format( topic_name=topic_name, offset_0=offsets[0], offset_1=offsets[1], @@ -3755,19 +3786,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): ) ) - for format_name, format_opts in list(all_formats.items()): - logging.debug("Checking {format_name}") - topic_name = f"{topic_name_prefix}{format_name}" - # shift offsets by 1 if format supports empty value - offsets = ( - [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] - ) - result = instance.query( - "SELECT * FROM test.kafka_data_{format_name}_mv;".format( - format_name=format_name - ) - ) - expected = """\ + raw_expected = """\ 0 0 AM 0.5 1 {topic_name} 0 {offset_0} 1 0 AM 0.5 1 {topic_name} 0 {offset_1} 2 0 AM 0.5 1 {topic_name} 0 {offset_1} @@ -3785,7 +3804,29 @@ def test_kafka_formats_with_broken_message(kafka_cluster): 14 0 AM 0.5 1 {topic_name} 0 {offset_1} 15 0 AM 0.5 1 {topic_name} 0 {offset_1} 0 0 AM 0.5 1 {topic_name} 0 {offset_2} -""".format( +""" + + expected_rows_count = raw_expected.count("\n") + instance.query_with_retry( + f"SELECT * FROM test.kafka_data_{list(all_formats.keys())[-1]}_mv;", + retry_count=30, + sleep_time=1, + check_callback=lambda res: res.count("\n") == expected_rows_count, + ) + + for format_name, format_opts in list(all_formats.items()): + logging.debug(f"Checking {format_name}") + topic_name = f"{topic_name_prefix}{format_name}" + # shift offsets by 1 if format supports empty value + offsets = ( + [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] + ) + result = instance.query( + "SELECT * FROM test.kafka_data_{format_name}_mv;".format( + format_name=format_name + ) + ) + expected = raw_expected.format( topic_name=topic_name, offset_0=offsets[0], offset_1=offsets[1], @@ -4339,7 +4380,7 @@ def test_row_based_formats(kafka_cluster): f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.kafka; - + CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', @@ -4347,10 +4388,10 @@ def test_row_based_formats(kafka_cluster): kafka_group_name = '{format_name}', kafka_format = '{format_name}', kafka_max_rows_per_message = 5; - + CREATE MATERIALIZED VIEW test.view Engine=Log AS SELECT key, value FROM test.kafka; - + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); """ ) @@ -4459,17 +4500,17 @@ def test_block_based_formats_2(kafka_cluster): f""" DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.kafka; - + CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', kafka_topic_list = '{format_name}', kafka_group_name = '{format_name}', kafka_format = '{format_name}'; - + CREATE MATERIALIZED VIEW test.view Engine=Log AS SELECT key, value FROM test.kafka; - + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; """ ) diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 13f11852945..3957afe8b29 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -245,6 +245,12 @@ def test_arrays(started_cluster): == "[]\n" ) + # Test INSERT SELECT + node.query("INSERT INTO arrays_mongo_table SELECT * FROM arrays_mongo_table") + + 
assert node.query("SELECT COUNT() FROM arrays_mongo_table") == "200\n" + assert node.query("SELECT COUNT(DISTINCT *) FROM arrays_mongo_table") == "100\n" + node.query("DROP TABLE arrays_mongo_table") arrays_mongo_table.drop() diff --git a/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto b/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto deleted file mode 100755 index 090ed917cdd..00000000000 --- a/tests/integration/test_storage_nats/clickhouse_path/format_schemas/nats.proto +++ /dev/null @@ -1,6 +0,0 @@ -syntax = "proto3"; - - message ProtoKeyValue { - uint64 key = 1; - string value = 2; - } diff --git a/tests/integration/test_storage_nats/configs/macros.xml b/tests/integration/test_storage_nats/configs/macros.xml deleted file mode 100644 index 4aa547e049e..00000000000 --- a/tests/integration/test_storage_nats/configs/macros.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - nats1:4444 - macro - JSONEachRow - - diff --git a/tests/integration/test_storage_nats/configs/named_collection.xml b/tests/integration/test_storage_nats/configs/named_collection.xml deleted file mode 100644 index 15817f321f0..00000000000 --- a/tests/integration/test_storage_nats/configs/named_collection.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - nats1:4444 - named - JSONEachRow - 111 - 12 - click - house - - - diff --git a/tests/integration/test_storage_nats/configs/nats.xml b/tests/integration/test_storage_nats/configs/nats.xml deleted file mode 100644 index 0a8be9122ad..00000000000 --- a/tests/integration/test_storage_nats/configs/nats.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - click - house - - diff --git a/tests/integration/test_storage_nats/nats_certs.sh b/tests/integration/test_storage_nats/nats_certs.sh deleted file mode 100755 index 689221c39e4..00000000000 --- a/tests/integration/test_storage_nats/nats_certs.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -euxo pipefail - -mkdir -p "${NATS_CERT_DIR}/ca" -mkdir -p "${NATS_CERT_DIR}/nats" -openssl req -newkey rsa:4096 -x509 -days 365 -nodes -batch -keyout "${NATS_CERT_DIR}/ca/ca-key.pem" -out "${NATS_CERT_DIR}/ca/ca-cert.pem" -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=ca" -openssl req -newkey rsa:4096 -nodes -batch -keyout "${NATS_CERT_DIR}/nats/server-key.pem" -out "${NATS_CERT_DIR}/nats/server-req.pem" -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=server" -openssl x509 -req -days 365 -in "${NATS_CERT_DIR}/nats/server-req.pem" -CA "${NATS_CERT_DIR}/ca/ca-cert.pem" -CAkey "${NATS_CERT_DIR}/ca/ca-key.pem" -CAcreateserial -out "${NATS_CERT_DIR}/nats/server-cert.pem" -extfile <( -cat <<-EOF -subjectAltName = DNS:localhost, DNS:nats1 -EOF -) -rm -f "${NATS_CERT_DIR}/nats/server-req.pem" diff --git a/tests/integration/test_storage_nats/nats_pb2.py b/tests/integration/test_storage_nats/nats_pb2.py deleted file mode 100644 index e9e5cb72363..00000000000 --- a/tests/integration/test_storage_nats/nats_pb2.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: clickhouse_path/format_schemas/nats.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import descriptor_pool as _descriptor_pool -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n)clickhouse_path/format_schemas/nats.proto"+\n\rProtoKeyValue\x12\x0b\n\x03key\x18\x01 \x01(\x04\x12\r\n\x05value\x18\x02 \x01(\tb\x06proto3' -) - - -_PROTOKEYVALUE = DESCRIPTOR.message_types_by_name["ProtoKeyValue"] -ProtoKeyValue = _reflection.GeneratedProtocolMessageType( - "ProtoKeyValue", - (_message.Message,), - { - "DESCRIPTOR": _PROTOKEYVALUE, - "__module__": "clickhouse_path.format_schemas.nats_pb2" - # @@protoc_insertion_point(class_scope:ProtoKeyValue) - }, -) -_sym_db.RegisterMessage(ProtoKeyValue) - -if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - _PROTOKEYVALUE._serialized_start = 45 - _PROTOKEYVALUE._serialized_end = 88 -# @@protoc_insertion_point(module_scope) diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py deleted file mode 100644 index 4d7e4cf813d..00000000000 --- a/tests/integration/test_storage_nats/test.py +++ /dev/null @@ -1,1875 +0,0 @@ -import pytest - -# FIXME This test is too flaky -# https://github.com/ClickHouse/ClickHouse/issues/39185 - -pytestmark = pytest.mark.skip - -import json -import os.path as p -import random -import subprocess -import threading -import logging -import time -from random import randrange -import math - -import asyncio -from google.protobuf.internal.encoder import _VarintBytes -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, check_nats_is_available, nats_connect_ssl -from helpers.test_tools import TSV - -from . 
import nats_pb2 - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=[ - "configs/nats.xml", - "configs/macros.xml", - "configs/named_collection.xml", - ], - user_configs=["configs/users.xml"], - with_nats=True, - clickhouse_path_dir="clickhouse_path", -) - - -# Helpers - - -def wait_nats_to_start(nats_port, ssl_ctx=None, timeout=180): - start = time.time() - while time.time() - start < timeout: - try: - if asyncio.run(check_nats_is_available(nats_port, ssl_ctx=ssl_ctx)): - logging.debug("NATS is available") - return - time.sleep(0.5) - except Exception as ex: - logging.debug("Can't connect to NATS " + str(ex)) - time.sleep(0.5) - - -def nats_check_result(result, check=False, ref_file="test_nats_json.reference"): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) - - -def kill_nats(nats_id): - p = subprocess.Popen(("docker", "stop", nats_id), stdout=subprocess.PIPE) - p.communicate() - return p.returncode == 0 - - -def revive_nats(nats_id, nats_port): - p = subprocess.Popen(("docker", "start", nats_id), stdout=subprocess.PIPE) - p.communicate() - wait_nats_to_start(nats_port) - - -# Fixtures - - -@pytest.fixture(scope="module") -def nats_cluster(): - try: - cluster.start() - logging.debug("nats_id is {}".format(instance.cluster.nats_docker_id)) - instance.query("CREATE DATABASE test") - - yield cluster - - finally: - cluster.shutdown() - - -@pytest.fixture(autouse=True) -def nats_setup_teardown(): - print("NATS is available - running test") - yield # run test - instance.query("DROP DATABASE test SYNC") - instance.query("CREATE DATABASE test") - - -# Tests - - -async def nats_produce_messages(cluster_inst, subject, messages=(), bytes=None): - nc = await nats_connect_ssl( - cluster_inst.nats_port, - user="click", - password="house", - ssl_ctx=cluster_inst.nats_ssl_context, - ) - logging.debug("NATS connection status: " + str(nc.is_connected)) - - for message in messages: - await nc.publish(subject, message.encode()) - if bytes is not None: - await nc.publish(subject, bytes) - logging.debug("Finished publising to " + subject) - - await nc.close() - return messages - - -def check_table_is_ready(instance, table_name): - try: - instance.query("SELECT * FROM {}".format(table_name)) - return True - except Exception: - return False - - -def test_nats_select(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'select', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "select", messages)) - - # The order of messages in select * from test.nats is not guaranteed, so sleep to collect everything in one select - time.sleep(1) - - result = "" - while True: - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_select_empty(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'empty', - nats_format = 
'TSV', - nats_row_delimiter = '\\n'; - """ - ) - - assert int(instance.query("SELECT count() FROM test.nats")) == 0 - - -def test_nats_json_without_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'json', - nats_format = 'JSONEachRow'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = "" - for i in range(25): - messages += json.dumps({"key": i, "value": i}) + "\n" - - all_messages = [messages] - asyncio.run(nats_produce_messages(nats_cluster, "json", all_messages)) - - messages = "" - for i in range(25, 50): - messages += json.dumps({"key": i, "value": i}) + "\n" - all_messages = [messages] - asyncio.run(nats_produce_messages(nats_cluster, "json", all_messages)) - - time.sleep(1) - - result = "" - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_csv_with_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'csv', - nats_format = 'CSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append("{i}, {i}".format(i=i)) - - asyncio.run(nats_produce_messages(nats_cluster, "csv", messages)) - - time.sleep(1) - - result = "" - for _ in range(60): - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_tsv_with_delimiter(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'tsv', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append("{i}\t{i}".format(i=i)) - - asyncio.run(nats_produce_messages(nats_cluster, "tsv", messages)) - - result = "" - for _ in range(60): - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -# - - -def test_nats_macros(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = '{nats_url}', - nats_subjects = '{nats_subjects}', - nats_format = '{nats_format}' - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - message = "" - for i in range(50): - message += json.dumps({"key": i, "value": i}) + "\n" - asyncio.run(nats_produce_messages(nats_cluster, "macro", [message])) - - time.sleep(1) - - result = "" - for _ in range(60): - result += instance.query( - "SELECT * FROM test.nats ORDER BY key", 
ignore_error=True - ) - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_materialized_view(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mv', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - - CREATE TABLE test.view2 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS - SELECT * FROM test.nats group by (key, value); - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view2 ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_materialized_view_with_subquery(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mvsq', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM (SELECT * FROM test.nats); - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mvsq", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_many_materialized_views(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view1; - DROP TABLE IF EXISTS test.view2; - DROP TABLE IF EXISTS test.consumer1; - DROP TABLE IF EXISTS test.consumer2; - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mmv', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view1 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE TABLE test.view2 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS - SELECT * FROM test.nats; - CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) 
- asyncio.run(nats_produce_messages(nats_cluster, "mmv", messages)) - - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result1 = instance.query("SELECT * FROM test.view1 ORDER BY key") - result2 = instance.query("SELECT * FROM test.view2 ORDER BY key") - if nats_check_result(result1) and nats_check_result(result2): - break - - instance.query( - """ - DROP TABLE test.consumer1; - DROP TABLE test.consumer2; - DROP TABLE test.view1; - DROP TABLE test.view2; - """ - ) - - nats_check_result(result1, True) - nats_check_result(result2, True) - - -def test_nats_protobuf(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value String) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'pb', - nats_format = 'Protobuf', - nats_schema = 'nats.proto:ProtoKeyValue'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - data = b"" - for i in range(0, 20): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - data = b"" - for i in range(20, 21): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - data = b"" - for i in range(21, 50): - msg = nats_pb2.ProtoKeyValue() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - asyncio.run(nats_produce_messages(nats_cluster, "pb", bytes=data)) - - result = "" - time_limit_sec = 60 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - -def test_nats_big_message(nats_cluster): - # Create batchs of messages of size ~100Kb - nats_messages = 1000 - batch_messages = 1000 - messages = [ - json.dumps({"key": i, "value": "x" * 100}) * batch_messages - for i in range(nats_messages) - ] - - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value String) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'big', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value String) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - asyncio.run(nats_produce_messages(nats_cluster, "big", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - if int(result) == batch_messages * nats_messages: - break - - assert ( - int(result) == nats_messages * batch_messages - ), "ClickHouse lost some messages: {}".format(result) - - -def test_nats_mv_combo(nats_cluster): - NUM_MV = 5 - NUM_CONSUMERS = 4 - - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - 
SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'combo', - nats_num_consumers = {}, - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """.format( - NUM_CONSUMERS - ) - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - for mv_id in range(NUM_MV): - instance.query( - """ - DROP TABLE IF EXISTS test.combo_{0}; - DROP TABLE IF EXISTS test.combo_{0}_mv; - CREATE TABLE test.combo_{0} (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.combo_{0}_mv TO test.combo_{0} AS - SELECT * FROM test.nats; - """.format( - mv_id - ) - ) - - time.sleep(2) - - i = [0] - messages_num = 10000 - - def produce(): - messages = [] - for _ in range(messages_num): - messages.append(json.dumps({"key": i[0], "value": i[0]})) - i[0] += 1 - asyncio.run(nats_produce_messages(nats_cluster, "combo", messages)) - - threads = [] - threads_num = 20 - - for _ in range(threads_num): - threads.append(threading.Thread(target=produce)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - while True: - result = 0 - for mv_id in range(NUM_MV): - result += int( - instance.query("SELECT count() FROM test.combo_{0}".format(mv_id)) - ) - if int(result) == messages_num * threads_num * NUM_MV: - break - time.sleep(1) - - for thread in threads: - thread.join() - - for mv_id in range(NUM_MV): - instance.query( - """ - DROP TABLE test.combo_{0}_mv; - DROP TABLE test.combo_{0}; - """.format( - mv_id - ) - ) - - assert ( - int(result) == messages_num * threads_num * NUM_MV - ), "ClickHouse lost some messages: {}".format(result) - - -def test_nats_insert(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'insert', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("insert") - await sub.unsubscribe(50) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - while True: - try: - instance.query("INSERT INTO test.nats VALUES {}".format(values)) - break - except QueryRuntimeException as e: - if "Local: Timed out." 
in str(e): - continue - else: - raise - thread.join() - - result = "\n".join(insert_messages) - nats_check_result(result, True) - - -def test_nats_many_subjects_insert_wrong(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'insert1,insert2.>,insert3.*.foo', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - # no subject specified - instance.query_and_get_error("INSERT INTO test.nats VALUES {}".format(values)) - - # can't insert into wildcard subjects - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert2.>' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert3.*.foo' VALUES {}".format( - values - ) - ) - - # specified subject is not among engine's subjects - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert4' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='insert3.foo.baz' VALUES {}".format( - values - ) - ) - instance.query_and_get_error( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='foo.insert2' VALUES {}".format( - values - ) - ) - - -def test_nats_many_subjects_insert_right(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'right_insert1,right_insert2', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - values = [] - for i in range(50): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("right_insert1") - await sub.unsubscribe(50) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - while True: - try: - instance.query( - "INSERT INTO test.nats SETTINGS stream_like_engine_insert_queue='right_insert1' VALUES {}".format( - values - ) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." 
in str(e): - continue - else: - raise - thread.join() - - result = "\n".join(insert_messages) - nats_check_result(result, True) - - -def test_nats_many_inserts(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats_many; - DROP TABLE IF EXISTS test.nats_consume; - DROP TABLE IF EXISTS test.view_many; - DROP TABLE IF EXISTS test.consumer_many; - CREATE TABLE test.nats_many (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_inserts', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.nats_consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_inserts', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view_many (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS - SELECT * FROM test.nats_consume; - """ - ) - while not check_table_is_ready(instance, "test.nats_consume"): - logging.debug("Table test.nats_consume is not yet ready") - time.sleep(0.5) - - messages_num = 10000 - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - def insert(): - while True: - try: - instance.query("INSERT INTO test.nats_many VALUES {}".format(values)) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - - threads = [] - threads_num = 10 - for _ in range(threads_num): - threads.append(threading.Thread(target=insert)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - for thread in threads: - thread.join() - - time_limit_sec = 300 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT count() FROM test.view_many") - print(result, messages_num * threads_num) - if int(result) >= messages_num * threads_num: - break - time.sleep(1) - - instance.query( - """ - DROP TABLE test.nats_consume; - DROP TABLE test.nats_many; - DROP TABLE test.consumer_many; - DROP TABLE test.view_many; - """ - ) - - assert ( - int(result) == messages_num * threads_num - ), "ClickHouse lost some messages or got duplicated ones. 
Total count: {}".format( - result - ) - - -def test_nats_overloaded_insert(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view_overload; - DROP TABLE IF EXISTS test.consumer_overload; - DROP TABLE IF EXISTS test.nats_consume; - CREATE TABLE test.nats_consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'over', - nats_num_consumers = 5, - nats_max_block_size = 10000, - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.nats_overload (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'over', - nats_format = 'TSV', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view_overload (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key - SETTINGS old_parts_lifetime=5, cleanup_delay_period=2, cleanup_delay_period_random_add=3, - cleanup_thread_preferred_points_per_iteration=0; - CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS - SELECT * FROM test.nats_consume; - """ - ) - while not check_table_is_ready(instance, "test.nats_consume"): - logging.debug("Table test.nats_consume is not yet ready") - time.sleep(0.5) - - messages_num = 100000 - - def insert(): - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - while True: - try: - instance.query( - "INSERT INTO test.nats_overload VALUES {}".format(values) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." in str(e): - continue - else: - raise - - threads = [] - threads_num = 5 - for _ in range(threads_num): - threads.append(threading.Thread(target=insert)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - time_limit_sec = 300 - deadline = time.monotonic() + time_limit_sec - - while time.monotonic() < deadline: - result = instance.query("SELECT count() FROM test.view_overload") - time.sleep(1) - if int(result) >= messages_num * threads_num: - break - - instance.query( - """ - DROP TABLE test.consumer_overload; - DROP TABLE test.view_overload; - DROP TABLE test.nats_consume; - DROP TABLE test.nats_overload; - """ - ) - - for thread in threads: - thread.join() - - assert ( - int(result) == messages_num * threads_num - ), "ClickHouse lost some messages or got duplicated ones. 
Total count: {}".format( - result - ) - - -def test_nats_virtual_column(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats_virtuals (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'virtuals', - nats_format = 'JSONEachRow'; - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT value, key, _subject FROM test.nats_virtuals; - """ - ) - while not check_table_is_ready(instance, "test.nats_virtuals"): - logging.debug("Table test.nats_virtuals is not yet ready") - time.sleep(0.5) - - message_num = 10 - i = 0 - messages = [] - for _ in range(message_num): - messages.append(json.dumps({"key": i, "value": i})) - i += 1 - - asyncio.run(nats_produce_messages(nats_cluster, "virtuals", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == message_num: - break - - result = instance.query( - """ - SELECT key, value, _subject - FROM test.view ORDER BY key - """ - ) - - expected = """\ -0 0 virtuals -1 1 virtuals -2 2 virtuals -3 3 virtuals -4 4 virtuals -5 5 virtuals -6 6 virtuals -7 7 virtuals -8 8 virtuals -9 9 virtuals -""" - - instance.query( - """ - DROP TABLE test.nats_virtuals; - DROP TABLE test.view; - """ - ) - - assert TSV(result) == TSV(expected) - - -def test_nats_virtual_column_with_materialized_view(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats_virtuals_mv (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'virtuals_mv', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value UInt64, subject String) ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _subject as subject - FROM test.nats_virtuals_mv; - """ - ) - while not check_table_is_ready(instance, "test.nats_virtuals_mv"): - logging.debug("Table test.nats_virtuals_mv is not yet ready") - time.sleep(0.5) - - message_num = 10 - i = 0 - messages = [] - for _ in range(message_num): - messages.append(json.dumps({"key": i, "value": i})) - i += 1 - - asyncio.run(nats_produce_messages(nats_cluster, "virtuals_mv", messages)) - - while True: - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == message_num: - break - - result = instance.query("SELECT key, value, subject FROM test.view ORDER BY key") - expected = """\ -0 0 virtuals_mv -1 1 virtuals_mv -2 2 virtuals_mv -3 3 virtuals_mv -4 4 virtuals_mv -5 5 virtuals_mv -6 6 virtuals_mv -7 7 virtuals_mv -8 8 virtuals_mv -9 9 virtuals_mv -""" - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - DROP TABLE test.nats_virtuals_mv - """ - ) - - assert TSV(result) == TSV(expected) - - -def test_nats_many_consumers_to_each_queue(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.destination; - CREATE TABLE test.destination(key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - """ - ) - - num_tables = 4 - for table_id in range(num_tables): - print(("Setting up table {}".format(table_id))) - instance.query( - """ - DROP TABLE IF EXISTS test.many_consumers_{0}; - DROP TABLE IF EXISTS test.many_consumers_{0}_mv; - CREATE TABLE test.many_consumers_{0} (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'many_consumers', - nats_num_consumers = 2, - nats_queue_group = 'many_consumers', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - CREATE MATERIALIZED VIEW test.many_consumers_{0}_mv TO 
test.destination AS - SELECT key, value FROM test.many_consumers_{0}; - """.format( - table_id - ) - ) - while not check_table_is_ready( - instance, "test.many_consumers_{}".format(table_id) - ): - logging.debug( - "Table test.many_consumers_{} is not yet ready".format(table_id) - ) - time.sleep(0.5) - - i = [0] - messages_num = 1000 - - def produce(): - messages = [] - for _ in range(messages_num): - messages.append(json.dumps({"key": i[0], "value": i[0]})) - i[0] += 1 - asyncio.run(nats_produce_messages(nats_cluster, "many_consumers", messages)) - - threads = [] - threads_num = 20 - - for _ in range(threads_num): - threads.append(threading.Thread(target=produce)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - result1 = "" - while True: - result1 = instance.query("SELECT count() FROM test.destination") - time.sleep(1) - if int(result1) == messages_num * threads_num: - break - - for thread in threads: - thread.join() - - for consumer_id in range(num_tables): - instance.query( - """ - DROP TABLE test.many_consumers_{0}; - DROP TABLE test.many_consumers_{0}_mv; - """.format( - consumer_id - ) - ) - - instance.query( - """ - DROP TABLE test.destination; - """ - ) - - assert ( - int(result1) == messages_num * threads_num - ), "ClickHouse lost some messages: {}".format(result1) - - -def test_nats_restore_failed_connection_without_losses_on_write(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.consume; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE TABLE test.consume (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'producer_reconnect', - nats_format = 'JSONEachRow', - nats_num_consumers = 2, - nats_row_delimiter = '\\n'; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.consume; - DROP TABLE IF EXISTS test.producer_reconnect; - CREATE TABLE test.producer_reconnect (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'producer_reconnect', - nats_format = 'JSONEachRow', - nats_row_delimiter = '\\n'; - """ - ) - while not check_table_is_ready(instance, "test.consume"): - logging.debug("Table test.consume is not yet ready") - time.sleep(0.5) - - messages_num = 100000 - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - while True: - try: - instance.query( - "INSERT INTO test.producer_reconnect VALUES {}".format(values) - ) - break - except QueryRuntimeException as e: - if "Local: Timed out." 
in str(e): - continue - else: - raise - - while int(instance.query("SELECT count() FROM test.view")) == 0: - time.sleep(0.1) - - kill_nats(nats_cluster.nats_docker_id) - time.sleep(4) - revive_nats(nats_cluster.nats_docker_id, nats_cluster.nats_port) - - while True: - result = instance.query("SELECT count(DISTINCT key) FROM test.view") - time.sleep(1) - if int(result) == messages_num: - break - - instance.query( - """ - DROP TABLE test.consume; - DROP TABLE test.producer_reconnect; - """ - ) - - assert int(result) == messages_num, "ClickHouse lost some messages: {}".format( - result - ) - - -def test_nats_no_connection_at_startup_1(nats_cluster): - # no connection when table is initialized - nats_cluster.pause_container("nats1") - instance.query_and_get_error( - """ - CREATE TABLE test.cs (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'cs', - nats_format = 'JSONEachRow', - nats_num_consumers = '5', - nats_row_delimiter = '\\n'; - """ - ) - nats_cluster.unpause_container("nats1") - - -def test_nats_no_connection_at_startup_2(nats_cluster): - instance.query( - """ - CREATE TABLE test.cs (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'cs', - nats_format = 'JSONEachRow', - nats_num_consumers = '5', - nats_row_delimiter = '\\n'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.cs; - """ - ) - - instance.query("DETACH TABLE test.cs") - nats_cluster.pause_container("nats1") - instance.query("ATTACH TABLE test.cs") - nats_cluster.unpause_container("nats1") - while not check_table_is_ready(instance, "test.cs"): - logging.debug("Table test.cs is not yet ready") - time.sleep(0.5) - - messages_num = 1000 - messages = [] - for i in range(messages_num): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "cs", messages)) - - for _ in range(20): - result = instance.query("SELECT count() FROM test.view") - time.sleep(1) - if int(result) == messages_num: - break - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.cs; - """ - ) - - assert int(result) == messages_num, "ClickHouse lost some messages: {}".format( - result - ) - - -def test_nats_format_factory_settings(nats_cluster): - instance.query( - """ - CREATE TABLE test.format_settings ( - id String, date DateTime - ) ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'format_settings', - nats_format = 'JSONEachRow', - date_time_input_format = 'best_effort'; - """ - ) - while not check_table_is_ready(instance, "test.format_settings"): - logging.debug("Table test.format_settings is not yet ready") - time.sleep(0.5) - - message = json.dumps( - {"id": "format_settings_test", "date": "2021-01-19T14:42:33.1829214Z"} - ) - expected = instance.query( - """SELECT parseDateTimeBestEffort(CAST('2021-01-19T14:42:33.1829214Z', 'String'))""" - ) - - asyncio.run(nats_produce_messages(nats_cluster, "format_settings", [message])) - - while True: - result = instance.query("SELECT date FROM test.format_settings") - if result == expected: - break - - instance.query( - """ - CREATE TABLE test.view ( - id String, date DateTime - ) ENGINE = MergeTree ORDER BY id; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.format_settings; - """ - ) - - asyncio.run(nats_produce_messages(nats_cluster, "format_settings", [message])) - while True: - result = 
instance.query("SELECT date FROM test.view") - if result == expected: - break - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.format_settings; - """ - ) - - assert result == expected - - -def test_nats_bad_args(nats_cluster): - instance.query_and_get_error( - """ - CREATE TABLE test.drop (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_secure = true, - nats_format = 'JSONEachRow'; - """ - ) - - -def test_nats_drop_mv(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'mv', - nats_format = 'JSONEachRow'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - messages = [] - for i in range(20): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - instance.query("DROP VIEW test.consumer") - messages = [] - for i in range(20, 40): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - instance.query( - """ - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.nats; - """ - ) - messages = [] - for i in range(40, 50): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - while True: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if nats_check_result(result): - break - - nats_check_result(result, True) - - instance.query("DROP VIEW test.consumer") - messages = [] - for i in range(50, 60): - messages.append(json.dumps({"key": i, "value": i})) - asyncio.run(nats_produce_messages(nats_cluster, "mv", messages)) - - count = 0 - while True: - count = int(instance.query("SELECT count() FROM test.nats")) - if count: - break - - assert count > 0 - - -def test_nats_predefined_configuration(nats_cluster): - instance.query( - """ - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS(nats1) """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - asyncio.run( - nats_produce_messages( - nats_cluster, "named", [json.dumps({"key": 1, "value": 2})] - ) - ) - while True: - result = instance.query( - "SELECT * FROM test.nats ORDER BY key", ignore_error=True - ) - if result == "1\t2\n": - break - - -def test_format_with_prefix_and_suffix(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'custom', - nats_format = 'CustomSeparated'; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("custom") - await sub.unsubscribe(2) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = 
threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - "INSERT INTO test.nats select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - thread.join() - - assert ( - "".join(insert_messages) - == "\n0\t0\n\n\n10\t100\n\n" - ) - - -def test_max_rows_per_message(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'custom1', - nats_format = 'CustomSeparated', - nats_max_rows_per_message = 3, - format_custom_result_before_delimiter = '\n', - format_custom_result_after_delimiter = '\n'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - num_rows = 5 - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("custom1") - await sub.unsubscribe(2) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - thread.join() - - assert ( - "".join(insert_messages) - == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" - ) - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view") - assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" - - -def test_row_based_formats(nats_cluster): - num_rows = 10 - - for format_name in [ - "TSV", - "TSVWithNamesAndTypes", - "TSKV", - "CSV", - "CSVWithNamesAndTypes", - "CustomSeparatedWithNamesAndTypes", - "Values", - "JSON", - "JSONEachRow", - "JSONCompactEachRow", - "JSONCompactEachRowWithNamesAndTypes", - "JSONObjectEachRow", - "Avro", - "RowBinary", - "RowBinaryWithNamesAndTypes", - "MsgPack", - ]: - print(format_name) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = '{format_name}', - nats_format = '{format_name}'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = 0 - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe(format_name) - await sub.unsubscribe(2) - async for msg in sub.messages: - nonlocal insert_messages - insert_messages += 1 - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - 
thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats select number*10 as key, number*100 as value from numbers({num_rows})" - ) - - thread.join() - - assert insert_messages == 2 - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - - result = instance.query("SELECT * FROM test.view") - assert result == expected - - -def test_block_based_formats_1(nats_cluster): - instance.query( - """ - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = 'PrettySpace', - nats_format = 'PrettySpace'; - """ - ) - - insert_messages = [] - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe("PrettySpace") - await sub.unsubscribe(3) - async for msg in sub.messages: - insert_messages.append(msg.data.decode()) - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - attempt = 0 - while attempt < 100: - try: - instance.query( - "INSERT INTO test.nats SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0;" - ) - break - except Exception: - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - attempt += 1 - thread.join() - - data = [] - for message in insert_messages: - splitted = message.split("\n") - assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" - assert splitted[1] == "" - assert splitted[-1] == "" - data += [line.split() for line in splitted[2:-1]] - - assert data == [ - ["0", "0"], - ["10", "100"], - ["20", "200"], - ["30", "300"], - ["40", "400"], - ] - - -def test_block_based_formats_2(nats_cluster): - num_rows = 100 - - for format_name in [ - "JSONColumns", - "Native", - "Arrow", - "Parquet", - "ORC", - "JSONCompactColumns", - ]: - print(format_name) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.nats; - - CREATE TABLE test.nats (key UInt64, value UInt64) - ENGINE = NATS - SETTINGS nats_url = 'nats1:4444', - nats_subjects = '{format_name}', - nats_format = '{format_name}'; - - CREATE MATERIALIZED VIEW test.view Engine=Log AS - SELECT key, value FROM test.nats; - """ - ) - - while not check_table_is_ready(instance, "test.nats"): - logging.debug("Table test.nats is not yet ready") - time.sleep(0.5) - - insert_messages = 0 - - async def sub_to_nats(): - nc = await nats_connect_ssl( - nats_cluster.nats_port, - user="click", - password="house", - ssl_ctx=nats_cluster.nats_ssl_context, - ) - sub = await nc.subscribe(format_name) - await sub.unsubscribe(9) - async for msg in sub.messages: - nonlocal insert_messages - insert_messages += 1 - - await sub.drain() - await nc.drain() - - def run_sub(): - asyncio.run(sub_to_nats()) - - thread = threading.Thread(target=run_sub) - thread.start() - time.sleep(1) - - instance.query( - f"INSERT INTO test.nats SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0;" - ) - - thread.join() - - assert 
insert_messages == 9 - - attempt = 0 - rows = 0 - while attempt < 100: - rows = int(instance.query("SELECT count() FROM test.view")) - if rows == num_rows: - break - attempt += 1 - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view ORDER by key") - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected - - -if __name__ == "__main__": - cluster.start() - input("Cluster created, press any key to destroy...") - cluster.shutdown() diff --git a/tests/integration/test_storage_nats/test_nats_json.reference b/tests/integration/test_storage_nats/test_nats_json.reference deleted file mode 100644 index 959bb2aad74..00000000000 --- a/tests/integration/test_storage_nats/test_nats_json.reference +++ /dev/null @@ -1,50 +0,0 @@ -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -10 10 -11 11 -12 12 -13 13 -14 14 -15 15 -16 16 -17 17 -18 18 -19 19 -20 20 -21 21 -22 22 -23 23 -24 24 -25 25 -26 26 -27 27 -28 28 -29 29 -30 30 -31 31 -32 32 -33 33 -34 34 -35 35 -36 36 -37 37 -38 38 -39 39 -40 40 -41 41 -42 42 -43 43 -44 44 -45 45 -46 46 -47 47 -48 48 -49 49 diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 686eb1ea751..d4f8fab3a82 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -726,6 +726,22 @@ def test_auto_close_connection(started_cluster): assert count == 2 +def test_literal_escaping(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute(f"DROP TABLE IF EXISTS escaping") + cursor.execute(f"CREATE TABLE escaping(text varchar(255))") + node1.query( + "CREATE TABLE default.escaping (text String) ENGINE = PostgreSQL('postgres1:5432', 'postgres', 'escaping', 'postgres', 'mysecretpassword')" + ) + node1.query("SELECT * FROM escaping WHERE text = ''''") # ' -> '' + node1.query("SELECT * FROM escaping WHERE text = '\\''") # ' -> '' + node1.query("SELECT * FROM escaping WHERE text = '\\\\\\''") # \' -> \'' + node1.query("SELECT * FROM escaping WHERE text = '\\\\\\''") # \' -> \'' + node1.query("SELECT * FROM escaping WHERE text like '%a''a%'") # %a'a% -> %a''a% + node1.query("SELECT * FROM escaping WHERE text like '%a\\'a%'") # %a'a% -> %a''a% + cursor.execute(f"DROP TABLE escaping") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml b/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml deleted file mode 100644 index 27c7107ce5e..00000000000 --- a/tests/integration/test_storage_postgresql_replica/configs/log_conf.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - trace - /var/log/clickhouse-server/log.log - /var/log/clickhouse-server/log.err.log - 1000M - 10 - /var/log/clickhouse-server/stderr.log - /var/log/clickhouse-server/stdout.log - - diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py deleted file mode 100644 index 66495700102..00000000000 --- a/tests/integration/test_storage_postgresql_replica/test.py +++ /dev/null @@ -1,780 +0,0 @@ -import pytest - -# FIXME Tests with MaterializedPostgresSQL are temporarily disabled -# https://github.com/ClickHouse/ClickHouse/issues/36898 -# https://github.com/ClickHouse/ClickHouse/issues/38677 -# https://github.com/ClickHouse/ClickHouse/pull/39272#issuecomment-1190087190 - -pytestmark = 
pytest.mark.skip - -import time -import psycopg2 -import os.path as p - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT -from helpers.test_tools import TSV - -import threading - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=["configs/log_conf.xml"], - with_postgres=True, - stay_alive=True, -) - -postgres_table_template = """ - CREATE TABLE IF NOT EXISTS {} ( - key Integer NOT NULL, value Integer, PRIMARY KEY(key)) - """ - -queries = [ - "INSERT INTO {} select i, i from generate_series(0, 10000) as t(i);", - "DELETE FROM {} WHERE (value*value) % 3 = 0;", - "UPDATE {} SET value = value + 125 WHERE key % 2 = 0;", - "UPDATE {} SET key=key+20000 WHERE key%2=0", - "INSERT INTO {} select i, i from generate_series(40000, 50000) as t(i);", - "DELETE FROM {} WHERE key % 10 = 0;", - "UPDATE {} SET value = value + 101 WHERE key % 2 = 1;", - "UPDATE {} SET key=key+80000 WHERE key%2=1", - "DELETE FROM {} WHERE value % 2 = 0;", - "UPDATE {} SET value = value + 2000 WHERE key % 5 = 0;", - "INSERT INTO {} select i, i from generate_series(200000, 250000) as t(i);", - "DELETE FROM {} WHERE value % 3 = 0;", - "UPDATE {} SET value = value * 2 WHERE key % 3 = 0;", - "UPDATE {} SET key=key+500000 WHERE key%2=1", - "INSERT INTO {} select i, i from generate_series(1000000, 1050000) as t(i);", - "DELETE FROM {} WHERE value % 9 = 2;", - "UPDATE {} SET key=key+10000000", - "UPDATE {} SET value = value + 2 WHERE key % 3 = 1;", - "DELETE FROM {} WHERE value%5 = 0;", -] - - -@pytest.mark.timeout(30) -def check_tables_are_synchronized( - table_name, order_by="key", postgres_database="postgres_database" -): - while True: - expected = instance.query( - "select * from {}.{} order by {};".format( - postgres_database, table_name, order_by - ) - ) - result = instance.query( - "select * from test.{} order by {};".format(table_name, order_by) - ) - if result == expected: - break - - assert result == expected - - -def get_postgres_conn( - ip, port, database=False, auto_commit=True, database_name="postgres_database" -): - if database == True: - conn_string = "host={} port={} dbname='{}' user='postgres' password='mysecretpassword'".format( - ip, port, database_name - ) - else: - conn_string = ( - "host={} port={} user='postgres' password='mysecretpassword'".format( - ip, port - ) - ) - - conn = psycopg2.connect(conn_string) - if auto_commit: - conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - conn.autocommit = True - return conn - - -def create_postgres_db(cursor, name): - cursor.execute("CREATE DATABASE {}".format(name)) - - -def create_clickhouse_postgres_db(ip, port, name="postgres_database"): - instance.query( - """ - CREATE DATABASE {} - ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')""".format( - name, ip, port, name - ) - ) - - -def create_materialized_table(ip, port, table_name="postgresql_replica"): - instance.query( - f""" - CREATE TABLE test.{table_name} (key Int64, value Int64) - ENGINE = MaterializedPostgreSQL( - '{ip}:{port}', 'postgres_database', '{table_name}', 'postgres', 'mysecretpassword') - PRIMARY KEY key; """ - ) - - -def create_postgres_table(cursor, table_name, replica_identity_full=False): - cursor.execute("DROP TABLE IF EXISTS {}".format(table_name)) - cursor.execute(postgres_table_template.format(table_name)) - if replica_identity_full: - cursor.execute("ALTER TABLE {} REPLICA IDENTITY FULL;".format(table_name)) - - 
-def postgresql_replica_check_result( - result, check=False, ref_file="test_postgresql_replica.reference" -): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - conn = get_postgres_conn(ip=cluster.postgres_ip, port=cluster.postgres_port) - cursor = conn.cursor() - create_postgres_db(cursor, "postgres_database") - create_clickhouse_postgres_db( - ip=cluster.postgres_ip, port=cluster.postgres_port - ) - - instance.query("CREATE DATABASE test") - yield cluster - - finally: - cluster.shutdown() - - -@pytest.mark.timeout(320) -def test_initial_load_from_snapshot(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.2) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_no_connection_at_startup(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - time.sleep(3) - - instance.query("DETACH TABLE test.postgresql_replica") - started_cluster.pause_container("postgres1") - - instance.query("ATTACH TABLE test.postgresql_replica") - time.sleep(3) - started_cluster.unpause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) == 0: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_detach_attach_is_ok(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM 
test.postgresql_replica;") - while int(result) == 0: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - postgresql_replica_check_result(result, True) - - instance.query("DETACH TABLE test.postgresql_replica") - instance.query("ATTACH TABLE test.postgresql_replica") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_replicating_insert_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(10)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 10: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 10 + number, 10 + number from numbers(10)" - ) - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 20 + number, 20 + number from numbers(10)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 30: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 30 + number, 30 + number from numbers(10)" - ) - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 40 + number, 40 + number from numbers(10)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -@pytest.mark.timeout(320) -def test_replicating_delete_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.2) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - instance.query( - "INSERT INTO 
postgres_database.postgresql_replica SELECT 50 + number, 50 + number from numbers(50)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DELETE FROM postgresql_replica WHERE key > 49;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_replicating_update_queries(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number + 10 from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("UPDATE postgresql_replica SET value = value - 10;") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_resume_from_written_version(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number + 10 from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 50: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, 50 + number from numbers(50)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query("DETACH TABLE test.postgresql_replica") - - cursor.execute("DELETE FROM postgresql_replica WHERE key > 49;") - cursor.execute("UPDATE postgresql_replica SET value = value - 10;") - - instance.query("ATTACH TABLE test.postgresql_replica") - - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - while postgresql_replica_check_result(result) == False: - time.sleep(0.5) - result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") - - cursor.execute("DROP TABLE postgresql_replica;") - 
postgresql_replica_check_result(result, True) - - -@pytest.mark.timeout(320) -def test_many_replication_messages(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 100000: - time.sleep(0.2) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print("SYNC OK") - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000, 100000)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 200000: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print("INSERT OK") - - result = instance.query("SELECT key FROM test.postgresql_replica ORDER BY key;") - expected = instance.query("SELECT number from numbers(200000)") - assert result == expected - - cursor.execute("UPDATE postgresql_replica SET value = key + 1 WHERE key < 100000;") - - result = instance.query( - "SELECT key FROM test.postgresql_replica WHERE value = key + 1 ORDER BY key;" - ) - expected = instance.query("SELECT number from numbers(100000)") - - while result != expected: - time.sleep(1) - result = instance.query( - "SELECT key FROM test.postgresql_replica WHERE value = key + 1 ORDER BY key;" - ) - print("UPDATE OK") - - cursor.execute("DELETE FROM postgresql_replica WHERE key % 2 = 1;") - cursor.execute("DELETE FROM postgresql_replica WHERE key != value;") - - result = instance.query( - "SELECT count() FROM (SELECT * FROM test.postgresql_replica);" - ) - while int(result) != 50000: - time.sleep(1) - result = instance.query( - "SELECT count() FROM (SELECT * FROM test.postgresql_replica);" - ) - print("DELETE OK") - - cursor.execute("DROP TABLE postgresql_replica;") - - -@pytest.mark.timeout(320) -def test_connection_loss(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - i = 50 - while i < 100000: - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT {} + number, number from numbers(10000)".format( - i - ) - ) - i += 10000 - - started_cluster.pause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - print(int(result)) - time.sleep(6) - - started_cluster.unpause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) < 100050: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DROP TABLE postgresql_replica;") - assert int(result) == 100050 - - -@pytest.mark.timeout(320) -def 
test_clickhouse_restart(started_cluster): - pytest.skip("Temporary disabled (FIXME)") - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(50)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - i = 50 - while i < 100000: - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT {} + number, number from numbers(10000)".format( - i - ) - ) - i += 10000 - - instance.restart_clickhouse() - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) < 100050: - time.sleep(1) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - cursor.execute("DROP TABLE postgresql_replica;") - print(result) - assert int(result) == 100050 - - -def test_rename_table(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(25)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 25: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - instance.query( - "RENAME TABLE test.postgresql_replica TO test.postgresql_replica_renamed" - ) - assert ( - int(instance.query("SELECT count() FROM test.postgresql_replica_renamed;")) - == 25 - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(25, 25)" - ) - - result = instance.query("SELECT count() FROM test.postgresql_replica_renamed;") - while int(result) != 50: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica_renamed;") - - result = instance.query( - "SELECT * FROM test.postgresql_replica_renamed ORDER BY key;" - ) - postgresql_replica_check_result(result, True) - cursor.execute("DROP TABLE postgresql_replica;") - instance.query("DROP TABLE IF EXISTS test.postgresql_replica_renamed") - - -def test_virtual_columns(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(10)" - ) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - while int(result) != 10: - time.sleep(0.5) - result = instance.query("SELECT count() FROM test.postgresql_replica;") - - # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time. 
- result = instance.query( - "SELECT key, value, _sign, _version FROM test.postgresql_replica;" - ) - print(result) - cursor.execute("DROP TABLE postgresql_replica;") - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -def test_abrupt_connection_loss_while_heavy_replication(started_cluster): - instance.query("DROP DATABASE IF EXISTS test_database") - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - table_name = "postgresql_replica" - create_postgres_table(cursor, table_name) - - instance.query(f"DROP TABLE IF EXISTS test.{table_name}") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - - for i in range(len(queries)): - query = queries[i].format(table_name) - cursor.execute(query) - print("query {}".format(query.format(table_name))) - - started_cluster.pause_container("postgres1") - - result = instance.query("SELECT count() FROM test.postgresql_replica") - print(result) # Just debug - - started_cluster.unpause_container("postgres1") - - check_tables_are_synchronized("postgresql_replica") - - result = instance.query("SELECT count() FROM test.postgresql_replica") - print(result) # Just debug - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -def test_abrupt_server_restart_while_heavy_replication(started_cluster): - # FIXME (kssenii) temporary disabled - if instance.is_built_with_sanitizer(): - pytest.skip("Temporary disabled (FIXME)") - - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - table_name = "postgresql_replica_697" - create_postgres_table(cursor, table_name) - - instance.query(f"INSERT INTO postgres_database.{table_name} SELECT -1, 1") - instance.query(f"DROP TABLE IF EXISTS test.{table_name} SYNC") - create_materialized_table( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - table_name=table_name, - ) - - n = 1 - while int(instance.query(f"select count() from test.{table_name}")) != 1: - sleep(1) - n += 1 - if n > 10: - break - - for query in queries: - cursor.execute(query.format(table_name)) - print("query {}".format(query.format(table_name))) - - instance.restart_clickhouse() - - result = instance.query(f"SELECT count() FROM test.{table_name}") - print(result) # Just debug - - check_tables_are_synchronized(table_name) - - result = instance.query(f"SELECT count() FROM test.{table_name}") - print(result) # Just debug - instance.query(f"DROP TABLE test.{table_name} SYNC") - - -def test_drop_table_immediately(started_cluster): - conn = get_postgres_conn( - ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True, - ) - cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") - instance.query( - "INSERT INTO postgres_database.postgresql_replica SELECT number, number from numbers(100000)" - ) - - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - instance.query("DROP TABLE test.postgresql_replica") - create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port - ) - check_tables_are_synchronized("postgresql_replica") - instance.query(f"DROP TABLE test.postgresql_replica SYNC") - - -if __name__ == "__main__": - cluster.start() - input("Cluster created, press any key to destroy...") - 
cluster.shutdown() diff --git a/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference b/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference deleted file mode 100644 index 959bb2aad74..00000000000 --- a/tests/integration/test_storage_postgresql_replica/test_postgresql_replica.reference +++ /dev/null @@ -1,50 +0,0 @@ -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -10 10 -11 11 -12 12 -13 13 -14 14 -15 15 -16 16 -17 17 -18 18 -19 19 -20 20 -21 21 -22 22 -23 23 -24 24 -25 25 -26 26 -27 27 -28 28 -29 29 -30 30 -31 31 -32 32 -33 33 -34 34 -35 35 -36 36 -37 37 -38 38 -39 39 -40 40 -41 41 -42 42 -43 43 -44 44 -45 45 -46 46 -47 47 -48 48 -49 49 diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 751279f5e5a..80d2050b394 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -511,69 +511,6 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): rabbitmq_check_result(result2, True) -@pytest.mark.skip(reason="clichouse_path with rabbitmq.proto fails to be exported") -def test_rabbitmq_protobuf(rabbitmq_cluster): - instance.query( - """ - CREATE TABLE test.rabbitmq (key UInt64, value String) - ENGINE = RabbitMQ - SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_exchange_name = 'pb', - rabbitmq_format = 'Protobuf', - rabbitmq_flush_interval_ms=1000, - rabbitmq_max_block_size=100, - rabbitmq_schema = 'rabbitmq.proto:KeyValueProto'; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.rabbitmq; - """ - ) - - credentials = pika.PlainCredentials("root", "clickhouse") - parameters = pika.ConnectionParameters( - rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials - ) - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - - data = "" - for i in range(0, 20): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - data = "" - for i in range(20, 21): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - data = "" - for i in range(21, 50): - msg = rabbitmq_pb2.KeyValueProto() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - channel.basic_publish(exchange="pb", routing_key="", body=data) - - connection.close() - - result = "" - while True: - result = instance.query("SELECT * FROM test.view ORDER BY key") - if rabbitmq_check_result(result): - break - - rabbitmq_check_result(result, True) - - def test_rabbitmq_big_message(rabbitmq_cluster): # Create batchs of messages of size ~100Kb rabbitmq_messages = 1000 diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 45437fefa79..9fb794648cb 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -944,7 +944,7 @@ def test_predefined_connection_configuration(started_cluster): f"CREATE TABLE {name} (id UInt32) ENGINE = 
S3(s3_conf1, format='CSV')" ) assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON s3_conf1" in error ) error = instance.query_and_get_error( @@ -952,7 +952,7 @@ def test_predefined_connection_configuration(started_cluster): user="user", ) assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON s3_conf1" in error ) @@ -973,12 +973,12 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON no_collection" in error ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") assert ( - "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" + "To execute this query, it's necessary to have the grant NAMED COLLECTION ON no_collection" in error ) instance = started_cluster.instances["dummy"] # has named collection access @@ -1408,16 +1408,16 @@ def test_select_columns(started_cluster): instance.query("SYSTEM FLUSH LOGS") result1 = instance.query( - f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'" + f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'" ) instance.query(f"SELECT * FROM {name}") instance.query("SYSTEM FLUSH LOGS") result2 = instance.query( - f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'" + f"SELECT ProfileEvents['ReadBufferFromS3Bytes'] FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'" ) - assert int(result1) * 3 <= int(result2) + assert round(int(result2) / int(result1)) == 3 def test_insert_select_schema_inference(started_cluster): @@ -1827,3 +1827,66 @@ def test_skip_empty_files(started_cluster): ) assert len(res.strip()) == 0 + + +def test_read_subcolumns(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + instance.query( + f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + ) + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\troot/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\troot/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from 
s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\troot/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\t/root/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + + res = instance.query( + f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "2\t/root/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + ) + + assert res == "0\t/root/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + + res = instance.query( + f"select x.b.d, _path, x.b, _file, x.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + ) + + assert ( + res == "42\t/root/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + ) diff --git a/tests/integration/test_storage_s3_queue/__init__.py b/tests/integration/test_storage_s3_queue/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_s3_queue/configs/defaultS3.xml b/tests/integration/test_storage_s3_queue/configs/defaultS3.xml new file mode 100644 index 00000000000..7dac6d9fbb5 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/configs/defaultS3.xml @@ -0,0 +1,11 @@ + + + + http://resolver:8080 +
Authorization: Bearer TOKEN
+
+ + http://resolver:8080/root-with-auth/restricteddirectory/ + +
+
diff --git a/tests/integration/test_storage_s3_queue/configs/named_collections.xml b/tests/integration/test_storage_s3_queue/configs/named_collections.xml new file mode 100644 index 00000000000..64674e2a3e3 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/configs/named_collections.xml @@ -0,0 +1,43 @@ + + + + http://minio1:9001/root/test_table + minio + minio123 + + + http://minio1:9001/root/test_parquet + minio + minio123 + + + http://minio1:9001/root/test_parquet_gz + minio + minio123 + + + http://minio1:9001/root/test_orc + minio + minio123 + + + http://minio1:9001/root/test_native + minio + minio123 + + + http://minio1:9001/root/test.arrow + minio + minio123 + + + http://minio1:9001/root/test.parquet + minio + minio123 + + + http://minio1:9001/root/test_cache4.jsonl + true + + + diff --git a/tests/integration/test_storage_nats/configs/users.xml b/tests/integration/test_storage_s3_queue/configs/users.xml similarity index 100% rename from tests/integration/test_storage_nats/configs/users.xml rename to tests/integration/test_storage_s3_queue/configs/users.xml diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py new file mode 100644 index 00000000000..484ab6d7e95 --- /dev/null +++ b/tests/integration/test_storage_s3_queue/test.py @@ -0,0 +1,879 @@ +import io +import logging +import os +import random +import time + +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +import json + +""" +export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-server +export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-client +export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=/home/sergey/vkr/ClickHouse/build/programs/clickhouse-odbc-bridge +export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/home/sergey/vkr/ClickHouse/programs/server + +""" + + +def prepare_s3_bucket(started_cluster): + # Allows read-write access for bucket without authorization. 
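+ # The policy dict below grants anonymous s3:GetBucketLocation and s3:ListBucket on the
+ # bucket plus s3:GetObject/PutObject/DeleteObject on its objects; it is applied through
+ # minio_client.set_bucket_policy() at the end of this helper.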
+ bucket_read_write_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:GetBucketLocation", + "Resource": "arn:aws:s3:::root", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:ListBucket", + "Resource": "arn:aws:s3:::root", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:GetObject", + "Resource": "arn:aws:s3:::root/*", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:PutObject", + "Resource": "arn:aws:s3:::root/*", + }, + { + "Sid": "", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "s3:DeleteObject", + "Resource": "arn:aws:s3:::root/*", + }, + ], + } + + minio_client = started_cluster.minio_client + minio_client.set_bucket_policy( + started_cluster.minio_bucket, json.dumps(bucket_read_write_policy) + ) + + started_cluster.minio_restricted_bucket = "{}-with-auth".format( + started_cluster.minio_bucket + ) + if minio_client.bucket_exists(started_cluster.minio_restricted_bucket): + minio_client.remove_bucket(started_cluster.minio_restricted_bucket) + + minio_client.make_bucket(started_cluster.minio_restricted_bucket) + + +@pytest.fixture(autouse=True) +def s3_queue_setup_teardown(started_cluster): + instance = started_cluster.instances["instance"] + instance_2 = started_cluster.instances["instance2"] + + instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + instance_2.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + + minio = started_cluster.minio_client + objects = list( + minio.list_objects(started_cluster.minio_restricted_bucket, recursive=True) + ) + for obj in objects: + minio.remove_object(started_cluster.minio_restricted_bucket, obj.object_name) + yield # run test + + +MINIO_INTERNAL_PORT = 9001 +AVAILABLE_MODES = ["unordered", "ordered"] +AUTH = "'minio','minio123'," + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +def put_s3_file_content(started_cluster, bucket, filename, data): + buf = io.BytesIO(data) + started_cluster.minio_client.put_object(bucket, filename, buf, len(data)) + + +def generate_random_files( + count, prefix, cluster, bucket, column_num=3, row_num=10, start_ind=0 +): + total_values = [] + to_generate = [ + (f"{prefix}/test_{i}.csv", i) for i in range(start_ind, start_ind + count) + ] + to_generate.sort(key=lambda x: x[0]) + + for filename, i in to_generate: + rand_values = [ + [random.randint(0, 50) for _ in range(column_num)] for _ in range(row_num) + ] + total_values += rand_values + values_csv = ( + "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" + ).encode() + put_s3_file_content(cluster, bucket, filename, values_csv) + return total_values + + +# Returns content of given S3 file as string. 
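+# Streams the object from MinIO chunk by chunk and concatenates the chunks;
+# with decode=False the raw bytes are returned instead of a decoded string.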
+def get_s3_file_content(started_cluster, bucket, filename, decode=True): + # type: (ClickHouseCluster, str, str, bool) -> str + + data = started_cluster.minio_client.get_object(bucket, filename) + data_str = b"" + for chunk in data.stream(): + data_str += chunk + if decode: + return data_str.decode() + return data_str + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "instance", + user_configs=["configs/users.xml"], + with_minio=True, + with_zookeeper=True, + main_configs=["configs/defaultS3.xml", "configs/named_collections.xml"], + ) + cluster.add_instance( + "instance2", + user_configs=["configs/users.xml"], + with_minio=True, + with_zookeeper=True, + main_configs=["configs/defaultS3.xml", "configs/named_collections.xml"], + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + prepare_s3_bucket(cluster) + yield cluster + finally: + cluster.shutdown() + + +def run_query(instance, query, stdin=None, settings=None): + # type: (ClickHouseInstance, str, object, dict) -> str + + logging.info("Running query '{}'...".format(query)) + result = instance.query(query, stdin=stdin, settings=settings) + logging.info("Query finished") + + return result + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_delete_after_processing(started_cluster, mode): + prefix = "delete" + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + total_values = generate_random_files(5, prefix, started_cluster, bucket) + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_delete_{mode}', + s3queue_loading_retries = 3, + after_processing='delete'; + """ + ) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == total_values + minio = started_cluster.minio_client + objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) + assert len(objects) == 0 + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_failed_retry(started_cluster, mode): + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + values = [ + ["failed", 1, 1], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"test.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_failed_retry_{mode}', + s3queue_loading_retries = 3; + """ + ) + + # first try + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # second try + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # upload correct file + values = [ + [1, 1, 1], + ] + values_csv = ( + 
"\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_direct_select_file(started_cluster, mode): + auth = "'minio','minio123'," + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + values = [ + [12549, 2463, 19893], + [64021, 38652, 66703], + [81611, 39650, 83516], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"test.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + instance.query( + """ + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_2; + DROP TABLE IF EXISTS test.s3_queue_3; + """ + ) + + instance.query( + f""" + CREATE TABLE test.s3_queue ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_{mode}' + """ + ) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + instance.query( + f""" + CREATE TABLE test.s3_queue_2 ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_{mode}' + """ + ) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # New table with same zookeeper path + get_query = f"SELECT * FROM test.s3_queue_2" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == [] + # New table with different zookeeper path + instance.query( + f""" + CREATE TABLE test.s3_queue_3 ({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/*', {auth}'CSV') + SETTINGS + mode = '{mode}', + keeper_path='/clickhouse/select_{mode}_2' + """ + ) + get_query = f"SELECT * FROM test.s3_queue_3" + assert [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] == values + + values = [ + [1, 1, 1], + ] + values_csv = ( + "\n".join((",".join(map(str, row)) for row in values)) + "\n" + ).encode() + filename = f"t.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + get_query = f"SELECT * FROM test.s3_queue_3" + if mode == "unordered": + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == values + elif mode == "ordered": + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == [] + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_direct_select_multiple_files(started_cluster, mode): + prefix = f"multiple_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + instance.query("drop table if exists test.s3_queue") + instance.query( + f""" + CREATE TABLE test.s3_queue 
({table_format}) + ENGINE = S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/select_multiple_{mode}' + """ + ) + + for i in range(5): + rand_values = [[random.randint(0, 50) for _ in range(3)] for _ in range(10)] + + values_csv = ( + "\n".join((",".join(map(str, row)) for row in rand_values)) + "\n" + ).encode() + filename = f"{prefix}/test_{i}.csv" + put_s3_file_content(started_cluster, bucket, filename, values_csv) + + get_query = f"SELECT * FROM test.s3_queue" + assert [ + list(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + ] == rand_values + + total_values = generate_random_files( + 4, prefix, started_cluster, bucket, start_ind=5 + ) + get_query = f"SELECT * FROM test.s3_queue" + assert { + tuple(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + } == set([tuple(i) for i in total_values]) + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_streaming_to_view_(started_cluster, mode): + prefix = f"streaming_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + total_values = generate_random_files(10, prefix, started_cluster, bucket) + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/view_{mode}'; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + """ + ) + expected_values = set([tuple(i) for i in total_values]) + for i in range(10): + get_query = f"SELECT * FROM test.persistent_s3_queue_mv" + + selected_values = { + tuple(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + } + if selected_values != expected_values: + time.sleep(1) + else: + break + + assert selected_values == expected_values + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_streaming_to_many_views(started_cluster, mode): + prefix = f"streaming_files_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + retry_cnt = 10 + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue_persistent_2; + DROP TABLE IF EXISTS test.s3_queue_persistent_3; + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_2; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_3; + + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_2 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_3 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', 
{AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/multiple_view_{mode}'; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_2 TO test.s3_queue_persistent_2 AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_3 TO test.s3_queue_persistent_3 AS + SELECT + * + FROM test.s3_queue; + """ + ) + total_values = generate_random_files(5, prefix, started_cluster, bucket) + expected_values = set([tuple(i) for i in total_values]) + + for i in range(retry_cnt): + retry = False + for get_query in [ + f"SELECT * FROM test.s3_queue_persistent", + f"SELECT * FROM test.s3_queue_persistent_2", + f"SELECT * FROM test.s3_queue_persistent_3", + ]: + selected_values = { + tuple(map(int, l.split())) + for l in run_query(instance, get_query).splitlines() + } + if i == retry_cnt - 1: + assert selected_values == expected_values + if selected_values != expected_values: + retry = True + break + if retry: + time.sleep(1) + else: + break + + +def test_multiple_tables_meta_mismatch(started_cluster): + prefix = f"test_meta" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + # check mode + failed = False + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert "Existing table metadata in ZooKeeper differs in engine mode" in str(e) + failed = True + assert failed is True + + # check columns + table_format_copy = table_format + ", column4 UInt32" + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format_copy}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert ( + "Table columns structure in ZooKeeper is different from local table structure" + in str(e) + ) + failed = True + + assert failed is True + + # check format + try: + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'TSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + except QueryRuntimeException as e: + assert "Existing table metadata in ZooKeeper differs in format name" in str(e) + failed = True + assert failed is True + + # create working engine + instance.query( + f""" + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'ordered', + keeper_path = '/clickhouse/test_meta'; + """ + ) + + +def test_max_set_age(started_cluster): + files_to_generate = 10 + max_age = 1 
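+ # With s3queue_tracked_file_ttl_sec set to max_age (1 second), entries about already
+ # processed files expire quickly, so the second SELECT below is expected to return
+ # the same rows again after sleeping for max_age + 1 seconds.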
+ prefix = f"test_multiple" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_set_age', + s3queue_tracked_files_limit = 10, + s3queue_tracked_file_ttl_sec = {max_age}; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + time.sleep(max_age + 1) + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_multiple_tables_streaming_sync(started_cluster, mode): + files_to_generate = 300 + poll_size = 30 + prefix = f"test_multiple_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_copy; + DROP TABLE IF EXISTS test.s3_queue_copy_2; + + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.s3_queue_persistent_copy; + DROP TABLE IF EXISTS test.s3_queue_persistent_copy_2; + + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_copy; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv_copy_2; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_copy ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_copy_2 ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_sync_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_copy ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE TABLE test.s3_queue_persistent_copy_2 ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_copy TO test.s3_queue_persistent_copy AS + SELECT + * + FROM test.s3_queue_copy; + + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv_copy_2 TO test.s3_queue_persistent_copy_2 AS + SELECT + * + FROM 
test.s3_queue_copy_2; + """ + ) + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + + def get_count(table_name): + return int(run_query(instance, f"SELECT count() FROM {table_name}")) + + for _ in range(100): + if ( + get_count("test.s3_queue_persistent") + + get_count("test.s3_queue_persistent_copy") + + get_count("test.s3_queue_persistent_copy_2") + ) == files_to_generate: + break + time.sleep(1) + + get_query = f"SELECT * FROM test.s3_queue_persistent" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + get_query_copy = f"SELECT * FROM test.s3_queue_persistent_copy" + res2 = [ + list(map(int, l.split())) + for l in run_query(instance, get_query_copy).splitlines() + ] + get_query_copy_2 = f"SELECT * FROM test.s3_queue_persistent_copy_2" + res3 = [ + list(map(int, l.split())) + for l in run_query(instance, get_query_copy_2).splitlines() + ] + assert {tuple(v) for v in res1 + res2 + res3} == set( + [tuple(i) for i in total_values] + ) + + # Checking that all files were processed only once + time.sleep(10) + assert ( + get_count("test.s3_queue_persistent") + + get_count("test.s3_queue_persistent_copy") + + get_count("test.s3_queue_persistent_copy_2") + ) == files_to_generate + + +@pytest.mark.parametrize("mode", AVAILABLE_MODES) +def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): + files_to_generate = 100 + poll_size = 2 + prefix = f"test_multiple_{mode}" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + instance_2 = started_cluster.instances["instance2"] + + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + for inst in [instance, instance_2]: + inst.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + DROP TABLE IF EXISTS test.s3_queue_persistent; + DROP TABLE IF EXISTS test.persistent_s3_queue_mv; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = '{mode}', + keeper_path = '/clickhouse/test_multiple_consumers_{mode}', + s3queue_polling_size = {poll_size}; + + CREATE TABLE test.s3_queue_persistent ({table_format}) + ENGINE = MergeTree() + ORDER BY column1; + """ + ) + + for inst in [instance, instance_2]: + inst.query( + f""" + CREATE MATERIALIZED VIEW test.persistent_s3_queue_mv TO test.s3_queue_persistent AS + SELECT + * + FROM test.s3_queue; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, row_num=1 + ) + + def get_count(node, table_name): + return int(run_query(node, f"SELECT count() FROM {table_name}")) + + for _ in range(150): + if ( + get_count(instance, "test.s3_queue_persistent") + + get_count(instance_2, "test.s3_queue_persistent") + ) == files_to_generate: + break + time.sleep(1) + + get_query = f"SELECT * FROM test.s3_queue_persistent" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + res2 = [ + list(map(int, l.split())) for l in run_query(instance_2, get_query).splitlines() + ] + + assert len(res1) + len(res2) == files_to_generate + + # Checking that all engines have made progress + assert len(res1) > 0 + assert len(res2) > 0 + + assert {tuple(v) for v in res1 + res2} == set([tuple(i) for i in total_values]) + + # Checking that all files were processed only once + time.sleep(10) + assert ( + get_count(instance, "test.s3_queue_persistent") + + 
get_count(instance_2, "test.s3_queue_persistent") + ) == files_to_generate + + +def test_max_set_size(started_cluster): + files_to_generate = 10 + prefix = f"test_multiple" + bucket = started_cluster.minio_restricted_bucket + instance = started_cluster.instances["instance"] + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + + instance.query( + f""" + DROP TABLE IF EXISTS test.s3_queue; + + CREATE TABLE test.s3_queue ({table_format}) + ENGINE=S3Queue('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{prefix}/*', {AUTH}'CSV') + SETTINGS + mode = 'unordered', + keeper_path = '/clickhouse/test_set_size', + s3queue_tracked_files_limit = {files_to_generate - 1}; + """ + ) + + total_values = generate_random_files( + files_to_generate, prefix, started_cluster, bucket, start_ind=0, row_num=1 + ) + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == total_values + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == [total_values[0]] + + get_query = f"SELECT * FROM test.s3_queue" + res1 = [ + list(map(int, l.split())) for l in run_query(instance, get_query).splitlines() + ] + assert res1 == [total_values[1]] diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index 7f359078967..7ff7a871413 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -68,13 +68,13 @@ def test_url_cluster_with_named_collection(): def test_table_function_url_access_rights(): node1.query("CREATE USER OR REPLACE u1") - expected_error = "necessary to have grant CREATE TEMPORARY TABLE, URL ON *.*" + expected_error = "necessary to have the grant CREATE TEMPORARY TABLE, URL ON *.*" assert expected_error in node1.query_and_get_error( f"SELECT * FROM url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')", user="u1", ) - expected_error = "necessary to have grant CREATE TEMPORARY TABLE, URL ON *.*" + expected_error = "necessary to have the grant CREATE TEMPORARY TABLE, URL ON *.*" assert expected_error in node1.query_and_get_error( f"SELECT * FROM url('http://nginx:80/test_1', 'TSV')", user="u1" ) @@ -89,7 +89,7 @@ def test_table_function_url_access_rights(): user="u1", ) == TSV([["column1", "UInt32"], ["column2", "UInt32"], ["column3", "UInt32"]]) - expected_error = "necessary to have grant URL ON *.*" + expected_error = "necessary to have the grant URL ON *.*" assert expected_error in node1.query_and_get_error( f"DESCRIBE TABLE url('http://nginx:80/test_1', 'TSV')", user="u1" ) diff --git a/tests/integration/test_system_flush_logs/test.py b/tests/integration/test_system_flush_logs/test.py index d9ab76d2d61..084d342d736 100644 --- a/tests/integration/test_system_flush_logs/test.py +++ b/tests/integration/test_system_flush_logs/test.py @@ -2,11 +2,16 @@ # pylint: disable=unused-argument # pylint: disable=redefined-outer-name +import time import pytest from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node_default") +node = cluster.add_instance( + "node_default", + stay_alive=True, +) system_logs = [ # disabled by default @@ -40,7 +45,11 @@ def test_system_logs(flush_logs, table, exists): if exists: node.query(q) else: - assert "Table {} 
doesn't exist".format(table) in node.query_and_get_error(q) + response = node.query_and_get_error(q) + assert ( + "Table {} does not exist".format(table) in response + or "Unknown table expression identifier '{}'".format(table) in response + ) # Logic is tricky, let's check that there is no hang in case of message queue @@ -64,3 +73,95 @@ def test_system_suspend(): node.query("SYSTEM SUSPEND FOR 1 SECOND;") node.query("INSERT INTO t VALUES (now());") assert "1\n" == node.query("SELECT max(x) - min(x) >= 1 FROM t;") + + +def test_log_max_size(start_cluster): + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 1000000 + 10 + 10 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + node.restart_clickhouse() + for i in range(10): + node.query(f"select {i}") + + assert node.query("select count() >= 10 from system.query_log") == "1\n" + node.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] + ) + + +def test_log_buffer_size_rows_flush_threshold(start_cluster): + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 1000000 + 10 + 10000 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + node.restart_clickhouse() + node.query(f"TRUNCATE TABLE IF EXISTS system.query_log") + for i in range(10): + node.query(f"select {i}") + + assert_eq_with_retry( + node, + f"select count() >= 11 from system.query_log", + "1", + sleep_time=0.2, + retry_count=100, + ) + + node.query(f"TRUNCATE TABLE IF EXISTS system.query_log") + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 1000000 + 10000 + 10000 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + node.restart_clickhouse() + for i in range(10): + node.query(f"select {i}") + + # Logs aren't flushed + assert_eq_with_retry( + node, + f"select count() < 10 from system.query_log", + "1", + sleep_time=0.2, + retry_count=100, + ) + + node.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] + ) diff --git a/tests/integration/test_system_logs/test_system_logs.py b/tests/integration/test_system_logs/test_system_logs.py index aac5ee53819..72249cd64ee 100644 --- a/tests/integration/test_system_logs/test_system_logs.py +++ b/tests/integration/test_system_logs/test_system_logs.py @@ -88,3 +88,53 @@ def test_system_logs_settings_expr(start_cluster): assert expected in node3.query( "SELECT engine_full FROM system.tables WHERE database='system' and name='query_log'" ) + + +def test_max_size_0(start_cluster): + node1.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 0 + 0 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + with pytest.raises(Exception): + node1.restart_clickhouse() + + node1.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] + ) + node1.restart_clickhouse() + + +def test_reserved_size_greater_max_size(start_cluster): + node1.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + 10 + 11 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + with pytest.raises(Exception): + node1.restart_clickhouse() + + node1.exec_in_container( + ["rm", f"/etc/clickhouse-server/config.d/yyy-override-query_log.xml"] + ) + node1.restart_clickhouse() diff --git a/tests/integration/test_system_start_stop_listen/__init__.py b/tests/integration/test_system_start_stop_listen/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
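test_log_max_size and the related system-log tests write a query_log override into config.d and restart the server. A sketch of what such an override could look like; the XML element names (flush_interval_milliseconds, max_size_rows, reserved_size_rows) are assumptions inferred from the test names and values, not copied from the patch, and `node` is the fixture defined above:

override = """
<clickhouse>
    <query_log>
        <flush_interval_milliseconds>1000000</flush_interval_milliseconds>
        <max_size_rows>10</max_size_rows>
        <reserved_size_rows>10</reserved_size_rows>
    </query_log>
</clickhouse>
"""
node.exec_in_container(
    [
        "bash",
        "-c",
        f'echo "{override}" > /etc/clickhouse-server/config.d/yyy-override-query_log.xml',
    ]
)
node.restart_clickhouse()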
a/tests/integration/test_system_start_stop_listen/configs/cluster.xml b/tests/integration/test_system_start_stop_listen/configs/cluster.xml new file mode 100644 index 00000000000..34b6c32c6d0 --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/configs/cluster.xml @@ -0,0 +1,16 @@ + + + + + + main_node + 9000 + + + backup_node + 9000 + + + + + diff --git a/tests/integration/test_system_start_stop_listen/configs/protocols.xml b/tests/integration/test_system_start_stop_listen/configs/protocols.xml new file mode 100644 index 00000000000..1d8608bcaca --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/configs/protocols.xml @@ -0,0 +1,23 @@ + + 0.0.0.0 + + + 9000 + 8123 + 9004 + + + + + tcp + 0.0.0.0 + 9001 + native protocol (tcp) + + + http + 8124 + http protocol + + + diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py new file mode 100644 index 00000000000..1925685af03 --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 + + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.client import Client +import requests + +cluster = ClickHouseCluster(__file__) +main_node = cluster.add_instance( + "main_node", + main_configs=["configs/cluster.xml", "configs/protocols.xml"], + with_zookeeper=True, +) +backup_node = cluster.add_instance( + "backup_node", main_configs=["configs/cluster.xml"], with_zookeeper=True +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def http_works(port=8123): + try: + response = requests.post(f"http://{main_node.ip_address}:{port}/ping") + if response.status_code == 400: + return True + except: + pass + + return False + + +def assert_everything_works(): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + main_node.query(QUERY) + main_node.query(MYSQL_QUERY) + custom_client.query(QUERY) + assert http_works() + assert http_works(8124) + + +QUERY = "SELECT 1" +MYSQL_QUERY = "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', 'default', '', SETTINGS connect_timeout = 100, connection_wait_timeout = 100)" + + +def test_default_protocols(started_cluster): + # TCP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN TCP") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + backup_node.query("SYSTEM START LISTEN ON CLUSTER default TCP") + + # HTTP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN HTTP") + assert http_works() == False + main_node.query("SYSTEM START LISTEN HTTP") + + # MySQL + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN MYSQL") + assert "Connections to mysql failed" in main_node.query_and_get_error(MYSQL_QUERY) + main_node.query("SYSTEM START LISTEN MYSQL") + + assert_everything_works() + + +def test_custom_protocols(started_cluster): + # TCP + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN CUSTOM 'tcp'") + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + main_node.query("SYSTEM START LISTEN CUSTOM 'tcp'") + + # HTTP + assert_everything_works() + main_node.query("SYSTEM STOP LISTEN CUSTOM 'http'") + assert http_works(8124) == False + main_node.query("SYSTEM START LISTEN CUSTOM 'http'") + + assert_everything_works() + + +def 
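The http_works helper above covers the HTTP ports; a comparable sketch for the raw TCP ports, handy after SYSTEM STOP LISTEN TCP (hypothetical helper, not part of the patch):

import socket


def tcp_port_accepts_connections(host, port, timeout=1.0):
    # True if something is accepting connections on host:port, False otherwise.
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False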
test_all_protocols(started_cluster): + custom_client = Client(main_node.ip_address, 9001, command=cluster.client_bin_path) + assert_everything_works() + + # STOP LISTEN QUERIES ALL + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() == False + assert http_works(8124) == False + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + # STOP LISTEN QUERIES DEFAULT + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES DEFAULT") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT") + + # STOP LISTEN QUERIES CUSTOM + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES CUSTOM") + main_node.query(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() + assert http_works(8124) == False + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM") + + # Disable all protocols, check first START LISTEN QUERIES DEFAULT then START LISTEN QUERIES CUSTOM + assert_everything_works() + + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES DEFAULT") + main_node.query(QUERY) + assert "Connection refused" in custom_client.query_and_get_error(QUERY) + assert http_works() + assert http_works(8124) == False + + main_node.query("SYSTEM STOP LISTEN QUERIES ALL") + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES CUSTOM") + assert "Connection refused" in main_node.query_and_get_error(QUERY) + custom_client.query(QUERY) + assert http_works() == False + assert http_works(8124) + + backup_node.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + assert_everything_works() diff --git a/tests/integration/test_table_functions_access_rights/test.py b/tests/integration/test_table_functions_access_rights/test.py index 09a05122c07..b1d1a291bc5 100644 --- a/tests/integration/test_table_functions_access_rights/test.py +++ b/tests/integration/test_table_functions_access_rights/test.py @@ -40,7 +40,7 @@ def test_merge(): instance.query("CREATE USER A") assert ( - "it's necessary to have grant CREATE TEMPORARY TABLE ON *.*" + "it's necessary to have the grant CREATE TEMPORARY TABLE ON *.*" in instance.query_and_get_error(select_query, user="A") ) @@ -62,7 +62,7 @@ def test_merge(): instance.query("GRANT SELECT ON default.table1 TO A") instance.query("GRANT INSERT ON default.table2 TO A") assert ( - "it's necessary to have grant SELECT ON default.table2" + "it's necessary to have the grant SELECT ON default.table2" in instance.query_and_get_error(select_query, user="A") ) diff --git a/tests/integration/test_temporary_data/__init__.py b/tests/integration/test_temporary_data/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_temporary_data/test.py b/tests/integration/test_temporary_data/test.py new file mode 100644 index 00000000000..9228da0698f --- /dev/null +++ b/tests/integration/test_temporary_data/test.py @@ -0,0 +1,57 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +import time + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = 
cluster.add_instance( + "node", + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_tmp_data_no_leftovers(start_cluster): + q = node.get_query_request + + settings = { + "max_bytes_before_external_group_by": "10K", + "max_bytes_before_external_sort": "10K", + "join_algorithm": "grace_hash", + "max_bytes_in_join": "10K", + "grace_hash_join_initial_buckets": "16", + } + + # Run some queries in the background to generate temporary data + q( + "SELECT ignore(*) FROM numbers(10 * 1024 * 1024) ORDER BY sipHash64(number)", + settings=settings, + ) + q("SELECT * FROM system.numbers GROUP BY ALL", settings=settings) + q( + "SELECT * FROM system.numbers as t1 JOIN system.numbers as t2 USING (number)", + settings=settings, + ) + + # Wait a bit to make sure the temporary data is written to disk + time.sleep(5) + + # Hard restart the node + node.restart_clickhouse(kill=True) + path_to_data = "/var/lib/clickhouse/" + + # Check that there are no temporary files left + result = node.exec_in_container(["ls", path_to_data + "tmp/"]) + assert result == "" diff --git a/tests/integration/test_temporary_data_in_cache/test.py b/tests/integration/test_temporary_data_in_cache/test.py index e6142c7eef1..ed06a70cf5a 100644 --- a/tests/integration/test_temporary_data_in_cache/test.py +++ b/tests/integration/test_temporary_data_in_cache/test.py @@ -2,6 +2,7 @@ # pylint: disable=redefined-outer-name import pytest +import fnmatch from helpers.cluster import ClickHouseCluster from helpers.client import QueryRuntimeException @@ -68,7 +69,9 @@ def test_cache_evicted_by_temporary_data(start_cluster): "max_bytes_before_external_sort": "4M", }, ) - assert "Failed to reserve space for the file cache" in str(exc.value) + assert fnmatch.fnmatch( + str(exc.value), "*Failed to reserve * for temporary file*" + ), exc.value # Some data evicted from cache by temporary data cache_size_after_eviction = get_cache_size() @@ -104,6 +107,8 @@ def test_cache_evicted_by_temporary_data(start_cluster): "SELECT randomPrintableASCII(1024) FROM numbers(32 * 1024) FORMAT TSV", params={"buffer_size": 0, "wait_end_of_query": 1}, ) - assert "Failed to reserve space for the file cache" in str(exc.value) + assert fnmatch.fnmatch( + str(exc.value), "*Failed to reserve * for temporary file*" + ), exc.value q("DROP TABLE IF EXISTS t1") diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index ff8e7154d0d..a27bb472ea8 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -114,7 +114,11 @@ def node_update_config(mode, setting, value=None): def assert_took(took, should_took): - assert took >= should_took[0] * 0.9 and took < should_took[1] + # we need to decrease the lower limit because the server limits could + # be enforced by throttling some server background IO instead of query IO + # and we have no control over it + # Note that throttler does not apply any restrictions on upper bound, so we can only tell how much time required "at least", not "at most" + assert took >= should_took * 0.85 @pytest.mark.parametrize( @@ -129,7 +133,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_local_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds @@ -139,7 +143,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7, 14), + 7, id="user_local_throttling", ), 
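The "(8-1)/1=7 seconds" style comments in these throttling parameters follow simple token-bucket arithmetic: with bandwidth B and roughly one second's worth of initial burst, moving S bytes takes at least (S - B) / B seconds. A worked version matching the numbers in the comments (illustrative only):

def expected_min_seconds(total_bytes, bandwidth_bytes_per_sec):
    # Lower bound only: assert_took above checks `took >= should_took * 0.85`
    # and deliberately has no upper bound.
    return max(0.0, (total_bytes - bandwidth_bytes_per_sec) / bandwidth_bytes_per_sec)


assert expected_min_seconds(8_000_000, 1_000_000) == 7   # the "1M" cases
assert expected_min_seconds(8_000_000, 2_000_000) == 3   # the "2M" cases
assert expected_min_seconds(8_000_000, 500_000) == 15    # the "500Ki" override, treated as ~0.5M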
# reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -149,7 +153,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_local_throttling", ), # @@ -161,7 +165,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_remote_to_local_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds @@ -171,7 +175,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7, 14), + 7, id="user_remote_to_local_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -181,7 +185,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_to_local_throttling", ), # @@ -193,7 +197,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_remote_to_remote_throttling", ), # No throttling for S3-to-S3, uses native copy @@ -203,7 +207,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (0, 3), + 0, id="user_remote_to_remote_throttling", ), # No throttling for S3-to-S3, uses native copy @@ -213,7 +217,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (0, 3), + 0, id="server_remote_to_remote_throttling", ), # @@ -230,7 +234,7 @@ def assert_took(took, should_took): None, None, None, - (0, 3), + 0, id="no_local_to_remote_throttling", ), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds, but for S3Client it is 2x more @@ -240,7 +244,7 @@ def assert_took(took, should_took): "user", "max_backup_bandwidth", "1M", - (7 * 3, 7 * 4 - 1), + 7 * 3, id="user_local_to_remote_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds, but for S3Client it is 2x more @@ -250,7 +254,7 @@ def assert_took(took, should_took): "server", "max_backup_bandwidth_for_server", "2M", - (3 * 3, 3 * 5), + 3 * 3, id="server_local_to_remote_throttling", ), ], @@ -287,7 +291,7 @@ def test_backup_throttling_override(): }, ) # reading 1e6*8 bytes with 500Ki default bandwith should take (8-0.5)/0.5=15 seconds - assert_took(took, (15, 20)) + assert_took(took, 15) @pytest.mark.parametrize( @@ -296,14 +300,14 @@ def test_backup_throttling_override(): # # Local # - pytest.param("default", None, None, None, (0, 3), id="no_local_throttling"), + pytest.param("default", None, None, None, 0, id="no_local_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "default", "user", "max_local_read_bandwidth", "1M", - (7, 14), + 7, id="user_local_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -312,20 +316,20 @@ def test_backup_throttling_override(): "server", "max_local_read_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_local_throttling", ), # # Remote # - pytest.param("s3", None, None, None, (0, 3), id="no_remote_throttling"), + pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "s3", "user", "max_remote_read_network_bandwidth", "1M", - (7, 14), + 7, id="user_remote_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -334,7 +338,7 @@ def test_backup_throttling_override(): "server", "max_remote_read_network_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_throttling", 
), ], @@ -358,14 +362,14 @@ def test_read_throttling(policy, mode, setting, value, should_took): # # Local # - pytest.param("default", None, None, None, (0, 3), id="no_local_throttling"), + pytest.param("default", None, None, None, 0, id="no_local_throttling"), # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "default", "user", "max_local_write_bandwidth", "1M", - (7, 14), + 7, id="local_user_throttling", ), # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds @@ -374,29 +378,29 @@ def test_read_throttling(policy, mode, setting, value, should_took): "server", "max_local_write_bandwidth_for_server", "2M", - (3, 7), + 3, id="local_server_throttling", ), # # Remote # - pytest.param("s3", None, None, None, (0, 3), id="no_remote_throttling"), - # writeing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), + # writing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds pytest.param( "s3", "user", "max_remote_write_network_bandwidth", "1M", - (7, 14), + 7, id="user_remote_throttling", ), - # writeing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # writing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds pytest.param( "s3", "server", "max_remote_write_network_bandwidth_for_server", "2M", - (3, 7), + 3, id="server_remote_throttling", ), ], diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index f5c2be51ed7..87c03c56f91 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -96,7 +96,9 @@ def test_https_wrong_cert(): with pytest.raises(Exception) as err: execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning(f"Failed attempt with wrong cert, err: {err_str}") continue @@ -202,7 +204,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: peter, err: {err_str}" @@ -222,7 +226,9 @@ def test_https_non_ssl_auth(): cert_name="wrong", ) err_str = str(err.value) - if count < MAX_RETRY and "Broken pipe" in err_str: + if count < MAX_RETRY and ( + ("Broken pipe" in err_str) or ("EOF occurred" in err_str) + ): count = count + 1 logging.warning( f"Failed attempt with wrong cert, user: jane, err: {err_str}" diff --git a/tests/integration/test_transactions/test.py b/tests/integration/test_transactions/test.py index a12d30915dd..46660581223 100644 --- a/tests/integration/test_transactions/test.py +++ b/tests/integration/test_transactions/test.py @@ -105,8 +105,6 @@ def test_rollback_unfinished_on_restart1(start_cluster): "0_4_4_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "0_8_8_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" - "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" - "1_1_1_1_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" 
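The TLS wrong-certificate tests above now retry on two transient error strings rather than one; a compact sketch of that check with a hypothetical helper name:

TRANSIENT_TLS_ERRORS = ("Broken pipe", "EOF occurred")


def is_transient_tls_error(err_str):
    # Either message can appear when the server drops the handshake for a bad
    # client certificate before the client has finished reading the response.
    return any(marker in err_str for marker in TRANSIENT_TLS_ERRORS)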
"1_3_3_0_7\t0\ttid3\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_5_5_0\t1\ttid6\tcsn_6\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" @@ -194,6 +192,5 @@ def test_rollback_unfinished_on_restart2(start_cluster): "0_4_4_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "0_5_5_0\t0\ttid5\tcsn18446744073709551615_\ttid0\tcsn0_\n" "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" - "1_1_1_1\t1\ttid1\tcsn_1\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" ) diff --git a/tests/integration/test_truncate_database/__init__.py b/tests/integration/test_truncate_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_truncate_database/configs/distributed_servers.xml b/tests/integration/test_truncate_database/configs/distributed_servers.xml new file mode 100644 index 00000000000..68b420f36b4 --- /dev/null +++ b/tests/integration/test_truncate_database/configs/distributed_servers.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_truncate_database/configs/replicated_servers.xml b/tests/integration/test_truncate_database/configs/replicated_servers.xml new file mode 100644 index 00000000000..8e318d385c3 --- /dev/null +++ b/tests/integration/test_truncate_database/configs/replicated_servers.xml @@ -0,0 +1,21 @@ + + + + + true + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + + + diff --git a/tests/integration/test_truncate_database/test_distributed.py b/tests/integration/test_truncate_database/test_distributed.py new file mode 100644 index 00000000000..fed3d16d190 --- /dev/null +++ b/tests/integration/test_truncate_database/test_distributed.py @@ -0,0 +1,53 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", main_configs=["configs/distributed_servers.xml"], with_zookeeper=True +) +node2 = cluster.add_instance( + "node2", with_zookeeper=True, main_configs=["configs/distributed_servers.xml"] +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node in (node1, node2): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id + """ + ) + + node1.query("INSERT INTO test.local_table VALUES (1, 'node1')") + node2.query("INSERT INTO test.local_table VALUES (2, 'node2')") + + node1.query( + "CREATE TABLE test.distributed(id UInt32, val String) ENGINE = Distributed(test_cluster, test, local_table)" + ) + node2.query( + "CREATE TABLE test.distributed(id UInt32, val String) ENGINE = Distributed(test_cluster, test, local_table)" + ) + + yield cluster + + finally: + cluster.shutdown() + + +def test_truncate_database_distributed(started_cluster): + query1 = "SELECT count() FROM test.distributed WHERE (id, val) IN ((1, 'node1'), (2, 'a'), (3, 'b'))" + query2 = "SELECT sum((id, val) IN ((1, 'node1'), (2, 'a'), (3, 'b'))) FROM test.distributed" + assert node1.query(query1) == "1\n" + assert node1.query(query2) == "1\n" + assert node2.query(query1) == "1\n" + assert node2.query(query2) == "1\n" + assert node2.query("SHOW DATABASES LIKE 'test'") == "test\n" + node1.query("TRUNCATE DATABASE test ON CLUSTER test_cluster SYNC") + assert node2.query("SHOW TABLES FROM test") == "" diff --git a/tests/integration/test_truncate_database/test_replicated.py 
b/tests/integration/test_truncate_database/test_replicated.py new file mode 100644 index 00000000000..59830d44378 --- /dev/null +++ b/tests/integration/test_truncate_database/test_replicated.py @@ -0,0 +1,56 @@ +import time + +import pytest +from helpers.cluster import ClickHouseCluster + + +def fill_nodes(nodes, shard): + for node in nodes: + node.query( + """ + CREATE DATABASE test; + + CREATE TABLE test.test_table(date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{shard}/replicated/test_table', '{replica}') ORDER BY id PARTITION BY toYYYYMM(date); + """.format( + shard=shard, replica=node.name + ) + ) + + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) +node2 = cluster.add_instance( + "node2", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) +node3 = cluster.add_instance( + "node3", with_zookeeper=True, main_configs=["configs/replicated_servers.xml"] +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + + fill_nodes([node1, node2, node3], 1) + + yield cluster + + except Exception as ex: + print(ex) + + finally: + cluster.shutdown() + + +def test_truncate_database_replicated(start_cluster): + node1.query( + "INSERT INTO test.test_table SELECT number, toString(number) FROM numbers(100)" + ) + assert node2.query("SELECT id FROM test.test_table LIMIT 1") == "0\n" + assert node3.query("SHOW DATABASES LIKE 'test'") == "test\n" + node3.query("TRUNCATE DATABASE test ON CLUSTER test_cluster SYNC") + assert node2.query("SHOW TABLES FROM test") == "" diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index a2f28e21666..c1c076277bb 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1529,106 +1529,6 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) -@pytest.mark.skip(reason="Flacky test") -@pytest.mark.parametrize( - "name,positive", - [ - pytest.param("test_double_move_while_select_negative", 0, id="negative"), - pytest.param("test_double_move_while_select_positive", 1, id="positive"), - ], -) -def test_double_move_while_select(started_cluster, name, positive): - name = unique_table_name(name) - - try: - node1.query( - """ - CREATE TABLE {name} ( - n Int64, - s String - ) ENGINE = MergeTree - ORDER BY tuple() - PARTITION BY n - SETTINGS storage_policy='small_jbod_with_external',temporary_directories_lifetime=1 - """.format( - name=name - ) - ) - - node1.query( - "INSERT INTO {name} VALUES (1, randomPrintableASCII(10*1024*1024))".format( - name=name - ) - ) - - parts = node1.query( - "SELECT name FROM system.parts WHERE table = '{name}' AND active = 1".format( - name=name - ) - ).splitlines() - assert len(parts) == 1 - - node1.query( - "ALTER TABLE {name} MOVE PART '{part}' TO DISK 'external'".format( - name=name, part=parts[0] - ) - ) - - def long_select(): - if positive: - node1.query( - "SELECT sleep(3), sleep(2), sleep(1), n FROM {name}".format( - name=name - ) - ) - - thread = threading.Thread(target=long_select) - thread.start() - - time.sleep(1) - - node1.query( - "ALTER TABLE {name} MOVE PART '{part}' TO DISK 'jbod1'".format( - name=name, part=parts[0] - ) - ) - - # Fill jbod1 to force ClickHouse to make move of partition 1 to external. 
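The new TRUNCATE DATABASE tests above assert the same behaviour on both the Distributed and the Replicated setups: the statement removes every table in the database on all replicas of the cluster while the database itself survives. A minimal usage sketch reusing the fixtures from those tests (illustrative only):

node1.query("TRUNCATE DATABASE test ON CLUSTER test_cluster SYNC")
assert node1.query("SHOW DATABASES LIKE 'test'") == "test\n"  # the database is kept
assert node2.query("SHOW TABLES FROM test") == ""             # but all of its tables are gone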
- node1.query( - "INSERT INTO {name} VALUES (2, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - node1.query( - "INSERT INTO {name} VALUES (3, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - node1.query( - "INSERT INTO {name} VALUES (4, randomPrintableASCII(9*1024*1024))".format( - name=name - ) - ) - - wait_parts_mover(node1, name, retry_count=40) - - # If SELECT locked old part on external, move shall fail. - assert node1.query( - "SELECT disk_name FROM system.parts WHERE table = '{name}' AND active = 1 AND name = '{part}'".format( - name=name, part=parts[0] - ) - ).splitlines() == ["jbod1" if positive else "external"] - - thread.join() - - assert node1.query( - "SELECT n FROM {name} ORDER BY n".format(name=name) - ).splitlines() == ["1", "2", "3", "4"] - - finally: - node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) - - @pytest.mark.parametrize( "name,engine,positive", [ diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 3b031569b8a..117ebe37dd2 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -23,6 +23,7 @@ node4 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node5 = cluster.add_instance( @@ -35,6 +36,7 @@ node5 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node6 = cluster.add_instance( "node6", @@ -46,6 +48,7 @@ node6 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) diff --git a/tests/integration/test_user_directories/test.py b/tests/integration/test_user_directories/test.py index 45afb86f464..704fb30b2fd 100644 --- a/tests/integration/test_user_directories/test.py +++ b/tests/integration/test_user_directories/test.py @@ -38,14 +38,14 @@ def test_old_style(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users2.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access2\\\\/"}', 2, ], @@ -62,20 +62,20 @@ def test_local_directories(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users3.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access3\\\\/"}', 2, ], [ "local directory (ro)", - "local directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access3-ro\\\\/","readonly":true}', 3, ], @@ -92,8 +92,8 @@ def test_relative_path(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users4.xml"}', 1, ] @@ -110,8 +110,8 @@ def test_memory(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users5.xml"}', 1, ], @@ -129,20 +129,20 @@ def test_mixed_style(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users6.xml"}', 1, ], [ - "local directory", - 
"local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access6\\\\/"}', 2, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access6a\\\\/"}', 3, ], @@ -160,14 +160,14 @@ def test_duplicates(): assert node.query("SELECT * FROM system.user_directories") == TSV( [ [ - "users.xml", - "users.xml", + "users_xml", + "users_xml", '{"path":"\\\\/etc\\\\/clickhouse-server\\\\/users7.xml"}', 1, ], [ - "local directory", - "local directory", + "local_directory", + "local_directory", '{"path":"\\\\/var\\\\/lib\\\\/clickhouse\\\\/access7\\\\/"}', 2, ], diff --git a/tests/integration/test_version_update/test.py b/tests/integration/test_version_update/test.py index 3332fe69e86..b8fa3e7ebb4 100644 --- a/tests/integration/test_version_update/test.py +++ b/tests/integration/test_version_update/test.py @@ -15,6 +15,7 @@ node2 = cluster.add_instance( tag="21.2", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) # Use differents nodes because if there is node.restart_from_latest_version(), then in later tests @@ -25,6 +26,7 @@ node3 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node4 = cluster.add_instance( "node4", @@ -32,6 +34,7 @@ node4 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node5 = cluster.add_instance( "node5", @@ -39,6 +42,7 @@ node5 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) node6 = cluster.add_instance( "node6", @@ -46,6 +50,7 @@ node6 = cluster.add_instance( tag="21.5", with_installed_binary=True, stay_alive=True, + allow_analyzer=False, ) diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index c80205d48c1..f3ae190ee46 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -16,6 +16,7 @@ node1 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node2 = cluster.add_instance( "node2", @@ -27,6 +28,7 @@ node2 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) node3 = cluster.add_instance( "node3", @@ -38,6 +40,7 @@ node3 = cluster.add_instance( main_configs=[ "configs/compat.xml", ], + allow_analyzer=False, ) @@ -67,6 +70,8 @@ def test_mutate_and_upgrade(start_cluster): node2.query("DETACH TABLE mt") # stop being leader node1.query("DETACH TABLE mt") # stop being leader + node1.query("SYSTEM FLUSH LOGS") + node2.query("SYSTEM FLUSH LOGS") node1.restart_with_latest_version(signal=9, fix_metadata=True) node2.restart_with_latest_version(signal=9, fix_metadata=True) diff --git a/tests/integration/test_wrong_db_or_table_name/__init__.py b/tests/integration/test_wrong_db_or_table_name/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_wrong_db_or_table_name/test.py b/tests/integration/test_wrong_db_or_table_name/test.py new file mode 100644 index 00000000000..68af383b6c3 --- /dev/null +++ b/tests/integration/test_wrong_db_or_table_name/test.py @@ -0,0 +1,108 @@ +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +node = 
cluster.add_instance("node") + + +@pytest.fixture(scope="module") +def start(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_wrong_database_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Database tes does not exist. Maybe you meant test?.", + ): + node.query("SELECT * FROM tes.table_test LIMIT 1;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP DATABASE test; + """ + ) + + +def test_drop_wrong_database_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Database tes does not exist. Maybe you meant test?.", + ): + node.query("DROP DATABASE tes;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query("DROP DATABASE test;") + + +def test_wrong_table_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + CREATE TABLE test.table_test2 (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Table test.table_test1 does not exist. Maybe you meant table_test?.", + ): + node.query( + """ + SELECT * FROM test.table_test1 LIMIT 1; + """ + ) + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP TABLE test.table_test2; + DROP DATABASE test; + """ + ) + + +def test_drop_wrong_table_name(start): + node.query( + """ + CREATE DATABASE test; + CREATE TABLE test.table_test (i Int64) ENGINE=Memory; + INSERT INTO test.table_test SELECT 1; + """ + ) + + with pytest.raises( + QueryRuntimeException, + match="DB::Exception: Table test.table_tes does not exist. 
Maybe you meant table_test?.", + ): + node.query("DROP TABLE test.table_tes;") + assert int(node.query("SELECT count() FROM test.table_test;")) == 1 + node.query( + """ + DROP TABLE test.table_test; + DROP DATABASE test; + """ + ) diff --git a/tests/integration/test_zero_copy_fetch/configs/users.xml b/tests/integration/test_zero_copy_fetch/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_zero_copy_fetch/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_zero_copy_fetch/test.py b/tests/integration/test_zero_copy_fetch/test.py index 4f3d42096c3..dc79e5d8723 100644 --- a/tests/integration/test_zero_copy_fetch/test.py +++ b/tests/integration/test_zero_copy_fetch/test.py @@ -19,12 +19,14 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/storage_conf.xml"], + user_configs=["configs/users.xml"], with_minio=True, with_zookeeper=True, ) cluster.add_instance( "node2", main_configs=["configs/storage_conf.xml"], + user_configs=["configs/users.xml"], with_minio=True, with_zookeeper=True, ) diff --git a/tests/integration/test_zookeeper_config/test.py b/tests/integration/test_zookeeper_config/test.py index 65f82c2286b..0c0f77ec597 100644 --- a/tests/integration/test_zookeeper_config/test.py +++ b/tests/integration/test_zookeeper_config/test.py @@ -2,6 +2,7 @@ import time import pytest import logging from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster( __file__, zookeeper_config_path="configs/zookeeper_config_root_a.xml" @@ -56,10 +57,10 @@ def test_chroot_with_same_root(started_cluster): for j in range(2): # Second insert to test deduplication node.query("INSERT INTO simple VALUES ({0}, {0})".format(i)) - time.sleep(1) - - assert node1.query("select count() from simple").strip() == "2" - assert node2.query("select count() from simple").strip() == "2" + node1.query("SYSTEM SYNC REPLICA simple") + assert_eq_with_retry(node1, "select count() from simple", "2") + node2.query("SYSTEM SYNC REPLICA simple") + assert_eq_with_retry(node2, "select count() from simple", "2") def test_chroot_with_different_root(started_cluster): @@ -76,5 +77,7 @@ def test_chroot_with_different_root(started_cluster): for j in range(2): # Second insert to test deduplication node.query("INSERT INTO simple_different VALUES ({0}, {0})".format(i)) - assert node1.query("select count() from simple_different").strip() == "1" - assert node3.query("select count() from simple_different").strip() == "1" + node1.query("SYSTEM SYNC REPLICA simple_different") + assert_eq_with_retry(node1, "select count() from simple_different", "1") + node3.query("SYSTEM SYNC REPLICA simple_different") + assert_eq_with_retry(node3, "select count() from simple_different", "1") diff --git a/tests/integration/test_zookeeper_fallback_session/__init__.py b/tests/integration/test_zookeeper_fallback_session/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml b/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml new file mode 100644 index 00000000000..63fdcea5dab --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/configs/remote_servers.xml @@ -0,0 +1,23 @@ + + + + + + node1 + 9000 + + + + node2 + 9000 + + + + node3 + 9000 + + + + + + diff --git 
a/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml b/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml new file mode 100644 index 00000000000..b0844ab4e73 --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/configs/zookeeper_load_balancing.xml @@ -0,0 +1,23 @@ + + + + in_order + + 2 + 4 + + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 500 + + diff --git a/tests/integration/test_zookeeper_fallback_session/test.py b/tests/integration/test_zookeeper_fallback_session/test.py new file mode 100644 index 00000000000..570eca4f0a6 --- /dev/null +++ b/tests/integration/test_zookeeper_fallback_session/test.py @@ -0,0 +1,101 @@ +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.network import PartitionManager + + +cluster = ClickHouseCluster( + __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" +) + +node1 = cluster.add_instance( + "node1", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) +node2 = cluster.add_instance( + "node2", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) +node3 = cluster.add_instance( + "node3", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_load_balancing.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + for node in [node1, node2, node3]: + node.query("DROP TABLE IF EXISTS simple SYNC") + node.query( + """ + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}') PARTITION BY toYYYYMM(date) ORDER BY id; + """.format( + replica=node.name + ) + ) + yield cluster + finally: + cluster.shutdown() + + +def assert_uses_zk_node(node: ClickHouseInstance, zk_node): + def check_callback(host): + return host.strip() == expected_zk_ip_addr + + expected_zk_ip_addr = node.cluster.get_instance_ip(zk_node) + + host = node.query_with_retry( + "select host from system.zookeeper_connection", check_callback=check_callback + ) + assert host.strip() == expected_zk_ip_addr + + +def test_fallback_session(started_cluster: ClickHouseCluster): + # only leave connecting to zoo3 possible + with PartitionManager() as pm: + for node in started_cluster.instances.values(): + for zk in ["zoo1", "zoo2"]: + pm._add_rule( + { + "source": node.ip_address, + "destination": cluster.get_instance_ip(zk), + "action": "REJECT --reject-with tcp-reset", + } + ) + + for node in [node1, node2, node3]: + # all nodes will have to switch to zoo3 + assert_uses_zk_node(node, "zoo3") + + node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(1)) + + # and replication still works + for node in [node2, node3]: + assert ( + node.query_with_retry( + "SELECT count() from simple", + check_callback=lambda count: count.strip() == "1", + ) + == "1\n" + ) + + # at this point network partitioning has been reverted. + # the nodes should switch to zoo1 automatically because of `in_order` load-balancing. 
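The zookeeper_load_balancing.xml fixture used by this test pins in_order load balancing, a 2-4 second fallback session lifetime, three ZooKeeper nodes on port 2181, and a 500 ms timeout value. The element names in the sketch below are assumptions inferred from those values and from the test's behaviour, not copied from the patch:

ZOOKEEPER_CONFIG = """
<clickhouse>
    <zookeeper>
        <zookeeper_load_balancing>in_order</zookeeper_load_balancing>
        <fallback_session_lifetime>
            <min>2</min>
            <max>4</max>
        </fallback_session_lifetime>
        <node><host>zoo1</host><port>2181</port></node>
        <node><host>zoo2</host><port>2181</port></node>
        <node><host>zoo3</host><port>2181</port></node>
        <session_timeout_ms>500</session_timeout_ms>
    </zookeeper>
</clickhouse>
"""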
+ # otherwise they would connect to a random replica + for node in [node1, node2, node3]: + assert_uses_zk_node(node, "zoo1") + + node1.query_with_retry("INSERT INTO simple VALUES ({0}, {0})".format(2)) + for node in [node2, node3]: + assert ( + node.query_with_retry( + "SELECT count() from simple", + check_callback=lambda count: count.strip() == "2", + ) + == "2\n" + ) diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml new file mode 100644 index 00000000000..91763c69bb9 --- /dev/null +++ b/tests/performance/aggregate_with_serialized_method.xml @@ -0,0 +1,32 @@ + + + 8 + 0 + 4 + + + + CREATE TABLE t_nullable + ( + key_string1 Nullable(String), + key_string2 Nullable(String), + key_string3 Nullable(String), + key_int64_1 Nullable(Int64), + key_int64_2 Nullable(Int64), + key_int64_3 Nullable(Int64), + key_int64_4 Nullable(Int64), + key_int64_5 Nullable(Int64), + m1 Int64, + m2 Int64 + ) + ENGINE = Memory + + insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%10+1, number%10+2, number%10+3, number%10+4,number%10+5, number%6000+1, number%5000+2 from numbers_mt(20000000) + select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3 + select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2 + select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5 + select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10 + select toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3), min(m1) from t_nullable group by toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3) limit 10 + + drop table if exists t_nullable + \ No newline at end of file diff --git a/tests/performance/duplicate_order_by_and_distinct.xml b/tests/performance/duplicate_order_by_and_distinct.xml deleted file mode 100644 index e36bc470512..00000000000 --- a/tests/performance/duplicate_order_by_and_distinct.xml +++ /dev/null @@ -1,8 +0,0 @@ - - 1 - - - SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null - SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null - SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null - diff --git a/tests/performance/join_filter_pushdown.xml b/tests/performance/join_filter_pushdown.xml new file mode 100644 index 00000000000..3adbbb3029e --- /dev/null +++ b/tests/performance/join_filter_pushdown.xml @@ -0,0 +1,9 @@ + + create table t(a UInt64) engine=MergeTree order by tuple() + insert into t select * from numbers_mt(5e6) + + select * from t as t0 inner join t as t1 using(a) where t1.a = 100 + + drop table t + + diff --git a/tests/performance/prepare_hash_before_merge.xml 
b/tests/performance/prepare_hash_before_merge.xml new file mode 100644 index 00000000000..e99f762927f --- /dev/null +++ b/tests/performance/prepare_hash_before_merge.xml @@ -0,0 +1,4 @@ + + SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 24 + SELECT COUNT(DISTINCT Referer) FROM test.hits SETTINGS max_threads = 22 + diff --git a/tests/performance/re2_regex_caching.xml b/tests/performance/re2_regex_caching.xml index 6edc83097ba..9778a8d4c0c 100644 --- a/tests/performance/re2_regex_caching.xml +++ b/tests/performance/re2_regex_caching.xml @@ -24,8 +24,8 @@ '.*' || toString(number) || '.' '.*' || toString(number % 10) || '.' - - '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number) + + '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number % 10)
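The join_filter_pushdown performance test above times a self-join whose predicate on t1.a can be applied below the join. One way to eyeball that rewrite, assuming an integration-test style `node` fixture as elsewhere in this patch (exact EXPLAIN output varies between versions):

plan = node.query(
    "EXPLAIN actions = 1 "
    "SELECT * FROM t AS t0 INNER JOIN t AS t1 USING (a) WHERE t1.a = 100"
)
# With the push-down applied, the filter on `a` shows up on the join input
# rather than above the join.
print(plan)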