diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml index 1005c8f6c38..9f3f944c696 100644 --- a/.github/workflows/anchore-analysis.yml +++ b/.github/workflows/anchore-analysis.yml @@ -8,6 +8,10 @@ name: Docker Container Scan (clickhouse-server) +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + "on": pull_request: paths: diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml index 284af965714..05cfc6a9405 100644 --- a/.github/workflows/backport.yml +++ b/.github/workflows/backport.yml @@ -1,4 +1,9 @@ name: CherryPick + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + concurrency: group: cherry-pick on: # yamllint disable-line rule:truthy @@ -9,10 +14,13 @@ jobs: runs-on: [self-hosted, style-checker] steps: - name: Set envs + # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings run: | cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/cherry_pick - ROBOT_CLICKHOUSE_SSH_KEY=${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}} + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/keeper_jepsen + REPO_COPY=${{runner.temp}}/keeper_jepsen/ClickHouse + EOF + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Jepsen Test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 keeper_jepsen_check.py + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index acd365aea9a..c42513ff9a8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,4 +1,9 @@ name: CIGithubActions + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + on: # yamllint disable-line rule:truthy pull_request: types: @@ -231,6 +236,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -275,6 +281,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -319,6 +326,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -329,6 +337,48 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH $CACHES_PATH + BuilderDebAarch64: + needs: [DockerHubPush, FastTest] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=package_aarch64 + EOF + - name: Download changed images 
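Every workflow touched in this diff gains the same env block setting PYTHONUNBUFFERED: 1, per the comment "Force the stdout and stderr streams to be unbuffered". The CI steps drive long-running Python scripts (build_check.py, functional_test_check.py, and so on) whose stdout is a pipe, and CPython block-buffers piped output, so progress lines can lag far behind the work or be lost if a runner dies. A minimal sketch of the effect; the step names below are made up for illustration:

# Why the workflows export PYTHONUNBUFFERED=1: with stdout attached to a
# pipe (as under CI log capture), CPython block-buffers output, so lines
# may only appear when the buffer fills or the process exits. Any
# non-empty value of PYTHONUNBUFFERED (or running `python3 -u`) disables
# that buffering.
import os
import sys
import time

def report(step: str) -> None:
    print(f"progress: {step}")
    if not os.environ.get("PYTHONUNBUFFERED"):
        sys.stdout.flush()  # manual fallback when the env var is not set

if __name__ == "__main__":
    for step in ("checkout", "build", "upload"):  # made-up step names
        report(step)
        time.sleep(1)  # stand-in for real work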
+ uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH $CACHES_PATH BuilderDebAsan: needs: [DockerHubPush, FastTest] runs-on: [self-hosted, builder] @@ -363,6 +413,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -407,6 +458,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -451,6 +503,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -495,6 +548,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -539,6 +593,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -586,6 +641,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -630,6 +686,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -674,6 +731,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -718,6 +776,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -762,6 +821,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: 
Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -806,6 +866,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -850,6 +911,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -867,13 +929,14 @@ jobs: needs: - BuilderDebRelease - BuilderBinRelease + - BuilderDebAarch64 - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan - BuilderDebMsan - BuilderDebDebug runs-on: [self-hosted, style-checker] - if: always() + if: ${{ success() || failure() }} steps: - name: Set envs run: | @@ -913,7 +976,7 @@ jobs: - BuilderBinDarwinAarch64 - BuilderBinPPC64 runs-on: [self-hosted, style-checker] - if: always() + if: ${{ success() || failure() }} steps: - name: Set envs run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 4a756c5e0e9..c2ed39224aa 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -1,4 +1,9 @@ name: MasterCI + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + on: # yamllint disable-line rule:truthy push: branches: @@ -152,6 +157,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -196,6 +202,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -241,6 +248,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -285,6 +293,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -329,6 +338,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -373,6 +383,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -417,6 +428,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -461,6 +473,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd 
$REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -509,6 +522,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -554,6 +568,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -599,6 +614,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -644,6 +660,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -689,6 +706,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -734,6 +752,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} @@ -779,6 +798,7 @@ jobs: cp -r $GITHUB_WORKSPACE $TEMP_PATH cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: name: ${{ env.BUILD_NAME }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 977dbf07dbe..1212bddb4a5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,9 @@ name: DocsReleaseChecks + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + concurrency: group: master-release cancel-in-progress: true @@ -35,6 +40,17 @@ jobs: needs: DockerHubPush runs-on: [self-hosted, func-tester] steps: + - name: Set envs + # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/docs_release + REPO_COPY=${{runner.temp}}/docs_release/ClickHouse + CLOUDFLARE_TOKEN=${{secrets.CLOUDFLARE}} + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (address, actions) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + 
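The stateless and integration test jobs are now split into numbered shards (FunctionalStatelessTestAsan0/1, FunctionalStatelessTestTsan0 through 2, IntegrationTestsAsan0 through 2, and so on), each exporting RUN_BY_HASH_NUM and RUN_BY_HASH_TOTAL so that every job runs only its slice of one logical check. A minimal sketch of how such hash-based sharding can work; the hashing scheme and function names are illustrative assumptions, not the actual logic of tests/ci/functional_test_check.py:

# Hash-based test sharding driven by the RUN_BY_HASH_NUM / RUN_BY_HASH_TOTAL
# variables introduced in this diff. Illustrative only.
import os
import zlib

def tests_for_this_shard(all_tests):
    num = int(os.environ.get("RUN_BY_HASH_NUM", "0"))
    total = int(os.environ.get("RUN_BY_HASH_TOTAL", "1"))
    # crc32 is stable across processes and Python versions (unlike hash()),
    # so every shard computes the same partition of the full test list.
    return [t for t in all_tests if zlib.crc32(t.encode()) % total == num]

if __name__ == "__main__":
    tests = [f"{i:05d}_example_test" for i in range(12)]  # made-up names
    print(tests_for_this_shard(tests))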
sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -449,6 +499,82 @@ jobs: CHECK_NAME=Stateless tests (thread, actions) REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (thread, actions) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (thread, actions) + REPO_COPY=${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -507,7 +633,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -519,6 +645,8 @@ jobs: CHECK_NAME=Stateless tests (memory, actions) REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -542,7 +670,81 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 
'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (memory, actions) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_memory + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (memory, actions) + REPO_COPY=${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -554,6 +756,82 @@ jobs: CHECK_NAME=Stateless tests (debug, actions) REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, actions) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + 
run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (debug, actions) + REPO_COPY=${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT=10800 + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -970,8 +1248,8 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: - needs: [BuilderDebAsan, FunctionalStatelessTestAsan] + IntegrationTestsAsan0: + needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -981,6 +1259,8 @@ jobs: REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=Integration tests (asan, actions) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=3 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1004,8 +1284,80 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: - needs: [BuilderDebTsan, FunctionalStatelessTestTsan] + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, actions) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, actions) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=3 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + 
mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: + needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -1015,6 +1367,8 @@ jobs: REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=Integration tests (thread, actions) REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=4 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1038,8 +1392,116 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (thread, actions) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (thread, actions) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_tsan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (thread, actions) + REPO_COPY=${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=4 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check 
out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease0: + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Set envs @@ -1049,6 +1511,44 @@ jobs: REPORTS_PATH=${{runner.temp}}/reports_dir CHECK_NAME=Integration tests (release, actions) REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=2 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (release, actions) + REPO_COPY=${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=2 EOF - name: Download json reports uses: actions/download-artifact@v2 @@ -1076,11 +1576,18 @@ jobs: needs: - DockerHubPush - BuilderReport - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestAsan0 + - FunctionalStatelessTestAsan1 + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -1093,9 +1600,15 @@ jobs: - StressTestTsan - StressTestMsan - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + - IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - CompatibilityCheck runs-on: [self-hosted, style-checker] steps: diff --git a/.github/workflows/woboq.yml b/.github/workflows/woboq.yml new file mode 100644 index 00000000000..f3cd7ab6245 --- /dev/null +++ b/.github/workflows/woboq.yml @@ -0,0 +1,42 @@ +name: WoboqBuilder +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + +concurrency: + group: woboq +on: # yamllint disable-line rule:truthy + schedule: + - cron: '0 */18 * * *' + workflow_dispatch: +jobs: + # don't use dockerhub push because this image updates so rarely + WoboqCodebrowser: + runs-on: [self-hosted, style-checker] + steps: + - 
name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/codebrowser + REPO_COPY=${{runner.temp}}/codebrowser/ClickHouse + IMAGES_PATH=${{runner.temp}}/images_path + EOF + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + - name: Codebrowser + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 codebrowser_check.py + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH diff --git a/.gitmodules b/.gitmodules index ca98e0f57d2..5321712f1f1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -54,8 +54,8 @@ url = https://github.com/ClickHouse-Extras/Turbo-Base64.git [submodule "contrib/arrow"] path = contrib/arrow - url = https://github.com/ClickHouse-Extras/arrow - branch = clickhouse-arrow-2.0.0 + url = https://github.com/ClickHouse-Extras/arrow.git + branch = blessed/release-6.0.1 [submodule "contrib/thrift"] path = contrib/thrift url = https://github.com/apache/thrift.git @@ -190,8 +190,8 @@ url = https://github.com/xz-mirror/xz [submodule "contrib/abseil-cpp"] path = contrib/abseil-cpp - url = https://github.com/ClickHouse-Extras/abseil-cpp.git - branch = lts_2020_02_25 + url = https://github.com/abseil/abseil-cpp.git + branch = lts_2021_11_02 [submodule "contrib/dragonbox"] path = contrib/dragonbox url = https://github.com/ClickHouse-Extras/dragonbox.git diff --git a/CMakeLists.txt b/CMakeLists.txt index d59dbbfc8b3..fdc9cfcd303 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -424,6 +424,11 @@ if (OS_LINUX AND NOT SANITIZE) set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined") endif () +# Increase stack size on Musl. We need big stack for our recursive-descend parser. +if (USE_MUSL) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152") +endif () + include(cmake/dbms_glob_sources.cmake) if (OS_LINUX OR OS_ANDROID) @@ -451,6 +456,11 @@ if (MAKE_STATIC_LIBRARIES) endif () else () set (CMAKE_POSITION_INDEPENDENT_CODE ON) + # This is required for clang on Arch linux, that uses PIE by default. + # See enable-SSP-and-PIE-by-default.patch [1]. 
+ # + # [1]: https://github.com/archlinux/svntogit-packages/blob/6e681aa860e65ad46a1387081482eb875c2200f2/trunk/enable-SSP-and-PIE-by-default.patch + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie") endif () if (ENABLE_TESTS) diff --git a/PreLoad.cmake b/PreLoad.cmake index 9fba896d72e..46bf8efed31 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -27,8 +27,7 @@ execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH) if (OS MATCHES "Linux" AND NOT DEFINED CMAKE_TOOLCHAIN_FILE AND NOT DISABLE_HERMETIC_BUILD - AND ($ENV{CC} MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*") - AND (USE_STATIC_LIBRARIES OR NOT DEFINED USE_STATIC_LIBRARIES)) + AND ($ENV{CC} MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*")) if (ARCH MATCHES "amd64|x86_64") set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64.cmake" CACHE INTERNAL "" FORCE) diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index 452b483fb6a..be1a0fb2af1 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -9,7 +9,3 @@ add_subdirectory (pcg-random) add_subdirectory (widechar_width) add_subdirectory (readpassphrase) add_subdirectory (bridge) - -if (USE_MYSQL) - add_subdirectory (mysqlxx) -endif () diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index c0b0801bd2e..e62299f3d06 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -1,8 +1,6 @@ set (SRCS argsToConfig.cpp coverage.cpp - DateLUT.cpp - DateLUTImpl.cpp demangle.cpp getFQDNOrHostName.cpp getMemoryAmount.cpp @@ -18,7 +16,6 @@ set (SRCS sleep.cpp terminalColors.cpp errnoToString.cpp - getResource.cpp StringRef.cpp ) diff --git a/base/base/getPageSize.cpp b/base/base/getPageSize.cpp index 6f7e0c6e259..948fc4a7700 100644 --- a/base/base/getPageSize.cpp +++ b/base/base/getPageSize.cpp @@ -1,8 +1,11 @@ #include #include - +#include Int64 getPageSize() { - return sysconf(_SC_PAGESIZE); + Int64 page_size = sysconf(_SC_PAGESIZE); + if (page_size < 0) + abort(); + return page_size; } diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index 8ae10f6bf83..20a755ed7a4 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -123,6 +123,12 @@ bool hasPHDRCache() #else void updatePHDRCache() {} -bool hasPHDRCache() { return false; } + +#if defined(USE_MUSL) + /// With statically linked with musl, dl_iterate_phdr is immutable. + bool hasPHDRCache() { return true; } +#else + bool hasPHDRCache() { return false; } +#endif #endif diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index bfb68abbcfb..5a27cae0383 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -182,7 +182,6 @@ TRAP(vlimit) TRAP(wcsnrtombs) TRAP(wcsrtombs) TRAP(wctomb) -TRAP(wordexp) TRAP(basename) TRAP(catgets) TRAP(dbm_clearerr) @@ -195,9 +194,8 @@ TRAP(dbm_nextkey) TRAP(dbm_open) TRAP(dbm_store) TRAP(dirname) -#if !defined(SANITIZER) -TRAP(dlerror) // Used by tsan -#endif +// TRAP(dlerror) // It is not thread-safe. But it is used by dynamic linker to load some name resolution plugins. Also used by TSan. +/// Note: we should better get rid of glibc, dynamic linking and all that sort of annoying garbage altogether. TRAP(ftw) TRAP(getc_unlocked) //TRAP(getenv) // Ok at program startup @@ -245,4 +243,21 @@ TRAP(lgammaf32x) TRAP(lgammaf64) TRAP(lgammaf64x) +/// These functions are unused by ClickHouse and we should be aware if they are accidentally get used. +/// Sometimes people report that these function contain vulnerabilities (these reports are bogus for ClickHouse). 
+TRAP(mq_close) +TRAP(mq_getattr) +TRAP(mq_setattr) +TRAP(mq_notify) +TRAP(mq_open) +TRAP(mq_receive) +TRAP(mq_send) +TRAP(mq_unlink) +TRAP(mq_timedsend) +TRAP(mq_timedreceive) + +/// These functions are also unused by ClickHouse. +TRAP(wordexp) +TRAP(wordfree) + #endif diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt deleted file mode 100644 index 80db50c2593..00000000000 --- a/base/mysqlxx/CMakeLists.txt +++ /dev/null @@ -1,61 +0,0 @@ -add_library (mysqlxx - Connection.cpp - Exception.cpp - Query.cpp - ResultBase.cpp - UseQueryResult.cpp - Row.cpp - Value.cpp - Pool.cpp - PoolFactory.cpp - PoolWithFailover.cpp -) - -target_include_directories (mysqlxx PUBLIC ..) - -if (NOT USE_INTERNAL_MYSQL_LIBRARY) - set(PLATFORM_LIBRARIES ${CMAKE_DL_LIBS}) - - if (USE_MYSQL) - target_include_directories (mysqlxx SYSTEM PRIVATE ${MYSQL_INCLUDE_DIR}) - endif () - - if (APPLE) - find_library (ICONV_LIBRARY iconv) - set (MYSQLCLIENT_LIBRARIES ${MYSQLCLIENT_LIBRARIES} ${STATIC_MYSQLCLIENT_LIB} ${ICONV_LIBRARY}) - elseif (USE_STATIC_LIBRARIES AND STATIC_MYSQLCLIENT_LIB) - set (MYSQLCLIENT_LIBRARIES ${STATIC_MYSQLCLIENT_LIB}) - endif () -endif () - -target_link_libraries (mysqlxx - PUBLIC - common - PRIVATE - ${MYSQLCLIENT_LIBRARIES} - ${ZLIB_LIBRARIES} -) - -if(OPENSSL_LIBRARIES) - target_link_libraries(mysqlxx PRIVATE ${OPENSSL_LIBRARIES}) -endif() - -target_link_libraries(mysqlxx PRIVATE ${PLATFORM_LIBRARIES}) - -if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR) - target_include_directories (mysqlxx SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR}) -endif () - -target_no_warning(mysqlxx reserved-macro-identifier) - -if (NOT USE_INTERNAL_MYSQL_LIBRARY AND USE_STATIC_LIBRARIES) - message(WARNING "Statically linking with system mysql/mariadb only works " - "if mysql client libraries are built with same openssl version as " - "we are going to use now. It wouldn't work if GnuTLS is used. " - "Try -D\"USE_INTERNAL_MYSQL_LIBRARY\"=ON or -D\"ENABLE_MYSQL\"=OFF or " - "-D\"USE_STATIC_LIBRARIES\"=OFF") -endif () - -if (ENABLE_TESTS) - add_subdirectory (tests) -endif () diff --git a/cmake/find/blob_storage.cmake b/cmake/find/blob_storage.cmake index 4df25abb1ab..74a907da7db 100644 --- a/cmake/find/blob_storage.cmake +++ b/cmake/find/blob_storage.cmake @@ -1,8 +1,10 @@ +option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES}) + option(USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY "Set to FALSE to use system Azure SDK instead of bundled (OFF currently not implemented)" - ${ENABLE_LIBRARIES}) + ON) -if (USE_INTERNAL_AZURE_BLOB_STORAGE_LIBRARY) +if (ENABLE_AZURE_BLOB_STORAGE) set(USE_AZURE_BLOB_STORAGE 1) set(AZURE_BLOB_STORAGE_LIBRARY azure_sdk) endif() diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake index 43c2de0c921..95ec3d8a034 100644 --- a/cmake/find/ccache.cmake +++ b/cmake/find/ccache.cmake @@ -32,11 +32,6 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}") - set (CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND} ${CMAKE_CXX_COMPILER_LAUNCHER}) - set (CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND} ${CMAKE_C_COMPILER_LAUNCHER}) - - set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND}) - # debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is # filled from the debian/changelog or current time. 
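The cmake/find/ccache.cmake hunk (it continues just below) replaces the global RULE_LAUNCH_COMPILE/RULE_LAUNCH_LINK properties with CMAKE_C_COMPILER_LAUNCHER and CMAKE_CXX_COMPILER_LAUNCHER, and for ccache 4.0/4.1 builds the launcher as "env -u SOURCE_DATE_EPOCH <ccache>" so that the SOURCE_DATE_EPOCH value debhelpers set from debian/changelog is hidden from ccache. A rough Python stand-in for what that launcher does; the compiler name and argument handling are assumptions for illustration:

# Rough equivalent of the `env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}` launcher
# configured for ccache 4.0/4.1: run ccache plus the real compiler with
# SOURCE_DATE_EPOCH removed from the environment.
import os
import subprocess
import sys

def run_through_ccache(compiler, args):
    env = dict(os.environ)
    env.pop("SOURCE_DATE_EPOCH", None)  # same effect as `env -u SOURCE_DATE_EPOCH`
    return subprocess.call(["ccache", compiler, *args], env=env)

if __name__ == "__main__":
    sys.exit(run_through_ccache("clang++", sys.argv[1:]))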
# @@ -49,11 +44,14 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable. if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2") message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required") + set(LAUNCHER ${CCACHE_FOUND}) elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0") message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") - set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH") - set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH") + set(LAUNCHER env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}) endif() + + set (CMAKE_CXX_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_CXX_COMPILER_LAUNCHER}) + set (CMAKE_C_COMPILER_LAUNCHER ${LAUNCHER} ${CMAKE_C_COMPILER_LAUNCHER}) else () message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") endif () diff --git a/cmake/linux/toolchain-x86_64.cmake b/cmake/linux/toolchain-x86_64.cmake index 879f35feb83..965ea024ab7 100644 --- a/cmake/linux/toolchain-x86_64.cmake +++ b/cmake/linux/toolchain-x86_64.cmake @@ -14,9 +14,12 @@ set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_6 set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-gnu/libc") -set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") -set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") -set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) diff --git a/cmake/target.cmake b/cmake/target.cmake index 3c02c4313f1..4b109d165e7 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -42,6 +42,14 @@ if (CMAKE_CROSSCOMPILING) message (FATAL_ERROR "Trying to cross-compile to unsupported system: ${CMAKE_SYSTEM_NAME}!") endif () + if (USE_MUSL) + set (USE_SENTRY OFF CACHE INTERNAL "") + set (ENABLE_ODBC OFF CACHE INTERNAL "") + set (ENABLE_GRPC OFF CACHE INTERNAL "") + set (ENABLE_HDFS OFF CACHE INTERNAL "") + set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") + endif () + # Don't know why but CXX_STANDARD doesn't work for cross-compilation set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++20") diff --git a/contrib/NuRaft b/contrib/NuRaft index bb69d48e0ee..ff100a87131 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit bb69d48e0ee35c87a0f19e509a09a914f71f0cff +Subproject commit ff100a8713146e1ca4b4158dd6cc4eef9af47fc3 diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp index b004a8a0241..215105818df 160000 --- a/contrib/abseil-cpp +++ b/contrib/abseil-cpp @@ -1 +1 @@ -Subproject commit b004a8a02418b83de8b686caa0b0f6e39ac2191f +Subproject commit 215105818dfde3174fe799600bb0f3cae233d0bf diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt index c8cb512066a..65e4c24ff5a 100644 --- 
a/contrib/abseil-cpp-cmake/CMakeLists.txt +++ b/contrib/abseil-cpp-cmake/CMakeLists.txt @@ -2,6 +2,8 @@ set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") if(NOT EXISTS "${ABSL_ROOT_DIR}/CMakeLists.txt") message(FATAL_ERROR " submodule third_party/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive") endif() +set(BUILD_TESTING OFF) +set(ABSL_PROPAGATE_CXX_STD ON) add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp") add_library(abseil_swiss_tables INTERFACE) diff --git a/contrib/arrow b/contrib/arrow index 078e21bad34..aa9a7a698e3 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 078e21bad344747b7656ef2d7a4f7410a0a303eb +Subproject commit aa9a7a698e33e278abe053f4634170b3b026e48e diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 231185462dc..e01b546310f 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -1,5 +1,22 @@ set (CMAKE_CXX_STANDARD 17) +set(ARROW_VERSION "6.0.1") +string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") + +set(ARROW_VERSION_MAJOR "6") +set(ARROW_VERSION_MINOR "0") +set(ARROW_VERSION_PATCH "1") + +if(ARROW_VERSION_MAJOR STREQUAL "0") + # Arrow 0.x.y => SO version is "x", full SO version is "x.y.0" + set(ARROW_SO_VERSION "${ARROW_VERSION_MINOR}") + set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0") +else() + # Arrow 1.x.y => SO version is "10x", full SO version is "10x.y.0" + math(EXPR ARROW_SO_VERSION "${ARROW_VERSION_MAJOR} * 100 + ${ARROW_VERSION_MINOR}") + set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0") +endif() + # === thrift set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp") @@ -93,6 +110,9 @@ add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}") message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}") +# === hdfs +set(HDFS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/") + # arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features. # Apple Clang compiler failed to compile this code without specifying c++11 standard. # As result these compiler features detected as absent. In result it failed to compile orc itself. 
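The version block added at the top of contrib/arrow-cmake/CMakeLists.txt in this hunk derives the shared-object version the way its comments describe: for Arrow 1.x.y and later, the SO version is major * 100 + minor, and the full SO version appends ".<patch>.0". A quick check of that arithmetic for the pinned 6.0.1, with Python used only to restate the math(EXPR ...) expression:

# Restating the ARROW_SO_VERSION arithmetic from the CMake hunk above
# for the pinned Arrow 6.0.1.
major, minor, patch = 6, 0, 1
so_version = major * 100 + minor              # 600
full_so_version = f"{so_version}.{patch}.0"   # "600.1.0"
print(so_version, full_so_version)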
@@ -114,6 +134,7 @@ configure_file("${ORC_INCLUDE_DIR}/orc/orc-config.hh.in" "${ORC_BUILD_INCLUDE_DI configure_file("${ORC_SOURCE_SRC_DIR}/Adaptor.hh.in" "${ORC_BUILD_INCLUDE_DIR}/Adaptor.hh") +# ARROW_ORC + adapters/orc/CMakefiles set(ORC_SRCS "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter.cc" "${ARROW_SRC_DIR}/arrow/adapters/orc/adapter_util.cc" @@ -150,28 +171,8 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow") configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cpp/src/arrow/util/config.h") -# arrow/cpp/src/arrow/CMakeLists.txt +# arrow/cpp/src/arrow/CMakeLists.txt (ARROW_SRCS + ARROW_COMPUTE + ARROW_IPC) set(ARROW_SRCS - "${LIBRARY_DIR}/buffer.cc" - "${LIBRARY_DIR}/builder.cc" - "${LIBRARY_DIR}/chunked_array.cc" - "${LIBRARY_DIR}/compare.cc" - "${LIBRARY_DIR}/datum.cc" - "${LIBRARY_DIR}/device.cc" - "${LIBRARY_DIR}/extension_type.cc" - "${LIBRARY_DIR}/memory_pool.cc" - "${LIBRARY_DIR}/pretty_print.cc" - "${LIBRARY_DIR}/record_batch.cc" - "${LIBRARY_DIR}/result.cc" - "${LIBRARY_DIR}/scalar.cc" - "${LIBRARY_DIR}/sparse_tensor.cc" - "${LIBRARY_DIR}/status.cc" - "${LIBRARY_DIR}/table_builder.cc" - "${LIBRARY_DIR}/table.cc" - "${LIBRARY_DIR}/tensor.cc" - "${LIBRARY_DIR}/type.cc" - "${LIBRARY_DIR}/visitor.cc" - "${LIBRARY_DIR}/array/array_base.cc" "${LIBRARY_DIR}/array/array_binary.cc" "${LIBRARY_DIR}/array/array_decimal.cc" @@ -191,25 +192,112 @@ set(ARROW_SRCS "${LIBRARY_DIR}/array/diff.cc" "${LIBRARY_DIR}/array/util.cc" "${LIBRARY_DIR}/array/validate.cc" + "${LIBRARY_DIR}/builder.cc" + "${LIBRARY_DIR}/buffer.cc" + "${LIBRARY_DIR}/chunked_array.cc" + "${LIBRARY_DIR}/compare.cc" + "${LIBRARY_DIR}/config.cc" + "${LIBRARY_DIR}/datum.cc" + "${LIBRARY_DIR}/device.cc" + "${LIBRARY_DIR}/extension_type.cc" + "${LIBRARY_DIR}/memory_pool.cc" + "${LIBRARY_DIR}/pretty_print.cc" + "${LIBRARY_DIR}/record_batch.cc" + "${LIBRARY_DIR}/result.cc" + "${LIBRARY_DIR}/scalar.cc" + "${LIBRARY_DIR}/sparse_tensor.cc" + "${LIBRARY_DIR}/status.cc" + "${LIBRARY_DIR}/table.cc" + "${LIBRARY_DIR}/table_builder.cc" + "${LIBRARY_DIR}/tensor.cc" + "${LIBRARY_DIR}/tensor/coo_converter.cc" + "${LIBRARY_DIR}/tensor/csf_converter.cc" + "${LIBRARY_DIR}/tensor/csx_converter.cc" + "${LIBRARY_DIR}/type.cc" + "${LIBRARY_DIR}/visitor.cc" + "${LIBRARY_DIR}/c/bridge.cc" + "${LIBRARY_DIR}/io/buffered.cc" + "${LIBRARY_DIR}/io/caching.cc" + "${LIBRARY_DIR}/io/compressed.cc" + "${LIBRARY_DIR}/io/file.cc" + "${LIBRARY_DIR}/io/hdfs.cc" + "${LIBRARY_DIR}/io/hdfs_internal.cc" + "${LIBRARY_DIR}/io/interfaces.cc" + "${LIBRARY_DIR}/io/memory.cc" + "${LIBRARY_DIR}/io/slow.cc" + "${LIBRARY_DIR}/io/stdio.cc" + "${LIBRARY_DIR}/io/transform.cc" + "${LIBRARY_DIR}/util/async_util.cc" + "${LIBRARY_DIR}/util/basic_decimal.cc" + "${LIBRARY_DIR}/util/bit_block_counter.cc" + "${LIBRARY_DIR}/util/bit_run_reader.cc" + "${LIBRARY_DIR}/util/bit_util.cc" + "${LIBRARY_DIR}/util/bitmap.cc" + "${LIBRARY_DIR}/util/bitmap_builders.cc" + "${LIBRARY_DIR}/util/bitmap_ops.cc" + "${LIBRARY_DIR}/util/bpacking.cc" + "${LIBRARY_DIR}/util/cancel.cc" + "${LIBRARY_DIR}/util/compression.cc" + "${LIBRARY_DIR}/util/counting_semaphore.cc" + "${LIBRARY_DIR}/util/cpu_info.cc" + "${LIBRARY_DIR}/util/decimal.cc" + "${LIBRARY_DIR}/util/delimiting.cc" + "${LIBRARY_DIR}/util/formatting.cc" + "${LIBRARY_DIR}/util/future.cc" + "${LIBRARY_DIR}/util/int_util.cc" + "${LIBRARY_DIR}/util/io_util.cc" + "${LIBRARY_DIR}/util/logging.cc" + "${LIBRARY_DIR}/util/key_value_metadata.cc" + "${LIBRARY_DIR}/util/memory.cc" + 
"${LIBRARY_DIR}/util/mutex.cc" + "${LIBRARY_DIR}/util/string.cc" + "${LIBRARY_DIR}/util/string_builder.cc" + "${LIBRARY_DIR}/util/task_group.cc" + "${LIBRARY_DIR}/util/tdigest.cc" + "${LIBRARY_DIR}/util/thread_pool.cc" + "${LIBRARY_DIR}/util/time.cc" + "${LIBRARY_DIR}/util/trie.cc" + "${LIBRARY_DIR}/util/unreachable.cc" + "${LIBRARY_DIR}/util/uri.cc" + "${LIBRARY_DIR}/util/utf8.cc" + "${LIBRARY_DIR}/util/value_parsing.cc" + "${LIBRARY_DIR}/vendored/base64.cpp" + "${LIBRARY_DIR}/vendored/datetime/tz.cpp" + + "${LIBRARY_DIR}/vendored/musl/strptime.c" + "${LIBRARY_DIR}/vendored/uriparser/UriCommon.c" + "${LIBRARY_DIR}/vendored/uriparser/UriCompare.c" + "${LIBRARY_DIR}/vendored/uriparser/UriEscape.c" + "${LIBRARY_DIR}/vendored/uriparser/UriFile.c" + "${LIBRARY_DIR}/vendored/uriparser/UriIp4Base.c" + "${LIBRARY_DIR}/vendored/uriparser/UriIp4.c" + "${LIBRARY_DIR}/vendored/uriparser/UriMemory.c" + "${LIBRARY_DIR}/vendored/uriparser/UriNormalizeBase.c" + "${LIBRARY_DIR}/vendored/uriparser/UriNormalize.c" + "${LIBRARY_DIR}/vendored/uriparser/UriParseBase.c" + "${LIBRARY_DIR}/vendored/uriparser/UriParse.c" + "${LIBRARY_DIR}/vendored/uriparser/UriQuery.c" + "${LIBRARY_DIR}/vendored/uriparser/UriRecompose.c" + "${LIBRARY_DIR}/vendored/uriparser/UriResolve.c" + "${LIBRARY_DIR}/vendored/uriparser/UriShorten.c" "${LIBRARY_DIR}/compute/api_aggregate.cc" "${LIBRARY_DIR}/compute/api_scalar.cc" "${LIBRARY_DIR}/compute/api_vector.cc" "${LIBRARY_DIR}/compute/cast.cc" "${LIBRARY_DIR}/compute/exec.cc" + "${LIBRARY_DIR}/compute/exec/aggregate_node.cc" + "${LIBRARY_DIR}/compute/exec/exec_plan.cc" + "${LIBRARY_DIR}/compute/exec/expression.cc" + "${LIBRARY_DIR}/compute/exec/filter_node.cc" + "${LIBRARY_DIR}/compute/exec/project_node.cc" + "${LIBRARY_DIR}/compute/exec/source_node.cc" + "${LIBRARY_DIR}/compute/exec/sink_node.cc" + "${LIBRARY_DIR}/compute/exec/order_by_impl.cc" "${LIBRARY_DIR}/compute/function.cc" "${LIBRARY_DIR}/compute/function_internal.cc" "${LIBRARY_DIR}/compute/kernel.cc" "${LIBRARY_DIR}/compute/registry.cc" - - "${LIBRARY_DIR}/compute/exec/exec_plan.cc" - "${LIBRARY_DIR}/compute/exec/expression.cc" - "${LIBRARY_DIR}/compute/exec/key_compare.cc" - "${LIBRARY_DIR}/compute/exec/key_encode.cc" - "${LIBRARY_DIR}/compute/exec/key_hash.cc" - "${LIBRARY_DIR}/compute/exec/key_map.cc" - "${LIBRARY_DIR}/compute/exec/util.cc" - "${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc" @@ -227,28 +315,31 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc" "${LIBRARY_DIR}/compute/kernels/scalar_compare.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_fill_null.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc" "${LIBRARY_DIR}/compute/kernels/scalar_nested.cc" "${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc" "${LIBRARY_DIR}/compute/kernels/scalar_string.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_temporal.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc" "${LIBRARY_DIR}/compute/kernels/scalar_validity.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc" "${LIBRARY_DIR}/compute/kernels/util_internal.cc" + "${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc" "${LIBRARY_DIR}/compute/kernels/vector_hash.cc" "${LIBRARY_DIR}/compute/kernels/vector_nested.cc" "${LIBRARY_DIR}/compute/kernels/vector_replace.cc" 
"${LIBRARY_DIR}/compute/kernels/vector_selection.cc" "${LIBRARY_DIR}/compute/kernels/vector_sort.cc" - - "${LIBRARY_DIR}/csv/chunker.cc" - "${LIBRARY_DIR}/csv/column_builder.cc" - "${LIBRARY_DIR}/csv/column_decoder.cc" - "${LIBRARY_DIR}/csv/converter.cc" - "${LIBRARY_DIR}/csv/options.cc" - "${LIBRARY_DIR}/csv/parser.cc" - "${LIBRARY_DIR}/csv/reader.cc" - "${LIBRARY_DIR}/csv/writer.cc" + "${LIBRARY_DIR}/compute/kernels/row_encoder.cc" + "${LIBRARY_DIR}/compute/exec/union_node.cc" + "${LIBRARY_DIR}/compute/exec/key_hash.cc" + "${LIBRARY_DIR}/compute/exec/key_map.cc" + "${LIBRARY_DIR}/compute/exec/key_compare.cc" + "${LIBRARY_DIR}/compute/exec/key_encode.cc" + "${LIBRARY_DIR}/compute/exec/util.cc" + "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc" + "${LIBRARY_DIR}/compute/exec/hash_join.cc" + "${LIBRARY_DIR}/compute/exec/hash_join_node.cc" + "${LIBRARY_DIR}/compute/exec/task_util.cc" "${LIBRARY_DIR}/ipc/dictionary.cc" "${LIBRARY_DIR}/ipc/feather.cc" @@ -258,52 +349,6 @@ set(ARROW_SRCS "${LIBRARY_DIR}/ipc/reader.cc" "${LIBRARY_DIR}/ipc/writer.cc" - "${LIBRARY_DIR}/io/buffered.cc" - "${LIBRARY_DIR}/io/caching.cc" - "${LIBRARY_DIR}/io/compressed.cc" - "${LIBRARY_DIR}/io/file.cc" - "${LIBRARY_DIR}/io/interfaces.cc" - "${LIBRARY_DIR}/io/memory.cc" - "${LIBRARY_DIR}/io/slow.cc" - "${LIBRARY_DIR}/io/stdio.cc" - "${LIBRARY_DIR}/io/transform.cc" - - "${LIBRARY_DIR}/tensor/coo_converter.cc" - "${LIBRARY_DIR}/tensor/csf_converter.cc" - "${LIBRARY_DIR}/tensor/csx_converter.cc" - - "${LIBRARY_DIR}/util/basic_decimal.cc" - "${LIBRARY_DIR}/util/bit_block_counter.cc" - "${LIBRARY_DIR}/util/bit_run_reader.cc" - "${LIBRARY_DIR}/util/bit_util.cc" - "${LIBRARY_DIR}/util/bitmap_builders.cc" - "${LIBRARY_DIR}/util/bitmap_ops.cc" - "${LIBRARY_DIR}/util/bitmap.cc" - "${LIBRARY_DIR}/util/bpacking.cc" - "${LIBRARY_DIR}/util/cancel.cc" - "${LIBRARY_DIR}/util/compression.cc" - "${LIBRARY_DIR}/util/cpu_info.cc" - "${LIBRARY_DIR}/util/decimal.cc" - "${LIBRARY_DIR}/util/delimiting.cc" - "${LIBRARY_DIR}/util/formatting.cc" - "${LIBRARY_DIR}/util/future.cc" - "${LIBRARY_DIR}/util/int_util.cc" - "${LIBRARY_DIR}/util/io_util.cc" - "${LIBRARY_DIR}/util/key_value_metadata.cc" - "${LIBRARY_DIR}/util/logging.cc" - "${LIBRARY_DIR}/util/memory.cc" - "${LIBRARY_DIR}/util/mutex.cc" - "${LIBRARY_DIR}/util/string_builder.cc" - "${LIBRARY_DIR}/util/string.cc" - "${LIBRARY_DIR}/util/task_group.cc" - "${LIBRARY_DIR}/util/tdigest.cc" - "${LIBRARY_DIR}/util/thread_pool.cc" - "${LIBRARY_DIR}/util/time.cc" - "${LIBRARY_DIR}/util/trie.cc" - "${LIBRARY_DIR}/util/utf8.cc" - "${LIBRARY_DIR}/util/value_parsing.cc" - - "${LIBRARY_DIR}/vendored/base64.cpp" ${ORC_SRCS} ) @@ -373,6 +418,7 @@ target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_INCLUDE_D target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_ADDITION_SOURCE_DIR}) target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_SRC_DIR}) target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${FLATBUFFERS_INCLUDE_DIR}) +target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${HDFS_INCLUDE_DIR}) # === parquet @@ -446,7 +492,7 @@ set (HAVE_STRERROR_R 1) set (HAVE_SCHED_GET_PRIORITY_MAX 1) set (HAVE_SCHED_GET_PRIORITY_MIN 1) -if (OS_LINUX) +if (OS_LINUX AND NOT USE_MUSL) set (STRERROR_R_CHAR_P 1) endif () diff --git a/contrib/azure-cmake/CMakeLists.txt b/contrib/azure-cmake/CMakeLists.txt index 7f9476e37b7..527503b85a2 100644 --- a/contrib/azure-cmake/CMakeLists.txt +++ b/contrib/azure-cmake/CMakeLists.txt @@ -46,14 +46,17 @@ 
include("${AZURE_DIR}/cmake-modules/AzureTransportAdapters.cmake") add_library(azure_sdk ${AZURE_SDK_UNIFIED_SRC}) if (COMPILER_CLANG) - target_compile_options(azure_sdk PUBLIC + target_compile_options(azure_sdk PRIVATE -Wno-deprecated-copy-dtor -Wno-extra-semi -Wno-suggest-destructor-override -Wno-inconsistent-missing-destructor-override -Wno-error=unknown-warning-option - -Wno-reserved-identifier ) + + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13) + target_compile_options(azure_sdk PRIVATE -Wno-reserved-identifier) + endif() endif() # Originally, on Windows azure-core is built with bcrypt and crypt32 by default @@ -68,4 +71,4 @@ endif() target_link_libraries(azure_sdk PRIVATE ${LIBXML2_LIBRARIES}) -target_include_directories(azure_sdk PUBLIC ${AZURE_SDK_INCLUDES}) +target_include_directories(azure_sdk SYSTEM PUBLIC ${AZURE_SDK_INCLUDES}) diff --git a/contrib/boost b/contrib/boost index fcb058e1459..c0807e83f28 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit fcb058e1459ac273ecfe7cdf72791cb1479115af +Subproject commit c0807e83f2824e8dd67a15b355496a9b784cdcd5 diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 057a893e926..4a21b8a0e2d 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -1,9 +1,7 @@ option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ON) if (NOT USE_INTERNAL_BOOST_LIBRARY) - # 1.70 like in contrib/boost - # 1.71 on CI - set(BOOST_VERSION 1.71) + set(BOOST_VERSION 1.78) find_package(Boost ${BOOST_VERSION} COMPONENTS system @@ -66,9 +64,11 @@ if (NOT EXTERNAL_BOOST_FOUND) set (SRCS_FILESYSTEM "${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/directory.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/exception.cpp" "${LIBRARY_DIR}/libs/filesystem/src/operations.cpp" - "${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp" "${LIBRARY_DIR}/libs/filesystem/src/path.cpp" + "${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp" "${LIBRARY_DIR}/libs/filesystem/src/portability.cpp" "${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp" "${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp" @@ -126,24 +126,11 @@ if (NOT EXTERNAL_BOOST_FOUND) # regex set (SRCS_REGEX - "${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp" - "${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp" - "${LIBRARY_DIR}/libs/regex/src/cregex.cpp" - "${LIBRARY_DIR}/libs/regex/src/fileiter.cpp" - "${LIBRARY_DIR}/libs/regex/src/icu.cpp" - "${LIBRARY_DIR}/libs/regex/src/instances.cpp" - "${LIBRARY_DIR}/libs/regex/src/internals.hpp" "${LIBRARY_DIR}/libs/regex/src/posix_api.cpp" "${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp" - "${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp" - "${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp" "${LIBRARY_DIR}/libs/regex/src/regex.cpp" "${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp" - "${LIBRARY_DIR}/libs/regex/src/usinstances.cpp" - "${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp" - "${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp" "${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp" - "${LIBRARY_DIR}/libs/regex/src/winstances.cpp" ) add_library (_boost_regex ${SRCS_REGEX}) @@ -166,7 +153,6 @@ if (NOT EXTERNAL_BOOST_FOUND) set (SRCS_CONTEXT "${LIBRARY_DIR}/libs/context/src/dummy.cpp" - "${LIBRARY_DIR}/libs/context/src/execution_context.cpp" "${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp" ) diff --git a/contrib/cassandra b/contrib/cassandra index eb9b68dadbb..f4a31e92a25 160000 --- 
a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit eb9b68dadbb4417a2c132ad4a1c2fa76e65e6fc1 +Subproject commit f4a31e92a25c34c02c7291ff97c7813bc83b0e09 diff --git a/contrib/jemalloc b/contrib/jemalloc index e6891d97461..a1404807211 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit e6891d9746143bf2cf617493d880ba5a0b9a3efd +Subproject commit a1404807211b1612539f840b3dcb1bf38d1a269e diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index 4fbd0575b55..dc47b0bf496 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -1,17 +1,8 @@ # This file is a modified version of contrib/libuv/CMakeLists.txt -include(CMakeDependentOption) - set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv") set (BINARY_DIR "${CMAKE_BINARY_DIR}/contrib/libuv") - -if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND uv_cflags -fvisibility=hidden --std=gnu89) - list(APPEND uv_cflags -Wall -Wextra -Wstrict-prototypes) - list(APPEND uv_cflags -Wno-unused-parameter) -endif() - set(uv_sources src/fs-poll.c src/idna.c @@ -76,7 +67,7 @@ endif() if(CMAKE_SYSTEM_NAME STREQUAL "Linux") list(APPEND uv_defines _GNU_SOURCE _POSIX_C_SOURCE=200112) - list(APPEND uv_libraries dl rt) + list(APPEND uv_libraries rt) list(APPEND uv_sources src/unix/linux-core.c src/unix/linux-inotify.c diff --git a/contrib/mariadb-connector-c-cmake/CMakeLists.txt b/contrib/mariadb-connector-c-cmake/CMakeLists.txt index ea74e13b7f0..7c3f25cdf87 100644 --- a/contrib/mariadb-connector-c-cmake/CMakeLists.txt +++ b/contrib/mariadb-connector-c-cmake/CMakeLists.txt @@ -236,8 +236,7 @@ set(LIBMARIADB_SOURCES ${LIBMARIADB_SOURCES} ${CC_SOURCE_DIR}/libmariadb/mariadb add_library(mariadbclient STATIC ${LIBMARIADB_SOURCES}) target_link_libraries(mariadbclient ${SYSTEM_LIBS}) -target_include_directories(mariadbclient - PRIVATE ${CC_BINARY_DIR}/include-private - PUBLIC ${CC_BINARY_DIR}/include-public ${CC_SOURCE_DIR}/include ${CC_SOURCE_DIR}/libmariadb) +target_include_directories(mariadbclient PRIVATE ${CC_BINARY_DIR}/include-private) +target_include_directories(mariadbclient SYSTEM PUBLIC ${CC_BINARY_DIR}/include-public ${CC_SOURCE_DIR}/include ${CC_SOURCE_DIR}/libmariadb) set_target_properties(mariadbclient PROPERTIES IMPORTED_INTERFACE_LINK_LIBRARIES "${SYSTEM_LIBS}") diff --git a/contrib/protobuf b/contrib/protobuf index c1c5d020260..6bb70196c53 160000 --- a/contrib/protobuf +++ b/contrib/protobuf @@ -1 +1 @@ -Subproject commit c1c5d02026059f4c3cb51aaa08e82288d3e08b89 +Subproject commit 6bb70196c5360268d9f021bb7936fb0b551724c2 diff --git a/contrib/s2geometry b/contrib/s2geometry index 38b7a290f92..471fe9dc931 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 38b7a290f927cc372218c2094602b83e35b18c05 +Subproject commit 471fe9dc931a4bb560333545186e9b5da168ac83 diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 41d570c9afd..e2b0f20f408 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -1,8 +1,12 @@ set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src") +set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") +if(NOT EXISTS "${ABSL_SOURCE_DIR}/CMakeLists.txt") + message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. 
To fix try run: \n git submodule update --init --recursive") +endif() + + set(S2_SRCS - "${S2_SOURCE_DIR}/s2/base/stringprintf.cc" - "${S2_SOURCE_DIR}/s2/base/strtoint.cc" "${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc" "${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc" "${S2_SOURCE_DIR}/s2/encoded_s2shape_index.cc" @@ -14,11 +18,14 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s1chord_angle.cc" "${S2_SOURCE_DIR}/s2/s1interval.cc" "${S2_SOURCE_DIR}/s2/s2boolean_operation.cc" + "${S2_SOURCE_DIR}/s2/s2buffer_operation.cc" "${S2_SOURCE_DIR}/s2/s2builder.cc" "${S2_SOURCE_DIR}/s2/s2builder_graph.cc" "${S2_SOURCE_DIR}/s2/s2builderutil_closed_set_normalizer.cc" "${S2_SOURCE_DIR}/s2/s2builderutil_find_polygon_degeneracies.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_get_snapped_winding_delta.cc" "${S2_SOURCE_DIR}/s2/s2builderutil_lax_polygon_layer.cc" + "${S2_SOURCE_DIR}/s2/s2builderutil_lax_polyline_layer.cc" "${S2_SOURCE_DIR}/s2/s2builderutil_s2point_vector_layer.cc" "${S2_SOURCE_DIR}/s2/s2builderutil_s2polygon_layer.cc" "${S2_SOURCE_DIR}/s2/s2builderutil_s2polyline_layer.cc" @@ -44,7 +51,6 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2edge_crossings.cc" "${S2_SOURCE_DIR}/s2/s2edge_distances.cc" "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc" - "${S2_SOURCE_DIR}/s2/s2error.cc" "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc" "${S2_SOURCE_DIR}/s2/s2latlng.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc" @@ -55,6 +61,7 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2loop.cc" "${S2_SOURCE_DIR}/s2/s2loop_measures.cc" "${S2_SOURCE_DIR}/s2/s2measures.cc" + "${S2_SOURCE_DIR}/s2/s2memory_tracker.cc" "${S2_SOURCE_DIR}/s2/s2metrics.cc" "${S2_SOURCE_DIR}/s2/s2max_distance_targets.cc" "${S2_SOURCE_DIR}/s2/s2min_distance_targets.cc" @@ -82,28 +89,15 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" + "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" "${S2_SOURCE_DIR}/s2/s2text_format.cc" "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" - "${S2_SOURCE_DIR}/s2/strings/ostringstream.cc" + "${S2_SOURCE_DIR}/s2/s2winding_operation.cc" "${S2_SOURCE_DIR}/s2/strings/serialize.cc" - # ClickHouse doesn't use strings from abseil. - # So, there is no duplicate symbols. 
- "${S2_SOURCE_DIR}/s2/third_party/absl/base/dynamic_annotations.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/raw_logging.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/base/internal/throw_delegate.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/numeric/int128.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/strings/ascii.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/strings/match.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/strings/numbers.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_cat.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/strings/str_split.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/strings/string_view.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/strings/strip.cc" - "${S2_SOURCE_DIR}/s2/third_party/absl/strings/internal/memutil.cc" "${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc" "${S2_SOURCE_DIR}/s2/util/bits/bits.cc" "${S2_SOURCE_DIR}/s2/util/coding/coder.cc" @@ -111,17 +105,41 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc" "${S2_SOURCE_DIR}/s2/util/math/mathutil.cc" "${S2_SOURCE_DIR}/s2/util/units/length-units.cc" + ) add_library(s2 ${S2_SRCS}) - -set_property(TARGET s2 PROPERTY CXX_STANDARD 11) +set_property(TARGET s2 PROPERTY CXX_STANDARD 17) if (OPENSSL_FOUND) target_link_libraries(s2 PRIVATE ${OPENSSL_LIBRARIES}) endif() +# Copied from contrib/s2geometry/CMakeLists +target_link_libraries(s2 PRIVATE + absl::base + absl::btree + absl::config + absl::core_headers + absl::dynamic_annotations + absl::endian + absl::fixed_array + absl::flat_hash_map + absl::flat_hash_set + absl::hash + absl::inlined_vector + absl::int128 + absl::log_severity + absl::memory + absl::span + absl::str_format + absl::strings + absl::type_traits + absl::utility + ) + target_include_directories(s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") +target_include_directories(s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}") if(M_LIBRARY) target_link_libraries(s2 PRIVATE ${M_LIBRARY}) diff --git a/contrib/sysroot b/contrib/sysroot index 410845187f5..bbcac834526 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 410845187f582c5e6692b53dddbe43efbb728734 +Subproject commit bbcac834526d90d1e764164b861be426891d1743 diff --git a/debian/rules b/debian/rules index 4562d24bec4..e0ad0388de7 100755 --- a/debian/rules +++ b/debian/rules @@ -45,6 +45,10 @@ ifdef DEB_CXX ifeq ($(DEB_BUILD_GNU_TYPE),$(DEB_HOST_GNU_TYPE)) CC := $(DEB_CC) CXX := $(DEB_CXX) +else ifeq (clang,$(findstring clang,$(DEB_CXX))) +# If we crosscompile with clang, it knows what to do + CC := $(DEB_CC) + CXX := $(DEB_CXX) else CC := $(DEB_HOST_GNU_TYPE)-$(DEB_CC) CXX := $(DEB_HOST_GNU_TYPE)-$(DEB_CXX) @@ -77,10 +81,6 @@ else THREADS_COUNT = 1 endif -ifneq ($(THREADS_COUNT),) - THREADS_COUNT:=-j$(THREADS_COUNT) -endif - %: dh $@ $(DH_FLAGS) --buildsystem=cmake @@ -89,11 +89,11 @@ override_dh_auto_configure: override_dh_auto_build: # Fix for ninja. Do not add -O. 
- $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET) + $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET) override_dh_auto_test: ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) - cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V + cd $(BUILDDIR) && ctest -j$(THREADS_COUNT) -V endif override_dh_clean: @@ -120,7 +120,7 @@ override_dh_install: dh_install --list-missing --sourcedir=$(DESTDIR) override_dh_auto_install: - env DESTDIR=$(DESTDIR) $(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) install + env DESTDIR=$(DESTDIR) $(MAKE) -j$(THREADS_COUNT) -C $(BUILDDIR) install override_dh_shlibdeps: true # We depend only on libc and dh_shlibdeps gives us wrong (too strict) dependency. diff --git a/docker/images.json b/docker/images.json index a696b0597df..dc7126a3f5a 100644 --- a/docker/images.json +++ b/docker/images.json @@ -46,7 +46,6 @@ "name": "clickhouse/stateless-test", "dependent": [ "docker/test/stateful", - "docker/test/coverage", "docker/test/unit" ] }, @@ -56,10 +55,6 @@ "docker/test/stress" ] }, - "docker/test/coverage": { - "name": "clickhouse/test-coverage", - "dependent": [] - }, "docker/test/unit": { "name": "clickhouse/unit-test", "dependent": [] diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 28e84d359b3..8f886ea357d 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -24,40 +24,34 @@ RUN apt-get update \ && apt-key add /tmp/llvm-snapshot.gpg.key \ && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ && echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ - /etc/apt/sources.list + /etc/apt/sources.list \ + && apt-get clean # initial packages -RUN apt-get update \ - && apt-get install \ - bash \ - fakeroot \ - ccache \ - curl \ - software-properties-common \ - --yes --no-install-recommends - RUN apt-get update \ && apt-get install \ bash \ build-essential \ ccache \ + clang-${LLVM_VERSION} \ + clang-tidy-${LLVM_VERSION} \ cmake \ curl \ + fakeroot \ gdb \ git \ gperf \ - clang-${LLVM_VERSION} \ - clang-tidy-${LLVM_VERSION} \ lld-${LLVM_VERSION} \ llvm-${LLVM_VERSION} \ llvm-${LLVM_VERSION}-dev \ - libicu-dev \ moreutils \ ninja-build \ pigz \ rename \ + software-properties-common \ tzdata \ - --yes --no-install-recommends + --yes --no-install-recommends \ + && apt-get clean # This symlink required by gcc to find lld compiler RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld @@ -66,7 +60,7 @@ ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} # libtapi is required to support .tbh format from recent MacOS SDKs -RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \ +RUN git clone --depth 1 https://github.com/tpoechtrager/apple-libtapi.git \ && cd apple-libtapi \ && INSTALLPREFIX=/cctools ./build.sh \ && ./install.sh \ @@ -74,7 +68,7 @@ RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \ && rm -rf apple-libtapi # Build and install tools for cross-linking to Darwin (x86-64) -RUN git clone https://github.com/tpoechtrager/cctools-port.git \ +RUN git clone --depth 1 https://github.com/tpoechtrager/cctools-port.git \ && cd cctools-port/cctools \ && ./configure --prefix=/cctools --with-libtapi=/cctools \ --target=x86_64-apple-darwin \ @@ -83,7 +77,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \ && rm -rf cctools-port # Build and install tools for cross-linking to Darwin (aarch64) -RUN git clone https://github.com/tpoechtrager/cctools-port.git \ +RUN git clone --depth 1 
https://github.com/tpoechtrager/cctools-port.git \ && cd cctools-port/cctools \ && ./configure --prefix=/cctools --with-libtapi=/cctools \ --target=aarch64-apple-darwin \ @@ -97,7 +91,8 @@ RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacO # NOTE: Seems like gcc-11 is too new for ubuntu20 repository RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ && apt-get update \ - && apt-get install gcc-11 g++-11 --yes + && apt-get install gcc-11 g++-11 --yes \ + && apt-get clean COPY build.sh / diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 873edfe4afc..89c34846efa 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -64,8 +64,14 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ && apt-get install gcc-11 g++-11 --yes -# This symlink required by gcc to find lld compiler -RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld +# These symlinks are required: +# /usr/bin/ld.lld: by gcc to find lld compiler +# /usr/bin/aarch64-linux-gnu-obj*: for debug symbols stripping +RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld \ + && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-strip \ + && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objcopy /usr/bin/aarch64-linux-gnu-objcopy \ + && ln -sf /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-objdump /usr/bin/aarch64-linux-gnu-objdump + COPY build.sh / diff --git a/docker/packager/packager b/docker/packager/packager index 9cce12be949..c042db2251d 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -29,7 +29,13 @@ def pull_image(image_name): return False def build_image(image_name, filepath): - subprocess.check_call("docker build --network=host -t {} -f {} .".format(image_name, filepath), shell=True) + context = os.path.dirname(filepath) + subprocess.check_call( + "docker build --network=host -t {} -f {} {}".format( + image_name, filepath, context + ), + shell=True, + ) def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache_dir, docker_image_version): env_part = " -e ".join(env_variables) @@ -90,6 +96,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ elif is_cross_arm: cc = compiler[:-len(ARM_SUFFIX)] cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake") + result.append("DEB_ARCH_FLAG=-aarm64") elif is_cross_freebsd: cc = compiler[:-len(FREEBSD_SUFFIX)] cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/freebsd/toolchain-x86_64.cmake") @@ -98,6 +105,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ cmake_flags.append("-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake") else: cc = compiler + result.append("DEB_ARCH_FLAG=-aamd64") cxx = cc.replace('gcc', 'g++').replace('clang', 'clang++') diff --git a/docker/server/README.md b/docker/server/README.md index c63bb980c13..5a96a63bb05 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -17,6 +17,8 @@ $ docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 clic By default ClickHouse will be accessible only via docker network. See the [networking section below](#networking). +By default, the server instance started above will run as the `default` user without a password.
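If a passwordless `default` user is not acceptable, one option is to create a dedicated user once the server is up. The sketch below is illustrative only: the user name, password, and grant are placeholders, and it assumes SQL-driven access management is enabled for the user executing it (for the official image this typically means enabling it explicitly, e.g. via an environment variable such as `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1`; verify against the image documentation).

```sql
-- Illustrative sketch: user name, password and grant are placeholders.
CREATE USER app_user IDENTIFIED WITH sha256_password BY 'change_me';
GRANT SELECT ON default.* TO app_user;
```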
+ ### connect to it from a native client ```bash $ docker run -it --rm --link some-clickhouse-server:clickhouse-server clickhouse/clickhouse-client --host clickhouse-server diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index 25fabca67b5..d1059b3dacc 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -6,7 +6,7 @@ FROM clickhouse/binary-builder ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev +RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls @@ -23,12 +23,12 @@ ENV SOURCE_DIRECTORY=/repo_folder ENV BUILD_DIRECTORY=/build ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report ENV SHA=nosha -ENV DATA="data" +ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data" CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \ cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-13 -DCMAKE_C_COMPILER=/usr/bin/clang-13 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \ mkdir -p $HTML_RESULT_DIRECTORY && \ $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \ cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\ - $CODEINDEX $HTML_RESULT_DIRECTORY -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \ + $CODEINDEX $HTML_RESULT_DIRECTORY -d "$DATA" | ts '%Y-%m-%d %H:%M:%S' && \ mv $HTML_RESULT_DIRECTORY /test_output diff --git a/docker/test/coverage/Dockerfile b/docker/test/coverage/Dockerfile deleted file mode 100644 index ccf0bbc7c83..00000000000 --- a/docker/test/coverage/Dockerfile +++ /dev/null @@ -1,18 +0,0 @@ -# docker build -t clickhouse/test-coverage . -FROM clickhouse/stateless-test - -RUN apt-get update -y \ - && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --yes --no-install-recommends \ - cmake - -COPY s3downloader /s3downloader -COPY run.sh /run.sh - -ENV DATASETS="hits visits" -ENV COVERAGE_DIR=/coverage_reports -ENV SOURCE_DIR=/build -ENV OUTPUT_DIR=/output -ENV IGNORE='.*contrib.*' - -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/coverage/run.sh b/docker/test/coverage/run.sh deleted file mode 100755 index 807efdf1e47..00000000000 --- a/docker/test/coverage/run.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash - -kill_clickhouse () { - echo "clickhouse pids $(pgrep -u clickhouse)" | ts '%Y-%m-%d %H:%M:%S' - pkill -f "clickhouse-server" 2>/dev/null - - - for _ in {1..120} - do - if ! 
pkill -0 -f "clickhouse-server" ; then break ; fi - echo "ClickHouse still alive" | ts '%Y-%m-%d %H:%M:%S' - sleep 1 - done - - if pkill -0 -f "clickhouse-server" - then - pstree -apgT - jobs - echo "Failed to kill the ClickHouse server" | ts '%Y-%m-%d %H:%M:%S' - return 1 - fi -} - -start_clickhouse () { - LLVM_PROFILE_FILE='server_%h_%p_%m.profraw' sudo -Eu clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml & - counter=0 - until clickhouse-client --query "SELECT 1" - do - if [ "$counter" -gt 120 ] - then - echo "Cannot start clickhouse-server" - cat /var/log/clickhouse-server/stdout.log - tail -n1000 /var/log/clickhouse-server/stderr.log - tail -n1000 /var/log/clickhouse-server/clickhouse-server.log - break - fi - sleep 0.5 - counter=$((counter + 1)) - done -} - - -chmod 777 / - -dpkg -i package_folder/clickhouse-common-static_*.deb; \ - dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \ - dpkg -i package_folder/clickhouse-server_*.deb; \ - dpkg -i package_folder/clickhouse-client_*.deb; \ - dpkg -i package_folder/clickhouse-test_*.deb - -mkdir -p /var/lib/clickhouse -mkdir -p /var/log/clickhouse-server -chmod 777 -R /var/log/clickhouse-server/ - -# install test configs -/usr/share/clickhouse-test/config/install.sh - -start_clickhouse - -# shellcheck disable=SC2086 # No quotes because I want to split it into words. -if ! /s3downloader --dataset-names $DATASETS; then - echo "Cannot download datatsets" - exit 1 -fi - - -chmod 777 -R /var/lib/clickhouse - - -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW DATABASES" -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "CREATE DATABASE test" - -kill_clickhouse -start_clickhouse - -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM datasets" -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM test" -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM test" - -LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test -j 8 --testname --shard --zookeeper --print-time 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_result.txt - -readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "/test_result.txt") - -kill_clickhouse - -sleep 3 - -if [[ -n "${FAILED_TESTS[*]}" ]] -then - # Clean the data so that there is no interference from the previous test run. - rm -rf /var/lib/clickhouse/{{meta,}data,user_files} ||: - - start_clickhouse - - echo "Going to run again: ${FAILED_TESTS[*]}" - - LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test --order=random --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_result.txt -else - echo "No failed tests" -fi - -mkdir -p "$COVERAGE_DIR" -mv /*.profraw "$COVERAGE_DIR" - -mkdir -p "$SOURCE_DIR"/obj-x86_64-linux-gnu -cd "$SOURCE_DIR"/obj-x86_64-linux-gnu && CC=clang-11 CXX=clang++-11 cmake .. 
&& cd / -llvm-profdata-11 merge -sparse "${COVERAGE_DIR}"/* -o clickhouse.profdata -llvm-cov-11 export /usr/bin/clickhouse -instr-profile=clickhouse.profdata -j=16 -format=lcov -skip-functions -ignore-filename-regex "$IGNORE" > output.lcov -genhtml output.lcov --ignore-errors source --output-directory "${OUTPUT_DIR}" diff --git a/docker/test/coverage/s3downloader b/docker/test/coverage/s3downloader deleted file mode 100755 index eb3b3cd9faf..00000000000 --- a/docker/test/coverage/s3downloader +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import os -import sys -import time -import tarfile -import logging -import argparse -import requests -import tempfile - - -DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net' - -AVAILABLE_DATASETS = { - 'hits': 'hits_v1.tar', - 'visits': 'visits_v1.tar', -} - -RETRIES_COUNT = 5 - -def _get_temp_file_name(): - return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) - -def build_url(base_url, dataset): - return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset]) - -def dowload_with_progress(url, path): - logging.info("Downloading from %s to temp path %s", url, path) - for i in range(RETRIES_COUNT): - try: - with open(path, 'wb') as f: - response = requests.get(url, stream=True) - response.raise_for_status() - total_length = response.headers.get('content-length') - if total_length is None or int(total_length) == 0: - logging.info("No content-length, will download file without progress") - f.write(response.content) - else: - dl = 0 - total_length = int(total_length) - logging.info("Content length is %ld bytes", total_length) - for data in response.iter_content(chunk_size=4096): - dl += len(data) - f.write(data) - if sys.stdout.isatty(): - done = int(50 * dl / total_length) - percent = int(100 * float(dl) / total_length) - sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent)) - sys.stdout.flush() - break - except Exception as ex: - sys.stdout.write("\n") - time.sleep(3) - logging.info("Exception while downloading %s, retry %s", ex, i + 1) - if os.path.exists(path): - os.remove(path) - else: - raise Exception("Cannot download dataset from {}, all retries exceeded".format(url)) - - sys.stdout.write("\n") - logging.info("Downloading finished") - -def unpack_to_clickhouse_directory(tar_path, clickhouse_path): - logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path) - with tarfile.open(tar_path, 'r') as comp_file: - comp_file.extractall(path=clickhouse_path) - logging.info("Unpack finished") - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) - - parser = argparse.ArgumentParser( - description="Simple tool for dowloading datasets for clickhouse from S3") - - parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys())) - parser.add_argument('--url-prefix', default=DEFAULT_URL) - parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/') - - args = parser.parse_args() - datasets = args.dataset_names - logging.info("Will fetch following datasets: %s", ', '.join(datasets)) - for dataset in datasets: - logging.info("Processing %s", dataset) - temp_archive_path = _get_temp_file_name() - try: - download_url_for_dataset = build_url(args.url_prefix, dataset) - dowload_with_progress(download_url_for_dataset, temp_archive_path) - unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path) - except Exception as 
ex: - logging.info("Some exception occured %s", str(ex)) - raise - finally: - logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path) - if os.path.exists(temp_archive_path): - os.remove(temp_archive_path) - logging.info("Processing of %s finished", dataset) - logging.info("Fetch finished, enjoy your tables!") - - diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index baa5945d347..24168cea330 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -173,6 +173,8 @@ function clone_submodules contrib/dragonbox contrib/fast_float contrib/NuRaft + contrib/jemalloc + contrib/replxx ) git submodule sync @@ -193,6 +195,8 @@ function run_cmake "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1" "-DENABLE_NURAFT=1" + "-DENABLE_JEMALLOC=1" + "-DENABLE_REPLXX=1" ) # TODO remove this? we don't use ccache anyway. An option would be to download it @@ -253,7 +257,13 @@ function run_tests start_server set +e - time clickhouse-test --hung-check -j 8 --order=random \ + local NPROC + NPROC=$(nproc) + NPROC=$((NPROC / 2)) + if [[ $NPROC == 0 ]]; then + NPROC=1 + fi + time clickhouse-test --hung-check -j "${NPROC}" --order=random \ --fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \ -- "$FASTTEST_FOCUS" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 5d44e542269..1ebaed752a6 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -52,9 +52,21 @@ function clone } +function wget_with_retry +{ + for _ in 1 2 3 4; do + if wget -nv -nd -c "$1";then + return 0 + else + sleep 0.5 + fi + done + return 1 +} + function download { - wget -nv -nd -c "$BINARY_URL_TO_DOWNLOAD" + wget_with_retry "$BINARY_URL_TO_DOWNLOAD" chmod +x clickhouse ln -s ./clickhouse ./clickhouse-server @@ -175,6 +187,15 @@ info signals continue backtrace full info locals +info registers +disassemble /s +up +info locals +disassemble /s +up +info locals +disassemble /s +p \"done\" detach quit " > script.gdb @@ -185,8 +206,8 @@ quit time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: # Check connectivity after we attach gdb, because it might cause the server - # to freeze and the fuzzer will fail. - for _ in {1..60} + # to freeze and the fuzzer will fail. In debug build it can take a lot of time. 
+ for _ in {1..180} do sleep 1 if clickhouse-client --query "select 1" diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index a953a8a904a..6a40fea7500 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -72,11 +72,13 @@ RUN python3 -m pip install \ grpcio-tools \ kafka-python \ kazoo \ + lz4 \ minio \ protobuf \ psycopg2-binary==2.8.6 \ pymongo==3.11.0 \ pytest \ + pytest-order==1.0.0 \ pytest-timeout \ pytest-xdist \ pytest-repeat \ diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 5a021036b26..ad8a8e4eb84 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -8,8 +8,8 @@ echo '{ "ip-forward": true, "log-level": "debug", "storage-driver": "overlay2", - "insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"], - "registry-mirrors" : ["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"] + "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], + "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] }' | dd of=/etc/docker/daemon.json 2>/dev/null dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 6d4f1222432..3d37a6c0e92 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -11,6 +11,20 @@ if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then COMMON_BUILD_PREFIX="" fi +# Sometimes AWS responde with DNS error and it's impossible to retry it with +# current curl version options. +function curl_with_retry +{ + for _ in 1 2 3 4; do + if curl --fail --head "$1";then + return 0 + else + sleep 0.5 + fi + done + return 1 +} + # Use the packaged repository to find the revision we will compare to. function find_reference_sha { @@ -55,7 +69,7 @@ function find_reference_sha ) for path in "${urls_to_try[@]}" do - if curl --fail --retry 5 --retry-delay 1 --retry-max-time 15 --head "$path" + if curl_with_retry "$path" then found="$path" break @@ -76,7 +90,7 @@ chmod 777 workspace output cd workspace # Download the package for the version we are going to test. -if curl --fail --retry 5 --retry-delay 1 --retry-max-time 15 --head "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz" +if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz" then right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/performance/performance.tgz" fi diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index 5dc32ebcc22..c236b3a51d1 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -42,7 +42,7 @@ ENV CCACHE_DIR=/test_output/ccache CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . 
-D"ENABLE_EMBEDDED_COMPILER"=OFF -D"DISABLE_HERMETIC_BUILD"=ON -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ && ninja re2_st clickhouse_grpc_protos \ - && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ + && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j "$(nproc)" -l ./licence.lic; \ cp /repo_folder/pvs-studio.log /test_output; \ plog-converter -a GA:1,2 -t fullhtml -o /test_output/pvs-studio-html-report pvs-studio.log; \ plog-converter -a GA:1,2 -t tasklist -o /test_output/pvs-studio-task-report.txt pvs-studio.log diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 138da284d5a..2efb62689ff 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -148,6 +148,15 @@ info signals continue backtrace full info locals +info registers +disassemble /s +up +info locals +disassemble /s +up +info locals +disassemble /s +p \"done\" detach quit " > script.gdb diff --git a/docker/test/testflows/runner/dockerd-entrypoint.sh b/docker/test/testflows/runner/dockerd-entrypoint.sh index 8abbd9e1c8e..0e15396082a 100755 --- a/docker/test/testflows/runner/dockerd-entrypoint.sh +++ b/docker/test/testflows/runner/dockerd-entrypoint.sh @@ -5,8 +5,8 @@ echo "Configure to use Yandex dockerhub-proxy" mkdir -p /etc/docker/ cat > /etc/docker/daemon.json << EOF { - "insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"], - "registry-mirrors" : ["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"] + "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], + "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] } EOF diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index ea32f608124..5f3245c4d60 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -106,20 +106,20 @@ Build ClickHouse. Run ClickHouse from the terminal: change directory to `program Note that all clickhouse tools (server, client, etc) are just symlinks to a single binary named `clickhouse`. You can find this binary at `programs/clickhouse`. All tools can also be invoked as `clickhouse tool` instead of `clickhouse-tool`. -Alternatively you can install ClickHouse package: either stable release from Yandex repository or you can build package for yourself with `./release` in ClickHouse sources root. Then start the server with `sudo service clickhouse-server start` (or stop to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`. +Alternatively you can install ClickHouse package: either stable release from ClickHouse repository or you can build package for yourself with `./release` in ClickHouse sources root. Then start the server with `sudo clickhouse start` (or stop to stop the server). Look for logs at `/etc/clickhouse-server/clickhouse-server.log`. When ClickHouse is already installed on your system, you can build a new `clickhouse` binary and replace the existing binary: ``` bash -$ sudo service clickhouse-server stop +$ sudo clickhouse stop $ sudo cp ./clickhouse /usr/bin/ -$ sudo service clickhouse-server start +$ sudo clickhouse start ``` Also you can stop system clickhouse-server and run your own with the same configuration but with logging to terminal: ``` bash -$ sudo service clickhouse-server stop +$ sudo clickhouse stop $ sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml ``` @@ -257,9 +257,9 @@ There are five variants (Debug, ASan, TSan, MSan, UBSan). 
Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuzzing that allows to randomize thread order of execution. It helps to find even more special cases. -## Security Audit {#security-audit} +## Security Audit -People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint. +People from Yandex Security Team did some basic overview of ClickHouse capabilities from the security standpoint. ## Static Analyzers {#static-analyzers} @@ -326,15 +326,11 @@ There is automated check for flaky tests. It runs all new tests 100 times (for f ## Testflows -[Testflows](https://testflows.com/) is an enterprise-grade testing framework. It is used by Altinity for some of the tests and we run these tests in our CI. - -## Yandex Checks (only for Yandex employees) - -These checks are importing ClickHouse code into Yandex internal monorepository, so ClickHouse codebase can be used as a library by other products at Yandex (YT and YDB). Note that clickhouse-server itself is not being build from internal repo and unmodified open-source build is used for Yandex applications. +[Testflows](https://testflows.com/) is an enterprise-grade open-source testing framework, which is used to test a subset of ClickHouse. ## Test Automation {#test-automation} -We run tests with Yandex internal CI and job automation system named “Sandbox”. +We run tests with [GitHub Actions](https://github.com/features/actions). Build jobs and tests are run in Sandbox on per commit basis. Resulting packages and test results are published in GitHub and can be downloaded by direct links. Artifacts are stored for several months. When you send a pull request on GitHub, we tag it as “can be tested” and our CI system will build ClickHouse packages (release, debug, with address sanitizer, etc) for you. diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index 944264b68a3..cdc904f1e94 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -83,6 +83,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree]( | VARCHAR, VAR_STRING | [String](../../sql-reference/data-types/string.md) | | BLOB | [String](../../sql-reference/data-types/string.md) | | BINARY | [FixedString](../../sql-reference/data-types/fixedstring.md) | +| BIT | [UInt64](../../sql-reference/data-types/int-uint.md) | [Nullable](../../sql-reference/data-types/nullable.md) is supported. @@ -150,20 +151,38 @@ Table overrides can be used to customize the ClickHouse DDL queries, allowing yo application. This is especially useful for controlling partitioning, which is important for the overall performance of MaterializedMySQL. +These are the schema conversion manipulations you can do with table overrides for MaterializedMySQL: + + * Modify column type. Must be compatible with the original type, or replication will fail. For example, + you can modify a UInt32 column to UInt64, but you can not modify a String column to Array(String). + * Modify [column TTL](../table-engines/mergetree-family/mergetree/#mergetree-column-ttl). + * Modify [column compression codec](../../sql-reference/statements/create/table/#codecs). + * Add [ALIAS columns](../../sql-reference/statements/create/table/#alias). 
+ * Add [skipping indexes](../table-engines/mergetree-family/mergetree/#table_engine-mergetree-data_skipping-indexes) + * Add [projections](../table-engines/mergetree-family/mergetree/#projections). Note that projection optimizations are + disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here. + `INDEX ... TYPE hypothesis` as [described in the v21.12 blog post]](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/) + may be more useful in this case. + * Modify [PARTITION BY](../table-engines/mergetree-family/custom-partitioning-key/) + * Modify [ORDER BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses) + * Modify [PRIMARY KEY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses) + * Add [SAMPLE BY](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses) + * Add [table TTL](../table-engines/mergetree-family/mergetree/#mergetree-query-clauses) + ```sql CREATE DATABASE db_name ENGINE = MaterializedMySQL(...) [SETTINGS ...] [TABLE OVERRIDE table_name ( [COLUMNS ( - [name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], ...] - [INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, ...] - [PROJECTION projection_name_1 (SELECT [GROUP BY] [ORDER BY]), ...] - )] - [ORDER BY expr] - [PRIMARY KEY expr] - [PARTITION BY expr] - [SAMPLE BY expr] - [TTL expr] + [col_name [datatype] [ALIAS expr] [CODEC(...)] [TTL expr], ...] + [INDEX index_name expr TYPE indextype[(...)] GRANULARITY val, ...] + [PROJECTION projection_name (SELECT [GROUP BY] [ORDER BY]), ...] + )] + [ORDER BY expr] + [PRIMARY KEY expr] + [PARTITION BY expr] + [SAMPLE BY expr] + [TTL expr] ), ...] ``` @@ -173,34 +192,34 @@ Example: CREATE DATABASE db_name ENGINE = MaterializedMySQL(...) TABLE OVERRIDE table1 ( COLUMNS ( - userid UUID, - category LowCardinality(String), - timestamp DateTime CODEC(Delta, Default) + userid UUID, + category LowCardinality(String), + timestamp DateTime CODEC(Delta, Default) ) PARTITION BY toYear(timestamp) ), TABLE OVERRIDE table2 ( COLUMNS ( - ip_hash UInt32 MATERIALIZED xxHash32(client_ip), - client_ip String TTL created + INTERVAL 72 HOUR - ) - SAMPLE BY ip_hash + client_ip String TTL created + INTERVAL 72 HOUR + ) + SAMPLE BY ip_hash ) ``` -The `COLUMNS` list is sparse; it contains only modified or extra (MATERIALIZED or ALIAS) columns. Modified columns with -a different type must be assignable from the original type. There is currently no validation of this or similar issues -when the `CREATE DATABASE` query executes, so extra care needs to be taken. +The `COLUMNS` list is sparse; existing columns are modified as specified, extra ALIAS columns are added. It is not +possible to add ordinary or MATERIALIZED columns. Modified columns with a different type must be assignable from the +original type. There is currently no validation of this or similar issues when the `CREATE DATABASE` query executes, so +extra care needs to be taken. You may specify overrides for tables that do not exist yet. -!!! note "Warning" - It is easy to break replication with TABLE OVERRIDEs if not used with care. For example: +!!! warning "Warning" + It is easy to break replication with table overrides if not used with care. For example: - * If a column is added with a table override, but then later added to the source MySQL table, the converted ALTER TABLE - query in ClickHouse will fail because the column already exists. 
+ * If an ALIAS column is added with a table override, and a column with the same name is later added to the source + MySQL table, the converted ALTER TABLE query in ClickHouse will fail and replication stops. * It is currently possible to add overrides that reference nullable columns where not-nullable are required, such as in - `ORDER BY` or `PARTITION BY`. + `ORDER BY` or `PARTITION BY`. This will cause CREATE TABLE queries that will fail, also causing replication to stop. ## Examples of Use {#examples-of-use} @@ -217,11 +236,9 @@ mysql> SELECT * FROM test; ``` ```text -+---+------+------+ -| a | b | c | -+---+------+------+ -| 2 | 222 | Wow! | -+---+------+------+ +┌─a─┬───b─┬─c────┐ +│ 2 │ 222 │ Wow! │ +└───┴─────┴──────┘ ``` Database in ClickHouse, exchanging data with the MySQL server: diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index d2c4dbf1f3c..4dea156f32e 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -5,15 +5,15 @@ toc_title: MaterializedPostgreSQL # [experimental] MaterializedPostgreSQL {#materialize-postgresql} -Creates ClickHouse database with an initial data dump of PostgreSQL database tables and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL database tables in the remote PostgreSQL database. +Creates a ClickHouse database with tables from PostgreSQL database. Firstly, database with engine `MaterializedPostgreSQL` creates a snapshot of PostgreSQL database and loads required tables. Required tables can include any subset of tables from any subset of schemas from specified database. Along with the snapshot database engine acquires LSN and once initial dump of tables is performed - it starts pulling updates from WAL. After database is created, newly added tables to PostgreSQL database are not automatically added to replication. They have to be added manually with `ATTACH TABLE db.table` query. -ClickHouse server works as PostgreSQL replica. It reads WAL and performs DML queries. DDL is not replicated, but can be handled (described below). +Replication is implemented with PostgreSQL Logical Replication Protocol, which does not allow to replicate DDL, but allows to know whether replication breaking changes happened (column type changes, adding/removing columns). Such changes are detected and according tables stop receiving updates. Such tables can be automatically reloaded in the background in case required setting is turned on. Safest way for now is to use `ATTACH`/ `DETACH` queries to reload table completely. If DDL does not break replication (for example, renaming a column) table will still receive updates (insertion is done by position). ## Creating a Database {#creating-a-database} ``` sql CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] -ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'password') [SETTINGS ...] +ENGINE = MaterializedPostgreSQL('host:port', 'database', 'user', 'password') [SETTINGS ...] ``` **Engine Parameters** @@ -23,51 +23,39 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p - `user` — PostgreSQL user. - `password` — User password. 
+## Example of Use {#example-of-use} + +``` sql +CREATE DATABASE postgres_db +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password'); + +SHOW TABLES FROM postgres_db; + +┌─name───┐ +│ table1 │ +└────────┘ + +SELECT * FROM postgres_db.table1; +``` + ## Dynamically adding new tables to replication {#dynamically-adding-table-to-replication} +After a `MaterializedPostgreSQL` database is created, it does not automatically detect new tables in the corresponding PostgreSQL database. Such tables can be added manually: + ``` sql ATTACH TABLE postgres_database.new_table; ``` -When specifying a specific list of tables in the database using the setting [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list), it will be updated to the current state, taking into account the tables which were added by the `ATTACH TABLE` query. +Warning: before version 21.13, adding a table to replication left behind an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If you attached tables with a ClickHouse version before 21.13, make sure to delete the slot manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`), otherwise disk usage will grow. The issue is fixed in 21.13. ## Dynamically removing tables from replication {#dynamically-removing-table-from-replication} +It is possible to remove specific tables from replication: + ``` sql DETACH TABLE postgres_database.table_to_remove; ``` -## Settings {#settings} - -- [materialized_postgresql_tables_list](../../operations/settings/settings.md#materialized-postgresql-tables-list) - -- [materialized_postgresql_schema](../../operations/settings/settings.md#materialized-postgresql-schema) - -- [materialized_postgresql_schema_list](../../operations/settings/settings.md#materialized-postgresql-schema-list) - -- [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update) - -- [materialized_postgresql_max_block_size](../../operations/settings/settings.md#materialized-postgresql-max-block-size) - -- [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot) - -- [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot) - -``` sql -CREATE DATABASE database1 -ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') -SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3'; - -SELECT * FROM database1.table1; -``` - -The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query. - -``` sql -ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = ; -``` - - ## PostgreSQL schema {#schema} PostgreSQL [schema](https://www.postgresql.org/docs/9.1/ddl-schemas.html) can be configured in 3 ways (starting from version 21.12). @@ -150,13 +138,63 @@ WHERE oid = 'postgres_table'::regclass; !!! warning "Warning" Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used. -## Example of Use {#example-of-use} +## Settings {#settings} + +1.
materialized_postgresql_tables_list {#materialized-postgresql-tables-list} + +Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine. + +Default value: empty list — means whole PostgreSQL database will be replicated. + +2. materialized_postgresql_schema {#materialized-postgresql-schema} + +Default value: empty string. (Default schema is used) + +3. materialized_postgresql_schema_list {#materialized-postgresql-schema-list} + +Default value: empty list. (Default schema is used) + +4. materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update} + +Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them. + +Possible values: + +- 0 — The table is not automatically updated in the background, when schema changes are detected. +- 1 — The table is automatically updated in the background, when schema changes are detected. + +Default value: `0`. + +5. materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size} + +Sets the number of rows collected in memory before flushing data into PostgreSQL database table. + +Possible values: + +- Positive integer. + +Default value: `65536`. + +6. materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot} + +A user-created replication slot. Must be used together with `materialized_postgresql_snapshot`. + +7. materialized_postgresql_snapshot {#materialized-postgresql-snapshot} + +A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with `materialized_postgresql_replication_slot`. ``` sql -CREATE DATABASE postgresql_db -ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password'); +CREATE DATABASE database1 +ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password') +SETTINGS materialized_postgresql_tables_list = 'table1,table2,table3'; -SELECT * FROM postgresql_db.postgres_table; +SELECT * FROM database1.table1; +``` + +The settings can be changed, if necessary, using a DDL query. But it is impossible to change the setting `materialized_postgresql_tables_list`. To update the list of tables in this setting use the `ATTACH TABLE` query. + +``` sql +ALTER DATABASE postgres_database MODIFY SETTING materialized_postgresql_max_block_size = ; ``` ## Notes {#notes} @@ -165,11 +203,11 @@ SELECT * FROM postgresql_db.postgres_table; Logical Replication Slots which exist on the primary are not available on standby replicas. So if there is a failover, new primary (the old physical standby) won’t be aware of any slots which were existing with old primary. This will lead to a broken replication from PostgreSQL. 
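The failover workaround described below relies on the two settings documented in the Settings section being supplied together: a user-created slot passed via `materialized_postgresql_replication_slot` and a snapshot identifier passed via `materialized_postgresql_snapshot`. A minimal sketch of the ClickHouse side is shown here; the slot name and snapshot identifier are illustrative placeholders and must correspond to a slot you created and exported yourself.

```sql
-- Placeholders: the slot must already exist on the PostgreSQL side and the
-- snapshot identifier must have been exported when the slot was created.
CREATE DATABASE postgres_db
ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
SETTINGS materialized_postgresql_replication_slot = 'clickhouse_slot',
         materialized_postgresql_snapshot = '00000003-0000001B-1';
```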
-A solution to this is to manage replication slots yourself and define a permanent replication slot (some information can be found [here](https://patroni.readthedocs.io/en/latest/SETTINGS.html)). You'll need to pass slot name via [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot) setting, and it has to be exported with `EXPORT SNAPSHOT` option. The snapshot identifier needs to be passed via [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot) setting. +A solution to this is to manage replication slots yourself and define a permanent replication slot (some information can be found [here](https://patroni.readthedocs.io/en/latest/SETTINGS.html)). You'll need to pass the slot name via the `materialized_postgresql_replication_slot` setting, and it has to be exported with the `EXPORT SNAPSHOT` option. The snapshot identifier needs to be passed via the `materialized_postgresql_snapshot` setting. Please note that this should be used only if it is actually needed. If there is no real need for it, or no full understanding of why it is needed, it is better to let the table engine create and manage its own replication slot. -**Example (from [@bchrobot](https://github.com/bchrobot))** +**Example (from [@bchrobot](https://github.com/bchrobot))** 1. Configure replication slot in PostgreSQL. @@ -214,3 +252,23 @@ SETTINGS ```bash kubectl exec acid-demo-cluster-0 -c postgres -- su postgres -c 'patronictl failover --candidate acid-demo-cluster-1 --force' ``` + +### Required permissions + +1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege. + +2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege. + +3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser. + +4. [DROP PUBLICATION](https://postgrespro.ru/docs/postgresql/10/sql-droppublication) -- owner of publication (`username` in MaterializedPostgreSQL engine itself). + +It is possible to avoid executing commands `2` and `3` (and thus to avoid requiring those permissions) by using the settings `materialized_postgresql_replication_slot` and `materialized_postgresql_snapshot`, but this must be done with much care. + +Access to the following tables is also required: + +1. pg_publication + +2. pg_replication_slots + +3. pg_publication_tables diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 0fcf7a63dd8..0d6d90f9d31 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -5,8 +5,7 @@ toc_title: HDFS # HDFS {#table_engines-hdfs} -This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html) via ClickHouse. This engine is similar -to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
This engine is similar to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features. ## Usage {#usage} @@ -14,12 +13,13 @@ to the [File](../../../engines/table-engines/special/file.md#table_engines-file) ENGINE = HDFS(URI, format) ``` -The `URI` parameter is the whole file URI in HDFS. -The `format` parameter specifies one of the available file formats. To perform +**Engine Parameters** + +- `URI` - whole file URI in HDFS. The path part of `URI` may contain globs. In this case the table would be readonly. +- `format` - specifies one of the available file formats. To perform `SELECT` queries, the format must be supported for input, and to perform `INSERT` queries – for output. The available formats are listed in the [Formats](../../../interfaces/formats.md#formats) section. -The path part of `URI` may contain globs. In this case the table would be readonly. **Example:** @@ -71,12 +71,12 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table 1. Suppose we have several files in TSV format with the following URIs on HDFS: -- 'hdfs://hdfs1:9000/some_dir/some_file_1' -- 'hdfs://hdfs1:9000/some_dir/some_file_2' -- 'hdfs://hdfs1:9000/some_dir/some_file_3' -- 'hdfs://hdfs1:9000/another_dir/some_file_1' -- 'hdfs://hdfs1:9000/another_dir/some_file_2' -- 'hdfs://hdfs1:9000/another_dir/some_file_3' + - 'hdfs://hdfs1:9000/some_dir/some_file_1' + - 'hdfs://hdfs1:9000/some_dir/some_file_2' + - 'hdfs://hdfs1:9000/some_dir/some_file_3' + - 'hdfs://hdfs1:9000/another_dir/some_file_1' + - 'hdfs://hdfs1:9000/another_dir/some_file_2' + - 'hdfs://hdfs1:9000/another_dir/some_file_3' 1. There are several ways to make a table consisting of all six files: @@ -132,6 +132,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us | **parameter** | **default value** | +| - | - | | rpc\_client\_connect\_tcpnodelay | true | | dfs\_client\_read\_shortcircuit | true | | output\_replace-datanode-on-failure | true | @@ -181,25 +182,26 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us #### ClickHouse extras {#clickhouse-extras} | **parameter** | **default value** | +| - | - | |hadoop\_kerberos\_keytab | "" | |hadoop\_kerberos\_principal | "" | |hadoop\_kerberos\_kinit\_command | kinit | |libhdfs3\_conf | "" | ### Limitations {#limitations} - * hadoop\_security\_kerberos\_ticket\_cache\_path and libhdfs3\_conf can be global only, not user specific +* `hadoop_security_kerberos_ticket_cache_path` and `libhdfs3_conf` can be global only, not user specific ## Kerberos support {#kerberos-support} -If hadoop\_security\_authentication parameter has value 'kerberos', ClickHouse authentifies via Kerberos facility. -Parameters [here](#clickhouse-extras) and hadoop\_security\_kerberos\_ticket\_cache\_path may be of help. +If the `hadoop_security_authentication` parameter has the value `kerberos`, ClickHouse authenticates via Kerberos. +Parameters are [here](#clickhouse-extras) and `hadoop_security_kerberos_ticket_cache_path` may be of help. Note that due to libhdfs3 limitations only old-fashioned approach is supported, -datanode communications are not secured by SASL (HADOOP\_SECURE\_DN\_USER is a reliable indicator of such -security approach). Use tests/integration/test\_storage\_kerberized\_hdfs/hdfs_configs/bootstrap.sh for reference. 
+datanode communications are not secured by SASL (`HADOOP_SECURE_DN_USER` is a reliable indicator of such +security approach). Use `tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh` for reference. -If hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal or hadoop\_kerberos\_kinit\_command is specified, kinit will be invoked. hadoop\_kerberos\_keytab and hadoop\_kerberos\_principal are mandatory in this case. kinit tool and krb5 configuration files are required. +If `hadoop_kerberos_keytab`, `hadoop_kerberos_principal` or `hadoop_kerberos_kinit_command` is specified, `kinit` will be invoked. `hadoop_kerberos_keytab` and `hadoop_kerberos_principal` are mandatory in this case. `kinit` tool and krb5 configuration files are required. -## HDFS Namenode HA support{#namenode-ha} +## HDFS Namenode HA support {#namenode-ha} libhdfs3 support HDFS namenode HA. diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index d02a11257c2..fa349e49af5 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -7,7 +7,7 @@ toc_title: MaterializedPostgreSQL Creates ClickHouse table with an initial data dump of PostgreSQL table and starts replication process, i.e. executes background job to apply new changes as they happen on PostgreSQL table in the remote PostgreSQL database. -If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the [materialized_postgresql_tables_list](../../../operations/settings/settings.md#materialized-postgresql-tables-list) setting, which specifies the tables to be replicated. It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database. +If more than one table is required, it is highly recommended to use the [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md) database engine instead of the table engine and use the `materialized_postgresql_tables_list` setting, which specifies the tables to be replicated (will also be possible to add database `schema`). It will be much better in terms of CPU, fewer connections and fewer replication slots inside the remote PostgreSQL database. ## Creating a Table {#creating-a-table} @@ -38,7 +38,7 @@ PRIMARY KEY key; - `_version` — Transaction counter. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `_sign` — Deletion mark. Type: [Int8](../../../sql-reference/data-types/int-uint.md). Possible values: - - `1` — Row is not deleted, + - `1` — Row is not deleted, - `-1` — Row is deleted. These columns do not need to be added when a table is created. They are always accessible in `SELECT` query. diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 0bdb54e0c16..789759ec521 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -36,6 +36,31 @@ The table structure can differ from the original PostgreSQL table structure: - `schema` — Non-default table schema. Optional. - `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient. 
+or via config (since version 21.11):
+
+```
+<named_collections>
+    <postgres1>
+        <host>localhost</host>
+        <port>5432</port>
+        <user>postgres</user>
+        <password>password</password>
+    </postgres1>
+    <postgres2>
+        <host>localhost</host>
+        <port>5432</port>
+        <user>postgres</user>
+        <password>password</password>
+        <table>test_table</table>
+    </postgres2>
+</named_collections>
+``` + +Some parameters can be overriden by key value arguments: +``` sql +SELECT * FROM postgresql(postgres1, schema='schema1', table='table1'); +``` + ## Implementation Details {#implementation-details} `SELECT` queries on PostgreSQL side run as `COPY (SELECT ...) TO STDOUT` inside read-only PostgreSQL transaction with commit after each `SELECT` query. diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index ebb42461204..78c144ac76f 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -37,6 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [rabbitmq_skip_broken_messages = N,] [rabbitmq_max_block_size = N,] [rabbitmq_flush_interval_ms = N] + [rabbitmq_queue_settings_list = 'x-dead-letter-exchange=my-dlx,x-max-length=10,x-overflow=reject-publish'] ``` Required parameters: @@ -59,6 +60,7 @@ Optional parameters: - `rabbitmq_skip_broken_messages` – RabbitMQ message parser tolerance to schema-incompatible messages per block. Default: `0`. If `rabbitmq_skip_broken_messages = N` then the engine skips *N* RabbitMQ messages that cannot be parsed (a message equals a row of data). - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` +- `rabbitmq_queue_settings_list` - allows to set RabbitMQ settings when creating a queue. Available settings: `x-max-length`, `x-max-length-bytes`, `x-message-ttl`, `x-expires`, `x-priority`, `x-max-priority`, `x-overflow`, `x-dead-letter-exchange`, `x-queue-type`. The `durable` setting is enabled automatically for the queue. SSL connection: diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 2711c76aeb6..5ac2105e9fd 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -66,9 +66,9 @@ WHERE table = 'visits' └───────────┴────────────────┴────────┘ ``` -The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](#alter_manipulations-with-partitions) queries. +The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries. -The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](#alter_attach-partition) query. +The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query. 
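+For illustration, such queries could look like the following (a minimal sketch that reuses the `visits` table and the part name from the example above):
+
+``` sql
+-- Detach a whole partition using a value from the `partition` column.
+ALTER TABLE visits DETACH PARTITION 201901;
+-- Re-attach a single data part using a value from the `name` column.
+ALTER TABLE visits ATTACH PART '201901_1_3_1';
+```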
Let’s break down the name of the first part: `201901_1_3_1`: diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index d169e137038..faa1026b919 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -39,10 +39,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] AS [db2.]name2 - `policy_name` - (optionally) policy name, it will be used to store temporary files for async send -See also: +**See Also** - - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting - - [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples + - [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting + - [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples **Distributed Settings** diff --git a/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md new file mode 100644 index 00000000000..731dc9dface --- /dev/null +++ b/docs/en/faq/general/how-do-i-contribute-code-to-clickhouse.md @@ -0,0 +1,15 @@ +--- +title: How do I contribute code to ClickHouse? +toc_hidden: true +toc_priority: 120 +--- + +# How do I contribute code to ClickHouse? {#how-do-i-contribute-code-to-clickhouse} + +ClickHouse is an open-source project [developed on GitHub](https://github.com/ClickHouse/ClickHouse). + +As customary, contribution instructions are published in [CONTRIBUTING.md](https://github.com/ClickHouse/ClickHouse/blob/master/CONTRIBUTING.md) file in the root of the source code repository. + +If you want to suggest a substantial change to ClickHouse, consider [opening a GitHub issue](https://github.com/ClickHouse/ClickHouse/issues/new/choose) explaining what you want to do, to discuss it with maintainers and community first. [Examples of such RFC issues](https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aissue+is%3Aopen+rfc). + +If your contributions are security related, please check out [our security policy](https://github.com/ClickHouse/ClickHouse/security/policy/) too. diff --git a/docs/en/faq/general/index.md b/docs/en/faq/general/index.md index cd2368be1cf..51fff9a53ae 100644 --- a/docs/en/faq/general/index.md +++ b/docs/en/faq/general/index.md @@ -17,6 +17,7 @@ Questions: - [What is OLAP?](../../faq/general/olap.md) - [What is a columnar database?](../../faq/general/columnar-database.md) - [Why not use something like MapReduce?](../../faq/general/mapreduce.md) +- [How do I contribute code to ClickHouse?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md) !!! info "Don’t see what you were looking for?" Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. diff --git a/docs/en/faq/operations/index.md b/docs/en/faq/operations/index.md index c0a6d85b66d..81aec18b9cf 100644 --- a/docs/en/faq/operations/index.md +++ b/docs/en/faq/operations/index.md @@ -11,6 +11,7 @@ Questions: - [Which ClickHouse version to use in production?](../../faq/operations/production.md) - [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md) +- [Does ClickHouse support multi-region replication?](../../faq/operations/multi-region-replication.md) !!! 
info "Don’t see what you were looking for?" Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. diff --git a/docs/en/faq/operations/multi-region-replication.md b/docs/en/faq/operations/multi-region-replication.md new file mode 100644 index 00000000000..7d78737544a --- /dev/null +++ b/docs/en/faq/operations/multi-region-replication.md @@ -0,0 +1,13 @@ +--- +title: Does ClickHouse support multi-region replication? +toc_hidden: true +toc_priority: 30 +--- + +# Does ClickHouse support multi-region replication? {#does-clickhouse-support-multi-region-replication} + +The short answer is "yes". However, we recommend keeping latency between all regions/datacenters in two-digit range, otherwise write performance will suffer as it goes through distributed consensus protocol. For example, replication between US coasts will likely work fine, but between the US and Europe won't. + +Configuration-wise there's no difference compared to single-region replication, simply use hosts that are located in different locations for replicas. + +For more information, see [full article on data replication](../../engines/table-engines/mergetree-family/replication.md). diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 4a97ab6589d..70a1b8349ff 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -142,6 +142,12 @@ On Gentoo, you can just use `emerge clickhouse` to install ClickHouse from sourc To start the server as a daemon, run: +``` bash +$ sudo clickhouse start +``` + +There are also another ways to run ClickHouse: + ``` bash $ sudo service clickhouse-server start ``` @@ -152,6 +158,12 @@ If you do not have `service` command, run as $ sudo /etc/init.d/clickhouse-server start ``` +If you have `systemctl` command, run as + +``` bash +$ sudo systemctl start clickhouse-server.service +``` + See the logs in the `/var/log/clickhouse-server/` directory. If the server does not start, check the configurations in the file `/etc/clickhouse-server/config.xml`. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 245a0c8fe89..f266d0e6354 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -204,7 +204,7 @@ When parsing with this format, tabs or linefeeds are not allowed in each field. This format is also available under the name `TSVRawWithNames`. -## TabSeparatedWithNamesAndTypes {#tabseparatedrawwithnamesandtypes} +## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes} Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index a2f0944de47..38e729fde0b 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -424,7 +424,10 @@ Next are the configuration methods for different `type`. `query` value is a predefined query of `predefined_query_handler`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration. -The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully. 
+The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` settings, then queries the system table to check whether these settings were set successfully. + +!!! note "Warning" + To keep the default `handlers` such as` query`, `play`,` ping`, use the `` rule. Example: @@ -443,13 +446,14 @@ Example: SELECT name, value FROM system.settings WHERE name = {name_2:String} + ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2' +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' 1 -max_alter_threads 2 +max_final_threads 2 ``` !!! note "caution" @@ -461,7 +465,7 @@ In `dynamic_query_handler`, the query is written in the form of param of the HTT ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in. -To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and `queries` whether the settings were set successfully. +To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and `queries` whether the settings were set successfully. Example: @@ -475,13 +479,14 @@ Example: query_param + ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2¶m_name_1=max_threads¶m_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' +$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2¶m_name_1=max_threads¶m_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' max_threads 1 -max_alter_threads 2 +max_final_threads 2 ``` ### static {#static} @@ -505,6 +510,7 @@ Return a message. Say Hi! + ``` diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 342b1c9a496..a116c8e2222 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -6,7 +6,7 @@ toc_title: Client Libraries # Client Libraries from Third-party Developers {#client-libraries-from-third-party-developers} !!! warning "Disclaimer" - Yandex does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. + ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. 
- Python - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 593019bfb2e..393974c60c4 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -220,4 +220,24 @@ SeekTable is [free](https://www.seektable.com/help/cloud-pricing) for personal/i [Chadmin](https://github.com/bun4uk/chadmin) is a simple UI where you can visualize your currently running queries on your ClickHouse cluster and info about them and kill them if you want. +### TABLUM.IO {#tablum_io} + +[TABLUM.IO](https://tablum.io/) — an online query and analytics tool for ETL and visualization. It allows connecting to ClickHouse, query data via a versatile SQL console as well as to load data from static files and 3rd party services. TABLUM.IO can visualize data results as charts and tables. + +Features: +- ETL: data loading from popular databases, local and remote files, API invocations. +- Versatile SQL console with syntax highlight and visual query builder. +- Data visualization as charts and tables. +- Data materialization and sub-queries. +- Data reporting to Slack, Telegram or email. +- Data pipelining via proprietary API. +- Data export in JSON, CSV, SQL, HTML formats. +- Web-based interface. + +TABLUM.IO can be run as a self-hosted solution (as a docker image) or in the cloud. +License: [commercial](https://tablum.io/pricing) product with 3-month free period. + +Try it out for free [in the cloud](https://tablum.io/try). +Learn more about the product at [TABLUM.IO](https://tablum.io/) + [Original article](https://clickhouse.com/docs/en/interfaces/third-party/gui/) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 325cd1ff825..87c5a6f7aec 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -87,7 +87,7 @@ toc_title: Adopters | Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | | Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | | KGK Global | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) | -| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | +| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 5 servers | 55 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | | LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | | Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | | MAXILECT | Ad Tech, Blockchain, ML, AI | — | — | — | [Job advertisement, 2021](https://www.linkedin.com/feed/update/urn:li:activity:6780842017229430784/) | @@ -178,5 +178,9 @@ toc_title: Adopters | Цифровой Рабочий | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) | | ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | | ДомКлик | Real Estate | — | — | — | 
[Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | +| Futurra Group | Analytics | — | — | — | [Article in Russian, December 2021](https://dou.ua/forums/topic/35587/) | +| UseTech | Software Development | — | — | — | [Job Posting, December 2021](https://vk.com/wall136266658_2418) | +| Lookforsale | E-Commerce | — | — | — | [Job Posting, December 2021](https://telegram.me/javascript_jobs/587318) | +| R-Vision | Information Security | — | — | — | [Article in Russian, December 2021](https://www.anti-malware.ru/reviews/R-Vision-SENSE-15) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index eb4673be18a..6738f77cff9 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -16,6 +16,11 @@ ZooKeeper is one of the first well-known open-source coordination systems. It's By default, ClickHouse Keeper provides the same guarantees as ZooKeeper (linearizable writes, non-linearizable reads). It has a compatible client-server protocol, so any standard ZooKeeper client can be used to interact with ClickHouse Keeper. Snapshots and logs have an incompatible format with ZooKeeper, but `clickhouse-keeper-converter` tool allows to convert ZooKeeper data to ClickHouse Keeper snapshot. Interserver protocol in ClickHouse Keeper is also incompatible with ZooKeeper so mixed ZooKeeper / ClickHouse Keeper cluster is impossible. +ClickHouse Keeper supports Access Control List (ACL) the same way as [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) does. ClickHouse Keeper supports the same set of permissions and has the identical built-in schemes: `world`, `auth`, `digest`, `host` and `ip`. Digest authentication scheme uses pair `username:password`. Password is encoded in Base64. + +!!! info "Note" + External integrations are not supported. + ## Configuration ClickHouse Keeper can be used as a standalone replacement for ZooKeeper or as an internal part of the ClickHouse server, but in both cases configuration is almost the same `.xml` file. The main ClickHouse Keeper configuration tag is ``. Keeper configuration has the following parameters: @@ -118,13 +123,13 @@ echo mntr | nc localhost 9181 Bellow is the detailed 4lw commands: -- ruok : Tests if server is running in a non-error state. The server will respond with imok if it is running. Otherwise it will not respond at all. A response of "imok" does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information. +- `ruok`: Tests if server is running in a non-error state. The server will respond with imok if it is running. Otherwise it will not respond at all. A response of "imok" does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information. ``` imok ``` -- mntr : Outputs a list of variables that could be used for monitoring the health of the cluster. +- `mntr`: Outputs a list of variables that could be used for monitoring the health of the cluster. 
``` zk_version v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7 @@ -146,12 +151,11 @@ zk_followers 0 zk_synced_followers 0 ``` -- srvr : Lists full details for the server. +- `srvr`: Lists full details for the server. ``` ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7 Latency min/avg/max: 0/0/0 - Received: 2 Sent : 2 Connections: 1 @@ -161,16 +165,14 @@ Mode: leader Node count: 4 ``` -- stat : Lists brief details for the server and connected clients. +- `stat`: Lists brief details for the server and connected clients. ``` ClickHouse Keeper version: v21.11.1.1-prestable-7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7 Clients: 192.168.1.1:52852(recved=0,sent=0) 192.168.1.1:52042(recved=24,sent=48) - Latency min/avg/max: 0/0/0 - Received: 4 Sent : 4 Connections: 1 @@ -178,16 +180,15 @@ Outstanding: 0 Zxid: 36 Mode: leader Node count: 4 - ``` -- srst : Reset server statistics. The command will affect the result of `srvr`, `mntr` and `stat`. +- `srst`: Reset server statistics. The command will affect the result of `srvr`, `mntr` and `stat`. ``` Server stats reset. ``` -- conf : Print details about serving configuration. +- `conf`: Print details about serving configuration. ``` server_id=1 @@ -220,20 +221,20 @@ compress_snapshots_with_zstd_format=true configuration_change_tries_count=20 ``` -- cons : List full connection/session details for all clients connected to this server. Includes information on numbers of packets received/sent, session id, operation latencies, last operation performed, etc... +- `cons`: List full connection/session details for all clients connected to this server. Includes information on numbers of packets received/sent, session id, operation latencies, last operation performed, etc... ``` 192.168.1.1:52163(recved=0,sent=0,sid=0xffffffffffffffff,lop=NA,est=1636454787393,to=30000,lzxid=0xffffffffffffffff,lresp=0,llat=0,minlat=0,avglat=0,maxlat=0) 192.168.1.1:52042(recved=9,sent=18,sid=0x0000000000000001,lop=List,est=1636454739887,to=30000,lcxid=0x0000000000000005,lzxid=0x0000000000000005,lresp=1636454739892,llat=0,minlat=0,avglat=0,maxlat=0) ``` -- crst : Reset connection/session statistics for all connections. +- `crst`: Reset connection/session statistics for all connections. ``` Connection stats reset. ``` -- envi : Print details about serving environment +- `envi`: Print details about serving environment ``` Environment: @@ -250,41 +251,41 @@ user.tmp=/var/folders/b4/smbq5mfj7578f2jzwn602tt40000gn/T/ ``` -- dirs : Shows the total size of snapshot and log files in bytes +- `dirs`: Shows the total size of snapshot and log files in bytes ``` snapshot_dir_size: 0 log_dir_size: 3875 ``` -- isro: Tests if server is running in read-only mode. The server will respond with "ro" if in read-only mode or "rw" if not in read-only mode. +- `isro`: Tests if server is running in read-only mode. The server will respond with "ro" if in read-only mode or "rw" if not in read-only mode. ``` rw ``` -- wchs : Lists brief information on watches for the server. +- `wchs`: Lists brief information on watches for the server. ``` 1 connections watching 1 paths Total watches:1 ``` -- wchc : Lists detailed information on watches for the server, by session. This outputs a list of sessions(connections) with associated watches (paths). Note, depending on the number of watches this operation may be expensive (ie impact server performance), use it carefully. +- `wchc`: Lists detailed information on watches for the server, by session. 
This outputs a list of sessions (connections) with associated watches (paths). Note, depending on the number of watches this operation may be expensive (ie impact server performance), use it carefully. ``` 0x0000000000000001 /clickhouse/task_queue/ddl ``` -- wchp : Lists detailed information on watches for the server, by path. This outputs a list of paths (znodes) with associated sessions. Note, depending on the number of watches this operation may be expensive (ie impact server performance), use it carefully. +- `wchp`: Lists detailed information on watches for the server, by path. This outputs a list of paths (znodes) with associated sessions. Note, depending on the number of watches this operation may be expensive (i. e. impact server performance), use it carefully. ``` /clickhouse/task_queue/ddl 0x0000000000000001 ``` -- dump : Lists the outstanding sessions and ephemeral nodes. This only works on the leader. +- `dump`: Lists the outstanding sessions and ephemeral nodes. This only works on the leader. ``` Sessions dump (2): diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index 2e2a88dc7a8..da84c1f6a89 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -14,11 +14,11 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This #### Parameters: - `principal` - canonical service principal name that will be acquired and used when accepting security contexts. - - This parameter is optional, if omitted, the default principal will be used. + - This parameter is optional, if omitted, the default principal will be used. - `realm` - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it. - - This parameter is optional, if omitted, no additional filtering by realm will be applied. + - This parameter is optional, if omitted, no additional filtering by realm will be applied. Example (goes into `config.xml`): @@ -75,7 +75,7 @@ In order to enable Kerberos authentication for the user, specify `kerberos` sect Parameters: - `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it. - - This parameter is optional, if omitted, no additional filtering by realm will be applied. + - This parameter is optional, if omitted, no additional filtering by realm will be applied. Example (goes into `users.xml`): diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 8a38fbce083..e8099ef0ac6 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -435,26 +435,58 @@ Similar to `interserver_http_host`, except that this hostname can be used by oth ## interserver_http_credentials {#server-settings-interserver-http-credentials} -The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster. 
-By default, the authentication is not used. +A username and a password used to connect to other servers during [replication](../../engines/table-engines/mergetree-family/replication.md). Also the server authenticates other replicas using these credentials. So, `interserver_http_credentials` must be the same for all replicas in a cluster. + +By default, if `interserver_http_credentials` section is omitted, authentication is not used during replication. !!! note "Note" - These credentials are common for replication through `HTTP` and `HTTPS`. + `interserver_http_credentials` settings do not relate to a ClickHouse client credentials [configuration](../../interfaces/cli.md#configuration_files). -This section contains the following parameters: +!!! note "Note" + These credentials are common for replication via `HTTP` and `HTTPS`. -- `user` — username. -- `password` — password. +The section contains the following parameters: -**Example** +- `user` — Username. +- `password` — Password. +- `allow_empty` — If `true`, then other replicas are allowed to connect without authentication even if credentials are set. If `false`, then connections without authentication are refused. Default value: `false`. +- `old` — Contains old `user` and `password` used during credential rotation. Several `old` sections can be specified. + +**Credentials Rotation** + +ClickHouse supports dynamic interserver credentials rotation without stopping all replicas at the same time to update their configuration. Credentials can be changed in several steps. + +To enable authentication, set `interserver_http_credentials.allow_empty` to `true` and add credentials. This allows connections with authentication and without it. + +``` xml + + admin + 111 + true + +``` + +After configuring all replicas set `allow_empty` to `false` or remove this setting. It makes authentication with new credentials mandatory. + +To change existing credentials, move the username and the password to `interserver_http_credentials.old` section and update `user` and `password` with new values. At this point the server uses new credentials to connect to other replicas and accepts connections with either new or old credentials. ``` xml admin 222 + + admin + 111 + + + temp + 000 + ``` +When new credentials are applied to all replicas, old credentials may be removed. + ## keep_alive_timeout {#keep-alive-timeout} The number of seconds that ClickHouse waits for incoming requests before closing the connection. Defaults to 10 seconds. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e56625fe948..510047f4353 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -817,9 +817,19 @@ If the number of rows to be read from a file of a [MergeTree](../../engines/tabl Possible values: -- Any positive integer. +- Positive integer. -Default value: 163840. +Default value: `163840`. + +## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem} + +The minimum number of lines to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem. + +Possible values: + +- Positive integer. + +Default value: `163840`. 
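+A possible way to tune it per query is sketched below (the `hits_s3` table name is only an assumption; any MergeTree table stored on a remote filesystem applies):
+
+``` sql
+-- Raise the threshold so reading is parallelized only for sufficiently large remote ranges.
+SELECT count()
+FROM hits_s3
+SETTINGS merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 1048576;
+```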
## merge_tree_min_bytes_for_concurrent_read {#setting-merge-tree-min-bytes-for-concurrent-read} @@ -827,9 +837,19 @@ If the number of bytes to read from one file of a [MergeTree](../../engines/tabl Possible value: -- Any positive integer. +- Positive integer. -Default value: 251658240. +Default value: `251658240`. + +## merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem {#merge-tree-min-bytes-for-concurrent-read-for-remote-filesystem} + +The minimum number of bytes to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem. + +Possible values: + +- Positive integer. + +Default value: `251658240`. ## merge_tree_min_rows_for_seek {#setting-merge-tree-min-rows-for-seek} @@ -1469,7 +1489,7 @@ Possible values: Default value: `1`. -**See Also** +**See Also** - [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression) @@ -2095,7 +2115,7 @@ Possible values: - 0 — Optimization disabled. - 1 — Optimization enabled. - + Default value: `1`. See also: @@ -3134,6 +3154,12 @@ Possible values: Default value: `0`. +!!! warning "Warning" + Nullable primary key usually indicates bad design. It is forbidden in almost all main stream DBMS. The feature is mainly for [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) and is not heavily tested. Use with care. + +!!! warning "Warning" + Do not enable this feature in version `<= 21.8`. It's not properly implemented and may lead to server crash. + ## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty} Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility. @@ -3682,49 +3708,6 @@ Possible values: Default value: `0`. -## materialized_postgresql_max_block_size {#materialized-postgresql-max-block-size} - -Sets the number of rows collected in memory before flushing data into PostgreSQL database table. - -Possible values: - -- Positive integer. - -Default value: `65536`. - -## materialized_postgresql_tables_list {#materialized-postgresql-tables-list} - -Sets a comma-separated list of PostgreSQL database tables, which will be replicated via [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) database engine. - -Default value: empty list — means whole PostgreSQL database will be replicated. - -## materialized_postgresql_schema {#materialized-postgresql-schema} - -Default value: empty string. (Default schema is used) - -## materialized_postgresql_schema_list {#materialized-postgresql-schema-list} - -Default value: empty list. (Default schema is used) - -## materialized_postgresql_allow_automatic_update {#materialized-postgresql-allow-automatic-update} - -Allows reloading table in the background, when schema changes are detected. DDL queries on the PostgreSQL side are not replicated via ClickHouse [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) engine, because it is not allowed with PostgreSQL logical replication protocol, but the fact of DDL changes is detected transactionally. In this case, the default behaviour is to stop replicating those tables once DDL is detected. 
However, if this setting is enabled, then, instead of stopping the replication of those tables, they will be reloaded in the background via database snapshot without data losses and replication will continue for them. - -Possible values: - -- 0 — The table is not automatically updated in the background, when schema changes are detected. -- 1 — The table is automatically updated in the background, when schema changes are detected. - -Default value: `0`. - -## materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot} - -A user-created replication slot. Must be used together with [materialized_postgresql_snapshot](#materialized-postgresql-snapshot). - -## materialized_postgresql_snapshot {#materialized-postgresql-snapshot} - -A text string identifying a snapshot, from which [initial dump of PostgreSQL tables](../../engines/database-engines/materialized-postgresql.md) will be performed. Must be used together with [materialized_postgresql_replication_slot](#materialized-postgresql-replication-slot). - ## allow_experimental_projection_optimization {#allow-experimental-projection-optimization} Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries. @@ -3993,8 +3976,8 @@ If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will Possible values: -- 0 — Insertions are made synchronously, one after another. -- 1 — Multiple asynchronous insertions enabled. +- 0 — Insertions are made synchronously, one after another. +- 1 — Multiple asynchronous insertions enabled. Default value: `0`. @@ -4066,7 +4049,7 @@ Default value: `0`. ## alter_partition_verbose_result {#alter-partition-verbose-result} -Enables or disables the display of information about the parts to which the manipulation operations with partitions and parts have been successfully applied. +Enables or disables the display of information about the parts to which the manipulation operations with partitions and parts have been successfully applied. Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) and to [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition). Possible values: diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index 5ba38ab3e67..55e4a8284a0 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -6,7 +6,7 @@ You can use this table to get information similar to the [DESCRIBE TABLE](../../ Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.columns` only in those session where they have been created. They are shown with the empty `database` field. -Columns: +The `system.columns` table contains the following columns (the column type is shown in brackets): - `database` ([String](../../sql-reference/data-types/string.md)) — Database name. - `table` ([String](../../sql-reference/data-types/string.md)) — Table name. @@ -86,21 +86,4 @@ numeric_scale: ᴺᵁᴸᴸ datetime_precision: ᴺᵁᴸᴸ ``` -The `system.columns` table contains the following columns (the column type is shown in brackets): - -- `database` (String) — Database name. -- `table` (String) — Table name. -- `name` (String) — Column name. -- `type` (String) — Column type. 
-- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. -- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. -- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. -- `marks_bytes` (UInt64) — The size of marks, in bytes. -- `comment` (String) — Comment on the column, or an empty string if it is not defined. -- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. -- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. -- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. -- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. - [Original article](https://clickhouse.com/docs/en/operations/system-tables/columns) diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 551c63d1aa3..21e5923e3a0 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -35,7 +35,7 @@ SELECT * FROM system.metrics LIMIT 10 - [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. - [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred. -- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. [Original article](https://clickhouse.com/docs/en/operations/system-tables/metrics) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index 598af24c0de..33bfe72548b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -24,9 +24,7 @@ Function: - Calculates a hash for all parameters in the aggregate, then uses it in calculations. -- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536. - - This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. +- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536. This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. - Provides the result deterministically (it does not depend on the query processing order). 
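+A minimal usage example (the `hits` table and the `UserID` column are assumptions used only for illustration):
+
+``` sql
+-- Approximate number of distinct users; repeated runs return the same value.
+SELECT uniq(UserID) FROM hits;
+```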
diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index 81945eeece6..e483a20eed9 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -11,9 +11,7 @@ Aggregate functions can have an implementation-defined intermediate state that c **Parameters** -- Name of the aggregate function. - - If the function is parametric, specify its parameters too. +- Name of the aggregate function. If the function is parametric, specify its parameters too. - Types of the aggregate function arguments. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index 5fedd5cf8ad..b49f384367d 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -41,7 +41,7 @@ Example of a polygon dictionary configuration: ``` -Tne corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query): +The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query): ``` sql CREATE DICTIONARY polygon_dict_name ( key Array(Array(Array(Array(Float64)))), diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 095f059513c..e606a19af6f 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -122,7 +122,12 @@ Setting fields: - `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`). - `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported. +- `command_termination_timeout` — executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter. +- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. +- `command_read_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. - `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. +- `execute_direct` - Executable source file will be searched inside `user_scripts` folder and executed directly. Arguments are passed using spaces. Example: `test_script arg_1 arg_2`. Default value is false. Optional parameter. +- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. 
@@ -150,10 +155,14 @@ Setting fields: - `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`). - `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported. -- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions. -- `command_termination_timeout` — Executable pool script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter. +- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions. Default value is `16`. +- `command_termination_timeout` — executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter. - `max_command_execution_time` — Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter. +- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. +- `command_read_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. - `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter. +- `execute_direct` - Executable source file will be searched inside `user_scripts` folder and executed directly. Additional arguments can be specified. Example: `test_script arg_1 arg_2`. Default value is false. Optional parameter. +- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index e86e6b37998..037078ba223 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -73,16 +73,19 @@ User defined function configurations are searched relative to the path specified A function configuration contains the following settings: - `name` - a function name. -- `command` - a command or a script to execute. +- `command` - script name to execute or command if `execute_direct` is false. - `argument` - argument description with the `type` of an argument. Each argument is described in a separate setting. - `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command. - `return_type` - the type of a returned value. - `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created. - `max_command_execution_time` - maximum execution time in seconds for processing block of data. 
This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. -- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. +- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`. +- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. +- `command_read_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. - `pool_size` - the size of a command pool. Optional. Default value is `16`. -- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. - `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. +- `execute_direct` - Executable source file will be searched inside `user_scripts` folder and executed directly. Additional arguments can be specified. Example: `test_script arg_1 arg_2`. Default value is true. Optional parameter. +- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter. The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is after processing a chunk of arguments it must wait for the next chunk. @@ -102,7 +105,6 @@ Creating `test_function` using XML configuration: TabSeparated cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table" - 0 ``` diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md index 4723613aeef..e0e58f7a0f6 100644 --- a/docs/en/sql-reference/statements/create/role.md +++ b/docs/en/sql-reference/statements/create/role.md @@ -31,7 +31,7 @@ CREATE ROLE accountant; GRANT SELECT ON db.* TO accountant; ``` -This sequence of queries creates the role `accountant` that has the privilege of reading data from the `accounting` database. +This sequence of queries creates the role `accountant` that has the privilege of reading data from the `db` database. Assigning the role to the user `mira`: diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index f04952746a6..2b1262f7d3c 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -21,7 +21,7 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta - `user` — ClickHouse user account. The `WITH GRANT OPTION` clause grants `user` or `role` with permission to execute the `GRANT` query. Users can grant privileges of the same scope they have and less. -The `WITH REPLACE OPTION` clause replace old privileges by new privileges for the `user` or `role`, if not specified it is append privileges. +The `WITH REPLACE OPTION` clause replace old privileges by new privileges for the `user` or `role`, if is not specified it appends privileges. 
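+For illustration (the `john` account is an assumption):
+
+``` sql
+-- Appends SELECT on db.* to the privileges john already has.
+GRANT SELECT ON db.* TO john;
+-- Replaces all of john's previous privileges with SELECT on db.* only.
+GRANT SELECT ON db.* TO john WITH REPLACE OPTION;
+```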
## Assigning Role Syntax {#assign-role-syntax} @@ -33,7 +33,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US - `user` — ClickHouse user account. The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`. -The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if not specified it is append roles. +The `WITH REPLACE OPTION` clause replaces old roles with new roles for the `user` or `role`; if it is not specified, roles are appended. ## Usage {#grant-usage} diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index aa61348d2a0..3d302be561a 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -55,13 +55,13 @@ The behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_ - [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge) - [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) -## ON Section Conditions {on-section-conditions} +## ON Section Conditions {#on-section-conditions} + +An `ON` section can contain several conditions combined using the `AND` and `OR` operators. Conditions specifying join keys must refer to both the left and right tables and must use the equality operator. Other conditions may use other logical operators, but they must refer to either the left or the right table of a query. -An `ON` section can contain several conditions combined using the `AND` operator. Conditions specifying join keys must refer both left and right tables and must use the equality operator. Other conditions may use other logical operators but they must refer either the left or the right table of a query. Rows are joined if the whole complex condition is met. If the conditions are not met, still rows may be included in the result depending on the `JOIN` type. Note that if the same conditions are placed in a `WHERE` section and they are not met, then rows are always filtered out from the result. -!!! note "Note" - The `OR` operator inside an `ON` section is not supported yet. +The `OR` operator inside the `ON` clause works using the hash join algorithm: for each `OR` argument with join keys for `JOIN`, a separate hash table is created, so memory consumption and query execution time grow linearly with the number of `OR` expressions in the `ON` clause. !!! note "Note" If a condition refers columns from different tables, then only the equality operator (`=`) is supported so far.
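The linear growth noted above for `OR` arguments in the `ON` clause can be checked empirically. This is a hedged sketch, not part of the patch: it assumes the query log is enabled and simply compares `memory_usage` of recent join queries recorded in `system.query_log`:

``` sql
-- Compare peak memory of recent joins; queries with more OR arguments in ON
-- are expected to report proportionally higher memory_usage.
SELECT query, memory_usage, query_duration_ms
FROM system.query_log
WHERE type = 'QueryFinish' AND query ILIKE '%JOIN%ON%'
ORDER BY event_time DESC
LIMIT 10;
```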
@@ -109,7 +109,47 @@ Result: │ B │ Text B │ 15 │ └──────┴────────┴────────┘ ``` +Query with `INNER` type of a join and condition with `OR`: +``` sql +CREATE TABLE t1 (`a` Int64, `b` Int64) ENGINE = MergeTree() ORDER BY a; + +CREATE TABLE t2 (`key` Int32, `val` Int64) ENGINE = MergeTree() ORDER BY key; + +INSERT INTO t1 SELECT number as a, -a as b from numbers(5); + +INSERT INTO t2 SELECT if(number % 2 == 0, toInt64(number), -number) as key, number as val from numbers(5); + +SELECT a, b, val FROM t1 INNER JOIN t2 ON t1.a = t2.key OR t1.b = t2.key; +``` + +Result: + +``` +┌─a─┬──b─┬─val─┐ +│ 0 │ 0 │ 0 │ +│ 1 │ -1 │ 1 │ +│ 2 │ -2 │ 2 │ +│ 3 │ -3 │ 3 │ +│ 4 │ -4 │ 4 │ +└───┴────┴─────┘ +``` + +Query with `INNER` type of a join and conditions with `OR` and `AND`: + +``` sql +SELECT a, b, val FROM t1 INNER JOIN t2 ON t1.a = t2.key OR t1.b = t2.key AND t2.val > 3; +``` + +Result: + +``` +┌─a─┬──b─┬─val─┐ +│ 0 │ 0 │ 0 │ +│ 2 │ -2 │ 2 │ +│ 4 │ -4 │ 4 │ +└───┴────┴─────┘ +``` ## ASOF JOIN Usage {#asof-join-usage} `ASOF JOIN` is useful when you need to join records that have no exact match. diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md new file mode 100644 index 00000000000..6183fe83c38 --- /dev/null +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -0,0 +1,58 @@ +--- +toc_priority: 55 +toc_title: hdfsCluster +--- + +# hdfsCluster Table Function {#hdfsCluster-table-function} + +Allows processing files from HDFS in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands the asterisks in the HDFS file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. + +**Syntax** + +``` sql +hdfsCluster(cluster_name, URI, format, structure) +``` + +**Arguments** + +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `URI` — URI to a file or a bunch of files. Supports the following wildcards in read-only mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. + +**Returned value** + +A table with the specified structure for reading data from the specified files. + +**Examples** + +1. Suppose that we have a ClickHouse cluster named `cluster_simple`, and several files with the following URIs on HDFS: + +- ‘hdfs://hdfs1:9000/some_dir/some_file_1’ +- ‘hdfs://hdfs1:9000/some_dir/some_file_2’ +- ‘hdfs://hdfs1:9000/some_dir/some_file_3’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_1’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_2’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_3’ + +2. Query the number of rows in these files: + +``` sql +SELECT count(*) +FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32') +``` + +3. Query the number of rows in all files of these two directories: + +``` sql +SELECT count(*) +FROM hdfsCluster('cluster_simple', 'hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value UInt32') +``` + +!!!
warning "Warning" + If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. + +**See Also** + +- [HDFS engine](../../engines/table-engines/integrations/hdfs.md) +- [HDFS table function](../../sql-reference/table-functions/hdfs.md) diff --git a/docs/ja/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/ja/faq/general/how-do-i-contribute-code-to-clickhouse.md new file mode 120000 index 00000000000..5ac9a615386 --- /dev/null +++ b/docs/ja/faq/general/how-do-i-contribute-code-to-clickhouse.md @@ -0,0 +1 @@ +../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md \ No newline at end of file diff --git a/docs/ja/faq/operations/multi-region-replication.md b/docs/ja/faq/operations/multi-region-replication.md new file mode 120000 index 00000000000..dbc985ee1fb --- /dev/null +++ b/docs/ja/faq/operations/multi-region-replication.md @@ -0,0 +1 @@ +../../../en/faq/operations/multi-region-replication.md \ No newline at end of file diff --git a/docs/ja/getting-started/example-datasets/ontime.md b/docs/ja/getting-started/example-datasets/ontime.md index 2a951af6026..33314faa53d 100644 --- a/docs/ja/getting-started/example-datasets/ontime.md +++ b/docs/ja/getting-started/example-datasets/ontime.md @@ -15,13 +15,7 @@ toc_title: OnTime データのダウンロード: ``` bash -for s in `seq 1987 2018` -do -for m in `seq 1 12` -do -wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip -done -done +wget --no-check-certificate --continue https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip ``` (https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh より) @@ -40,7 +34,7 @@ CREATE TABLE `ontime` `Reporting_Airline` String, `DOT_ID_Reporting_Airline` Int32, `IATA_CODE_Reporting_Airline` String, - `Tail_Number` Int32, + `Tail_Number` String, `Flight_Number_Reporting_Airline` String, `OriginAirportID` Int32, `OriginAirportSeqID` Int32, diff --git a/docs/ja/interfaces/http.md b/docs/ja/interfaces/http.md index 4ac9cd9e472..210e3f46d24 100644 --- a/docs/ja/interfaces/http.md +++ b/docs/ja/interfaces/http.md @@ -397,7 +397,7 @@ $ curl -v 'http://localhost:8123/predefined_query' `` 値は以下の定義済みクエリです `` これは、Http要求が一致し、クエリの結果が返されたときにClickHouseによって実行されます。 これは必須構成です。 -次の例では、次の値を定義します `max_threads` と `max_alter_threads` 設定、そしてクエリのテーブルから設定設定します。 +次の例では、次の値を定義します `max_threads` と `max_final_threads` 設定、そしてクエリのテーブルから設定設定します。 例: @@ -420,9 +420,9 @@ $ curl -v 'http://localhost:8123/predefined_query' ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2' +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' 1 -max_alter_threads 2 +max_final_threads 2 ``` !!! note "注意" @@ -434,7 +434,7 @@ max_alter_threads 2 クリックハウスは、 `` HTTP要求のurlの値。 のデフォルト値 `` は `/query` . 
これはオプションの構成です。 設定ファイルに定義がない場合、paramは渡されません。 -この機能を試すために、この例ではmax_threadsとmax_alter_threadsの値を定義し、設定が正常に設定されたかどうかを照会します。 +この機能を試すために、この例ではmax_threadsとmax_final_threadsの値を定義し、設定が正常に設定されたかどうかを照会します。 例: @@ -452,9 +452,9 @@ max_alter_threads 2 ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2¶m_name_1=max_threads¶m_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' +$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2¶m_name_1=max_threads¶m_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' max_threads 1 -max_alter_threads 2 +max_final_threads 2 ``` ## 静的 {#static} diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md index 5949cc8a0d7..78a82955cd2 100644 --- a/docs/ru/engines/table-engines/integrations/hdfs.md +++ b/docs/ru/engines/table-engines/integrations/hdfs.md @@ -5,7 +5,7 @@ toc_title: HDFS # HDFS {#table_engines-hdfs} -Управляет данными в HDFS. Данный движок похож на движки [File](../special/file.md#table_engines-file) и [URL](../special/url.md#table_engines-url). +Этот движок обеспечивает интеграцию с экосистемой [Apache Hadoop](https://ru.wikipedia.org/wiki/Hadoop), позволяя управлять данными в HDFS посредством ClickHouse. Данный движок похож на движки [File](../special/file.md#table_engines-file) и [URL](../special/url.md#table_engines-url), но предоставляет возможности, характерные для Hadoop. ## Использование движка {#usage} @@ -13,9 +13,11 @@ toc_title: HDFS ENGINE = HDFS(URI, format) ``` -В параметр `URI` нужно передавать полный URI файла в HDFS. +**Параметры движка** + +В параметр `URI` нужно передавать полный URI файла в HDFS. Часть URI с путем файла может содержать шаблоны. В этом случае таблица может использоваться только для чтения. Параметр `format` должен быть таким, который ClickHouse может использовать и в запросах `INSERT`, и в запросах `SELECT`. Полный список поддерживаемых форматов смотрите в разделе [Форматы](../../../interfaces/formats.md#formats). -Часть URI с путем файла может содержать шаблоны. В этом случае таблица может использоваться только для чтения. + **Пример:** @@ -67,12 +69,12 @@ SELECT * FROM hdfs_engine_table LIMIT 2 1. Предположим, у нас есть несколько файлов со следующими URI в HDFS: -- 'hdfs://hdfs1:9000/some_dir/some_file_1' -- 'hdfs://hdfs1:9000/some_dir/some_file_2' -- 'hdfs://hdfs1:9000/some_dir/some_file_3' -- 'hdfs://hdfs1:9000/another_dir/some_file_1' -- 'hdfs://hdfs1:9000/another_dir/some_file_2' -- 'hdfs://hdfs1:9000/another_dir/some_file_3' + - 'hdfs://hdfs1:9000/some_dir/some_file_1' + - 'hdfs://hdfs1:9000/some_dir/some_file_2' + - 'hdfs://hdfs1:9000/some_dir/some_file_3' + - 'hdfs://hdfs1:9000/another_dir/some_file_1' + - 'hdfs://hdfs1:9000/another_dir/some_file_2' + - 'hdfs://hdfs1:9000/another_dir/some_file_3' 1. 
Есть несколько возможностей создать таблицу, состояющую из этих шести файлов: @@ -128,6 +130,7 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9 | **параметр** | **по умолчанию** | +| - | - | | rpc\_client\_connect\_tcpnodelay | true | | dfs\_client\_read\_shortcircuit | true | | output\_replace-datanode-on-failure | true | @@ -177,22 +180,23 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9 #### Расширенные параметры для ClickHouse {#clickhouse-extras} | **параметр** | **по умолчанию** | +| - | - | |hadoop\_kerberos\_keytab | "" | |hadoop\_kerberos\_principal | "" | |hadoop\_kerberos\_kinit\_command | kinit | ### Ограничения {#limitations} - * hadoop\_security\_kerberos\_ticket\_cache\_path могут быть определены только на глобальном уровне + * `hadoop_security_kerberos_ticket_cache_path` и `libhdfs3_conf` могут быть определены только на глобальном, а не на пользовательском уровне ## Поддержка Kerberos {#kerberos-support} -Если hadoop\_security\_authentication параметр имеет значение 'kerberos', ClickHouse аутентифицируется с помощью Kerberos. -[Расширенные параметры](#clickhouse-extras) и hadoop\_security\_kerberos\_ticket\_cache\_path помогают сделать это. +Если параметр `hadoop_security_authentication` имеет значение `kerberos`, ClickHouse аутентифицируется с помощью Kerberos. +[Расширенные параметры](#clickhouse-extras) и `hadoop_security_kerberos_ticket_cache_path` помогают сделать это. Обратите внимание что из-за ограничений libhdfs3 поддерживается только устаревший метод аутентификации, -коммуникация с узлами данных не защищена SASL (HADOOP\_SECURE\_DN\_USER надежный показатель такого -подхода к безопасности). Используйте tests/integration/test\_storage\_kerberized\_hdfs/hdfs_configs/bootstrap.sh для примера настроек. +коммуникация с узлами данных не защищена SASL (`HADOOP_SECURE_DN_USER` надежный показатель такого +подхода к безопасности). Используйте `tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh` для примера настроек. -Если hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal или hadoop\_kerberos\_kinit\_command указаны в настройках, kinit будет вызван. hadoop\_kerberos\_keytab и hadoop\_kerberos\_principal обязательны в этом случае. Необходимо также будет установить kinit и файлы конфигурации krb5. +Если `hadoop_kerberos_keytab`, `hadoop_kerberos_principal` или `hadoop_kerberos_kinit_command` указаны в настройках, `kinit` будет вызван. `hadoop_kerberos_keytab` и `hadoop_kerberos_principal` обязательны в этом случае. Необходимо также будет установить `kinit` и файлы конфигурации krb5. 
## Виртуальные столбцы {#virtual-columns} diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 19e2850dd51..7ea3d124ab3 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -191,5 +191,5 @@ ClickHouse может поддерживать учетные данные Kerbe **Смотрите также** - [Виртуальные столбцы](index.md#table_engines-virtual_columns) -- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) +- [background_message_broker_schedule_pool_size](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size) diff --git a/docs/ru/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/ru/faq/general/how-do-i-contribute-code-to-clickhouse.md new file mode 120000 index 00000000000..5ac9a615386 --- /dev/null +++ b/docs/ru/faq/general/how-do-i-contribute-code-to-clickhouse.md @@ -0,0 +1 @@ +../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md \ No newline at end of file diff --git a/docs/ru/faq/operations/multi-region-replication.md b/docs/ru/faq/operations/multi-region-replication.md new file mode 120000 index 00000000000..dbc985ee1fb --- /dev/null +++ b/docs/ru/faq/operations/multi-region-replication.md @@ -0,0 +1 @@ +../../../en/faq/operations/multi-region-replication.md \ No newline at end of file diff --git a/docs/ru/getting-started/example-datasets/ontime.md b/docs/ru/getting-started/example-datasets/ontime.md index e1d47a5a9e7..2ee4315c76f 100644 --- a/docs/ru/getting-started/example-datasets/ontime.md +++ b/docs/ru/getting-started/example-datasets/ontime.md @@ -15,13 +15,7 @@ toc_title: OnTime Скачивание данных (из `https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh`): ``` bash -for s in `seq 1987 2018` -do -for m in `seq 1 12` -do -wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip -done -done +wget --no-check-certificate --continue https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip ``` Создание таблицы: @@ -38,7 +32,7 @@ CREATE TABLE `ontime` `Reporting_Airline` String, `DOT_ID_Reporting_Airline` Int32, `IATA_CODE_Reporting_Airline` String, - `Tail_Number` Int32, + `Tail_Number` String, `Flight_Number_Reporting_Airline` String, `OriginAirportID` Int32, `OriginAirportSeqID` Int32, diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md index bbb66b70371..e2ca1a86284 100644 --- a/docs/ru/interfaces/cli.md +++ b/docs/ru/interfaces/cli.md @@ -114,7 +114,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe Параметры в конфигурационных файлах переопределяют значения по умолчанию. -### Параметры командной строки {#parametry-komandnoi-stroki} +### Параметры командной строки {#command-line-options} - `--host, -h` — имя сервера, по умолчанию — ‘localhost’. Вы можете использовать как имя, так и IPv4 или IPv6 адрес. - `--port` — порт для подключения, по умолчанию — 9000. Обратите внимание: для HTTP-интерфейса и нативного интерфейса используются разные порты. @@ -136,7 +136,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe Начиная с версии 20.5, в `clickhouse-client` есть автоматическая подсветка синтаксиса (включена всегда). 
-### Конфигурационные файлы {#konfiguratsionnye-faily} +### Конфигурационные файлы {#configuration_files} `clickhouse—client` использует первый существующий файл из: diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 79d760271f5..a384776e519 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -129,6 +129,9 @@ world Каждый элемент структуры типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) представляется как отдельный массив. +Входящие параметры типа "перечисление" (`ENUM`) могут передаваться в виде значений или порядковых номеров. Сначала переданное значение будет сопоставляться с элементами перечисления. Если совпадение не будет найдено и при этом переданное значение является числом, оно будет трактоваться как порядковый номер в перечислении. +Если входящие параметры типа `ENUM` содержат только порядковые номера, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для ускорения парсинга. + Например: ``` sql @@ -362,6 +365,9 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR Если установлена настройка [input_format_defaults_for_omitted_fields = 1](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) и тип столбца не `Nullable(T)`, то пустые значения без кавычек заменяются значениями по умолчанию для типа данных столбца. +Входящие параметры типа "перечисление" (`ENUM`) могут передаваться в виде значений или порядковых номеров. Сначала переданное значение будет сопоставляться с элементами перечисления. Если совпадение не будет найдено и при этом переданное значение является числом, оно будет трактоваться как порядковый номер в перечислении. +Если входящие параметры типа `ENUM` содержат только порядковые номера, рекомендуется включить настройку [input_format_tsv_enum_as_number](../operations/settings/settings.md#settings-input_format_tsv_enum_as_number) для ускорения парсинга. + Формат CSV поддерживает вывод totals и extremes аналогично `TabSeparated`. ## CSVWithNames {#csvwithnames} @@ -693,7 +699,7 @@ CREATE TABLE IF NOT EXISTS example_table - Если `input_format_defaults_for_omitted_fields = 1`, то значение по умолчанию для `x` равно `0`, а значение по умолчанию `a` равно `x * 2`. !!! note "Предупреждение" - Если `input_format_defaults_for_omitted_fields = 1`, то при обработке запросов ClickHouse потребляет больше вычислительных ресурсов, чем если `input_format_defaults_for_omitted_fields = 0`. + При добавлении данных с помощью `input_format_defaults_for_omitted_fields = 1`, ClickHouse потребляет больше вычислительных ресурсов по сравнению с `input_format_defaults_for_omitted_fields = 0`. ### Выборка данных {#vyborka-dannykh} diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index 6d94a43ff15..8687201e1c9 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -422,7 +422,10 @@ $ curl -v 'http://localhost:8123/predefined_query' Значение `query` — это предопределенный запрос `predefined_query_handler`, который выполняется ClickHouse при совпадении HTTP-запроса и возврате результата запроса. Это обязательная настройка. -В следующем примере определяются настройки [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_alter_threads`, а затем запрашивается системная таблица, чтобы проверить, были ли эти параметры успешно установлены. 
+В следующем примере определяются настройки [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_final_threads`, а затем запрашивается системная таблица, чтобы проверить, были ли эти параметры успешно установлены. + +!!! note "Предупреждение" + Чтобы сохранить стандартные `handlers` такие как `query`, `play`, `ping`, используйте правило ``. Пример: @@ -441,13 +444,14 @@ $ curl -v 'http://localhost:8123/predefined_query' SELECT name, value FROM system.settings WHERE name = {name_2:String} + ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2' +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' 1 -max_alter_threads 2 +max_final_threads 2 ``` !!! note "Предупреждение" @@ -459,7 +463,7 @@ max_alter_threads 2 ClickHouse извлекает и выполняет значение, соответствующее значению `query_param_name` URL-адресе HTTP-запроса. Значение по умолчанию `query_param_name` — это `/query` . Это необязательная настройка. Если в файле конфигурации нет определения, параметр не передается. -Чтобы поэкспериментировать с этой функциональностью, в примере определяются значения [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_alter_threads` и запрашивается, успешно ли были установлены настройки. +Чтобы поэкспериментировать с этой функциональностью, в примере определяются значения [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_final_threads` и запрашивается, успешно ли были установлены настройки. Пример: @@ -473,13 +477,14 @@ ClickHouse извлекает и выполняет значение, соотв query_param + ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2¶m_name_1=max_threads¶m_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' +$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2¶m_name_1=max_threads¶m_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' max_threads 1 -max_alter_threads 2 +max_final_threads 2 ``` ### static {#static} @@ -503,6 +508,7 @@ max_alter_threads 2 Say Hi! + ``` diff --git a/docs/ru/interfaces/third-party/gui.md b/docs/ru/interfaces/third-party/gui.md index b80aaf7948e..201e92f994e 100644 --- a/docs/ru/interfaces/third-party/gui.md +++ b/docs/ru/interfaces/third-party/gui.md @@ -227,4 +227,25 @@ SeekTable [бесплатен](https://www.seektable.com/help/cloud-pricing) д [Chadmin](https://github.com/bun4uk/chadmin) — простой графический интерфейс для визуализации запущенных запросов на вашем кластере ClickHouse. Он отображает информацию о запросах и дает возможность их завершать. +### TABLUM.IO {#tablum_io} + +[TABLUM.IO](https://tablum.io/) — онлайн инструмент для загрузки и визуализации данных. Позволяет подключаться к БД ClickHouse, работать с базами и таблицами через многофункциональную SQL консоль, загружать данные из таблиц, объединять их с данными из других источников (файлов, сторонних сервисов) и визуализировать результаты в виде таблиц и графиков. 
+ +Основные возможности: +- Многофункциональный ETL: загрузка данных из популярных баз данных, локальных и удаленных файлов, загрузка результатов вызова REST API. +- Универсальная SQL консоль с подсветкой синтаксиса и визуальным генератором SQL запросов. +- Визуализация загруженных данных в виде графиков и таблиц. +- Материализация данных и подзапросы к загруженным данным. +- Отправка результатов визуализации в Slack, Telegram или на email. +- Организация потоков данных (data pipeline) через собственный API. +- Экспорт данных в форматах JSON, CSV, SQL, HTML. +- Веб-интерфейс. + +Поддерживается установка TABLUM.IO на собственный сервер (в виде Docker образа) или работа с сервисом в облаке. +Лицензия: [коммерческий](https://tablum.io/pricing) продукт с периодом бесплатного тестирования 3 месяца. + +Протестировать TABLUM.IO без разворачивания на собственном сервере можно [здесь](https://tablum.io/try). +Подробно о продукте смотрите на [TABLUM.IO](https://tablum.io/) + + [Original article](https://clickhouse.com/docs/en/interfaces/third-party/gui/) diff --git a/docs/ru/operations/clickhouse-keeper.md b/docs/ru/operations/clickhouse-keeper.md index 14d95ebae68..9d6c4799008 100644 --- a/docs/ru/operations/clickhouse-keeper.md +++ b/docs/ru/operations/clickhouse-keeper.md @@ -16,12 +16,17 @@ ZooKeeper — один из первых широко известных сер По умолчанию ClickHouse Keeper предоставляет те же гарантии, что и ZooKeeper (линеаризуемость записей, последовательная согласованность чтений). У него есть совместимый клиент-серверный протокол, поэтому любой стандартный клиент ZooKeeper может использоваться для взаимодействия с ClickHouse Keeper. Снэпшоты и журналы имеют несовместимый с ZooKeeper формат, однако можно конвертировать данные Zookeeper в снэпшот ClickHouse Keeper с помощью `clickhouse-keeper-converter`. Межсерверный протокол ClickHouse Keeper также несовместим с ZooKeeper, поэтому создание смешанного кластера ZooKeeper / ClickHouse Keeper невозможно. +Система управления доступом (ACL) ClickHouse Keeper реализована так же, как в [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl). ClickHouse Keeper поддерживает тот же набор разрешений и идентичные схемы: `world`, `auth`, `digest`, `host` и `ip`. Digest для аутентификации использует пару значений `username:password`. Пароль кодируется в Base64. + +!!! info "Примечание" + Внешние интеграции не поддерживаются. + ## Конфигурация ClickHouse Keeper может использоваться как равноценная замена ZooKeeper или как внутренняя часть сервера ClickHouse, но в обоих случаях конфигурация представлена файлом `.xml`. Главный тег конфигурации ClickHouse Keeper — это ``. Параметры конфигурации: - `tcp_port` — порт для подключения клиента (по умолчанию для ZooKeeper: `2181`). -- `tcp_port_secure` — зашифрованный порт для подключения клиента. +- `tcp_port_secure` — зашифрованный порт для SSL-соединения между клиентом и сервером сервиса. - `server_id` — уникальный идентификатор сервера, каждый участник кластера должен иметь уникальный номер (1, 2, 3 и т. д.). - `log_storage_path` — путь к журналам координации, лучше хранить их на незанятом устройстве (актуально и для ZooKeeper). - `snapshot_storage_path` — путь к снэпшотам координации. @@ -50,7 +55,11 @@ ClickHouse Keeper может использоваться как равноце - `shutdown_timeout` — время ожидания завершения внутренних подключений и выключения, в миллисекундах (по умолчанию: 5000). 
- `startup_timeout` — время отключения сервера, если он не подключается к другим участникам кворума, в миллисекундах (по умолчанию: 30000). -Конфигурация кворума находится в `.` и содержит описание серверов. Единственный параметр для всего кворума — `secure`, который включает зашифрованное соединение для связи между участниками кворума. Параметры для каждого ``: +Конфигурация кворума находится в `.` и содержит описание серверов. + +Единственный параметр для всего кворума — `secure`, который включает зашифрованное соединение для связи между участниками кворума. Параметру можно задать значение `true`, если для внутренней коммуникации между узлами требуется SSL-соединение, в ином случае не указывайте ничего. + +Параметры для каждого ``: - `id` — идентификатор сервера в кворуме. - `hostname` — имя хоста, на котором размещен сервер. diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 4038c59b8ca..4a2da778a06 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -436,26 +436,58 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ## interserver_http_credentials {#server-settings-interserver-http-credentials} -Имя пользователя и пароль, использующиеся для аутентификации при [репликации](../../operations/server-configuration-parameters/settings.md) движками Replicated\*. Это имя пользователя и пароль используются только для взаимодействия между репликами кластера и никак не связаны с аутентификацией клиентов ClickHouse. Сервер проверяет совпадение имени и пароля для соединяющихся с ним реплик, а также использует это же имя и пароль для соединения с другими репликами. Соответственно, эти имя и пароль должны быть прописаны одинаковыми для всех реплик кластера. -По умолчанию аутентификация не используется. +Имя пользователя и пароль, использующиеся для подключения к другим серверам при [репликации](../../engines/table-engines/mergetree-family/replication.md) движками Replicated\*. Сервер использует эти же учетные данные при аутентификации других реплик. Поэтому настройки `interserver_http_credentials` должны быть заданы одинаковыми для всех реплик кластера. + +По умолчанию, если секция `interserver_http_credentials` не задана в конфигурации, аутентификация при репликации не используется. !!! note "Примечание" - Эти учетные данные являются общими для обмена данными по протоколам `HTTP` и `HTTPS`. + Настройки `interserver_http_credentials` не относятся к [конфигурации](../../interfaces/cli.md#configuration_files) учетных данных клиента ClickHouse. + +!!! note "Примечание" + Учетные данные в `interserver_http_credentials` являются общими для репликации по `HTTP` и `HTTPS`. Раздел содержит следующие параметры: - `user` — имя пользователя. - `password` — пароль. +- `allow_empty` — если `true`, то другие реплики могут подключаться без аутентификации, даже если учетные данные заданы. Если `false`, то подключение без аутентификации не допускается. Значение по умолчанию: `false`. +- `old` — секция содержит старые значения `user` и `password`, которые используются в процессе изменения учетных данных. Можно указывать несколько секций `old`. -**Пример конфигурации** +**Изменение учетных данных** + +ClickHouse поддерживает динамическое изменение учетных данных. При этом не требуется одновременно останавливать все реплики, чтобы обновить конфигурацию. Изменение учетных данных выполняется за несколько шагов. 
+ +Чтобы включить аутентификацию, установите `interserver_http_credentials.allow_empty` в значение `true` и задайте учетные данные. С такой конфигурацией разрешены подключения как с аутентификацией, так и без нее. + +``` xml + + admin + 111 + true + +``` + +После конфигурации всех реплик установите `allow_empty` в значение `false` или удалите эту настройку. Это сделает аутентификацию с новыми учетными данными обязательной. + +Чтобы изменить учетные данные, перенесите имя пользователя и пароль в секцию `interserver_http_credentials.old` и укажите новые значения для `user` и `password`. Сервер будет использовать новые учетные данные при подключении к другим репликам и при этом будет разрешать подключения как с новыми, так и со старыми учетными данными. ``` xml admin 222 + + admin + 111 + + + temp + 000 + ``` +Когда новые учетные данные обновятся на всех репликах, старые учетные данные можно удалить из конфигурации. + ## keep_alive_timeout {#keep-alive-timeout} Время в секундах, в течение которого ClickHouse ожидает входящих запросов прежде чем закрыть соединение. Значение по умолчанию: 10 секунд. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 1b4da512c9f..affa90d9840 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -391,12 +391,14 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number} -Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV. +Включает или отключает парсинг значений перечислений как порядковых номеров. + +Если режим включен, то во входящих данных в формате `TCV` значения перечисления (тип `ENUM`) всегда трактуются как порядковые номера, а не как элементы перечисления. Эту настройку рекомендуется включать для оптимизации парсинга, если данные типа `ENUM` содержат только порядковые номера, а не сами элементы перечисления. Возможные значения: -- 0 — парсинг значений перечисления как значений. -- 1 — парсинг значений перечисления как идентификаторов перечисления. +- 0 — входящие значения типа `ENUM` сначала сопоставляются с элементами перечисления, а если совпадений не найдено, то трактуются как порядковые номера. +- 1 — входящие значения типа `ENUM` сразу трактуются как порядковые номера. Значение по умолчанию: 0. @@ -410,10 +412,39 @@ CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' При включенной настройке `input_format_tsv_enum_as_number`: +Запрос: + ```sql SET input_format_tsv_enum_as_number = 1; INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Результат: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +Запрос: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; +``` + +сгенерирует исключение. 
+ +При отключенной настройке `input_format_tsv_enum_as_number`: + +Запрос: + +```sql +SET input_format_tsv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; SELECT * FROM table_with_enum_column_for_tsv_insert; ``` @@ -428,15 +459,6 @@ SELECT * FROM table_with_enum_column_for_tsv_insert; └─────┴────────┘ ``` -При отключенной настройке `input_format_tsv_enum_as_number` запрос `INSERT`: - -```sql -SET input_format_tsv_enum_as_number = 0; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; -``` - -сгенерирует исключение. - ## input_format_null_as_default {#settings-input-format-null-as-default} Включает или отключает инициализацию [значениями по умолчанию](../../sql-reference/statements/create/table.md#create-default-values) ячеек с [NULL](../../sql-reference/syntax.md#null-literal), если тип данных столбца не позволяет [хранить NULL](../../sql-reference/data-types/nullable.md#data_type-nullable). @@ -739,9 +761,20 @@ ClickHouse может парсить только базовый формат `Y Возможные значения: -- Любое положительное целое число. +- Положительное целое число. -Значение по умолчанию: 163840. +Значение по умолчанию: `163840`. + + +## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge-tree-min-rows-for-concurrent-read-for-remote-filesystem} + +Минимальное количество строк для чтения из одного файла, прежде чем движок [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) может выполнять параллельное чтение из удаленной файловой системы. + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: `163840`. ## merge_tree_min_bytes_for_concurrent_read {#setting-merge-tree-min-bytes-for-concurrent-read} @@ -751,7 +784,17 @@ ClickHouse может парсить только базовый формат `Y - Положительное целое число. -Значение по умолчанию: 251658240. +Значение по умолчанию: `251658240`. + +## merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem {#merge-tree-min-bytes-for-concurrent-read-for-remote-filesystem} + +Минимальное количество байтов для чтения из одного файла, прежде чем движок [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) может выполнять параллельное чтение из удаленной файловой системы. + +Возможное значение: + +- Положительное целое число. + +Значение по умолчанию: `251658240`. ## merge_tree_min_rows_for_seek {#setting-merge-tree-min-rows-for-seek} @@ -1511,12 +1554,13 @@ SELECT area/period FROM account_orders FORMAT JSON; ## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number} -Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата CSV. +Включает или отключает парсинг значений перечислений как порядковых номеров. +Если режим включен, то во входящих данных в формате `CSV` значения перечисления (тип `ENUM`) всегда трактуются как порядковые номера, а не как элементы перечисления. Эту настройку рекомендуется включать для оптимизации парсинга, если данные типа `ENUM` содержат только порядковые номера, а не сами элементы перечисления. Возможные значения: -- 0 — парсинг значений перечисления как значений. -- 1 — парсинг значений перечисления как идентификаторов перечисления. +- 0 — входящие значения типа `ENUM` сначала сопоставляются с элементами перечисления, а если совпадений не найдено, то трактуются как порядковые номера. +- 1 — входящие значения типа `ENUM` сразу трактуются как порядковые номера. 
Значение по умолчанию: 0. @@ -1530,10 +1574,11 @@ CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' При включенной настройке `input_format_csv_enum_as_number`: +Запрос: + ```sql SET input_format_csv_enum_as_number = 1; INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; -SELECT * FROM table_with_enum_column_for_csv_insert; ``` Результат: @@ -1544,15 +1589,37 @@ SELECT * FROM table_with_enum_column_for_csv_insert; └─────┴────────┘ ``` -При отключенной настройке `input_format_csv_enum_as_number` запрос `INSERT`: +Запрос: ```sql -SET input_format_csv_enum_as_number = 0; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' ``` сгенерирует исключение. +При отключенной настройке `input_format_csv_enum_as_number`: + +Запрос: + +```sql +SET input_format_csv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' +SELECT * FROM table_with_enum_column_for_csv_insert; +``` + +Результат: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value─┐ +│ 103 │ first │ +└─────┴───────┘ +``` + ## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line} Использовать в качестве разделителя строк для CSV формата CRLF (DOS/Windows стиль) вместо LF (Unix стиль). @@ -1574,18 +1641,19 @@ INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2; `INSERT` завершается успешно только в том случае, когда ClickHouse смог без ошибки записать данные в `insert_quorum` реплик за время `insert_quorum_timeout`. Если по любой причине количество реплик с успешной записью не достигнет `insert_quorum`, то запись считается не состоявшейся и ClickHouse удалит вставленный блок из всех реплик, куда уже успел записать данные. -Все реплики в кворуме консистентны, т.е. содержат данные всех более ранних запросов `INSERT`. Последовательность `INSERT` линеаризуется. +Когда `insert_quorum_parallel` выключена, все реплики кворума консистентны, то есть содержат данные всех предыдущих запросов `INSERT` (последовательность `INSERT` линеаризуется). При чтении с диска данных, записанных с помощью `insert_quorum` и при выключенной `insert_quorum_parallel`, можно включить последовательную консистентность для запросов `SELECT` с помощью [select_sequential_consistency](#settings-select_sequential_consistency). -При чтении данных, записанных с `insert_quorum` можно использовать настройку [select_sequential_consistency](#settings-select_sequential_consistency). - -ClickHouse генерирует исключение +ClickHouse генерирует исключение: - Если количество доступных реплик на момент запроса меньше `insert_quorum`. - При попытке записать данные в момент, когда предыдущий блок ещё не вставлен в `insert_quorum` реплик. Эта ситуация может возникнуть, если пользователь вызвал `INSERT` прежде, чем завершился предыдущий с `insert_quorum`. +- При выключенной `insert_quorum_parallel` и при попытке записать данные в момент, когда предыдущий блок еще не вставлен в `insert_quorum` реплик (несколько параллельных `INSERT`-запросов). Эта ситуация может возникнуть при попытке пользователя выполнить очередной запрос `INSERT` к той же таблице, прежде чем завершится предыдущий с `insert_quorum`. + См. 
также: - [insert_quorum_timeout](#settings-insert_quorum_timeout) +- [insert_quorum_parallel](#settings-insert_quorum_parallel) - [select_sequential_consistency](#settings-select_sequential_consistency) ## insert_quorum_timeout {#settings-insert_quorum_timeout} @@ -1597,11 +1665,29 @@ ClickHouse генерирует исключение См. также: - [insert_quorum](#settings-insert_quorum) +- [insert_quorum_parallel](#settings-insert_quorum_parallel) +- [select_sequential_consistency](#settings-select_sequential_consistency) + +## insert_quorum_parallel {#settings-insert_quorum_parallel} + +Включает и выключает параллелизм для кворумных вставок (`INSERT`-запросы). Когда опция включена, возможно выполнять несколько кворумных `INSERT`-запросов одновременно, при этом запросы не дожидаются окончания друг друга . Когда опция выключена, одновременные записи с кворумом в одну и ту же таблицу будут отклонены (будет выполнена только одна из них). + +Возможные значения: + +- 0 — Выключена. +- 1 — Включена. + +Значение по умолчанию: 1. + +См. также: + +- [insert_quorum](#settings-insert_quorum) +- [insert_quorum_timeout](#settings-insert_quorum_timeout) - [select_sequential_consistency](#settings-select_sequential_consistency) ## select_sequential_consistency {#settings-select_sequential_consistency} -Включает или выключает последовательную консистентность для запросов `SELECT`. +Включает или выключает последовательную консистентность для запросов `SELECT`. Необходимо, чтобы `insert_quorum_parallel` была выключена (по умолчанию включена), а опция `insert_quorum` включена. Возможные значения: @@ -1614,10 +1700,13 @@ ClickHouse генерирует исключение Когда последовательная консистентность включена, то ClickHouse позволит клиенту выполнить запрос `SELECT` только к тем репликам, которые содержат данные всех предыдущих запросов `INSERT`, выполненных с `insert_quorum`. Если клиент обратится к неполной реплике, то ClickHouse сгенерирует исключение. В запросе SELECT не будут участвовать данные, которые ещё не были записаны на кворум реплик. +Если `insert_quorum_parallel` включена (по умолчанию это так), тогда `select_sequential_consistency` не будет работать. Причина в том, что параллельные запросы `INSERT` можно записать в разные наборы реплик кворума, поэтому нет гарантии того, что в отдельно взятую реплику будут сделаны все записи. + См. также: - [insert_quorum](#settings-insert_quorum) - [insert_quorum_timeout](#settings-insert_quorum_timeout) +- [insert_quorum_parallel](#settings-insert_quorum_parallel) ## insert_deduplicate {#settings-insert-deduplicate} diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index c970d4d24f3..06432193f9f 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -21,7 +21,7 @@ GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.ta - `user` — Пользователь ClickHouse. `WITH GRANT OPTION` разрешает пользователю или роли выполнять запрос `GRANT`. Пользователь может выдавать только те привилегии, которые есть у него, той же или меньшей области действий. -`WITH REPLACE OPTION` заменяет все старые привилегии новыми привилегиями для `user` или `role`, Если не указано, добавьте новые привилегии для старых. +`WITH REPLACE OPTION` заменяет все старые привилегии новыми привилегиями для `user` или `role`, если не указано, добавляет новые привилегии. ## Синтаксис назначения ролей {#assign-role-syntax} @@ -34,7 +34,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] 
TO {user | another_role | CURRENT_US - `user` — Пользователь ClickHouse. `WITH ADMIN OPTION` присваивает привилегию [ADMIN OPTION](#admin-option-privilege) пользователю или роли. -`WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, Если не указано, добавьте новые роли в старые. +`WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, если не указано, добавляет новые роли. ## Использование {#grant-usage} diff --git a/docs/ru/sql-reference/statements/select/join.md b/docs/ru/sql-reference/statements/select/join.md index 9f6d38a024f..bb9b7445083 100644 --- a/docs/ru/sql-reference/statements/select/join.md +++ b/docs/ru/sql-reference/statements/select/join.md @@ -55,13 +55,13 @@ FROM - [join_on_disk_max_files_to_merge](../../../operations/settings/settings.md#join_on_disk_max_files_to_merge) - [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) -## Условия в секции ON {on-section-conditions} +## Условия в секции ON {#on-section-conditions} + +Секция `ON` может содержать несколько условий, связанных операторами `AND` и `OR`. Условия, задающие ключи соединения, должны содержать столбцы левой и правой таблицы и должны использовать оператор равенства. Прочие условия могут использовать другие логические операторы, но в отдельном условии могут использоваться столбцы либо только левой, либо только правой таблицы. -Секция `ON` может содержать несколько условий, связанных оператором `AND`. Условия, задающие ключи соединения, должны содержать столбцы левой и правой таблицы и должны использовать оператор равенства. Прочие условия могут использовать другие логические операторы, но в отдельном условии могут использоваться столбцы либо только левой, либо только правой таблицы. Строки объединяются только тогда, когда всё составное условие выполнено. Если оно не выполнено, то строки могут попасть в результат в зависимости от типа `JOIN`. Обратите внимание, что если то же самое условие поместить в секцию `WHERE`, то строки, для которых оно не выполняется, никогда не попаду в результат. -!!! note "Примечание" - Оператор `OR` внутри секции `ON` пока не поддерживается. +Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый аргумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`. !!! note "Примечание" Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`).
@@ -110,6 +110,47 @@ SELECT name, text, scores FROM table_1 INNER JOIN table_2 └──────┴────────┴────────┘ ``` +Запрос с типом соединения `INNER` и условием с оператором `OR`: + +``` sql +CREATE TABLE t1 (`a` Int64, `b` Int64) ENGINE = MergeTree() ORDER BY a; + +CREATE TABLE t2 (`key` Int32, `val` Int64) ENGINE = MergeTree() ORDER BY key; + +INSERT INTO t1 SELECT number as a, -a as b from numbers(5); + +INSERT INTO t2 SELECT if(number % 2 == 0, toInt64(number), -number) as key, number as val from numbers(5); + +SELECT a, b, val FROM t1 INNER JOIN t2 ON t1.a = t2.key OR t1.b = t2.key; +``` + +Результат: + +``` +┌─a─┬──b─┬─val─┐ +│ 0 │ 0 │ 0 │ +│ 1 │ -1 │ 1 │ +│ 2 │ -2 │ 2 │ +│ 3 │ -3 │ 3 │ +│ 4 │ -4 │ 4 │ +└───┴────┴─────┘ +``` + +Запрос с типом соединения `INNER` и условиями с операторами `OR` и `AND`: + +``` sql +SELECT a, b, val FROM t1 INNER JOIN t2 ON t1.a = t2.key OR t1.b = t2.key AND t2.val > 3; +``` + +Результат: + +``` +┌─a─┬──b─┬─val─┐ +│ 0 │ 0 │ 0 │ +│ 2 │ -2 │ 2 │ +│ 4 │ -4 │ 4 │ +└───┴────┴─────┘ +``` ## Использование ASOF JOIN {#asof-join-usage} `ASOF JOIN` применим в том случае, когда необходимо объединять записи, которые не имеют точного совпадения. diff --git a/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md new file mode 120000 index 00000000000..5ac9a615386 --- /dev/null +++ b/docs/zh/faq/general/how-do-i-contribute-code-to-clickhouse.md @@ -0,0 +1 @@ +../../../en/faq/general/how-do-i-contribute-code-to-clickhouse.md \ No newline at end of file diff --git a/docs/zh/faq/operations/multi-region-replication.md b/docs/zh/faq/operations/multi-region-replication.md new file mode 120000 index 00000000000..dbc985ee1fb --- /dev/null +++ b/docs/zh/faq/operations/multi-region-replication.md @@ -0,0 +1 @@ +../../../en/faq/operations/multi-region-replication.md \ No newline at end of file diff --git a/docs/zh/getting-started/example-datasets/ontime.md b/docs/zh/getting-started/example-datasets/ontime.md index 03a9a8c4278..907f63634cc 100644 --- a/docs/zh/getting-started/example-datasets/ontime.md +++ b/docs/zh/getting-started/example-datasets/ontime.md @@ -15,17 +15,9 @@ toc_title: OnTime 下载数据: ``` bash -for s in `seq 1987 2018` -do -for m in `seq 1 12` -do -wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip -done -done +wget --no-check-certificate --continue https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip ``` -(参考 https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh ) - 创建表结构: ``` sql @@ -40,7 +32,7 @@ CREATE TABLE `ontime` `Reporting_Airline` String, `DOT_ID_Reporting_Airline` Int32, `IATA_CODE_Reporting_Airline` String, - `Tail_Number` Int32, + `Tail_Number` String, `Flight_Number_Reporting_Airline` String, `OriginAirportID` Int32, `OriginAirportSeqID` Int32, diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index cdce4f2f2e7..738b0365f46 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -407,7 +407,7 @@ $ curl -v 'http://localhost:8123/predefined_query' `query` 是一个预定义的`predefined_query_handler`查询,它由ClickHouse在匹配HTTP请求并返回查询结果时执行。这是一个必须的配置。 -以下是定义的[max_threads](../operations/settings/settings.md#settings-max_threads)和`max_alter_threads`设置, 然后查询系统表以检查这些设置是否设置成功。 +以下是定义的[max_threads](../operations/settings/settings.md#settings-max_threads)和`max_final_threads`设置, 然后查询系统表以检查这些设置是否设置成功。 示例: @@ -430,9 +430,9 @@ $ 
curl -v 'http://localhost:8123/predefined_query' ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_alter_threads?max_threads=1&max_alter_threads=2' +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' 1 -max_alter_threads 2 +max_final_threads 2 ``` !!! note "警告" @@ -444,7 +444,7 @@ max_alter_threads 2 ClickHouse提取并执行与HTTP请求URL中的`query_param_name`值对应的值。`query_param_name`的默认值是`/query`。这是一个可选的配置。如果配置文件中没有定义,则不会传入参数。 -为了试验这个功能,示例定义了[max_threads](../operations/settings/settings.md#settings-max_threads)和`max_alter_threads`,`queries`设置是否成功的值。 +为了试验这个功能,示例定义了[max_threads](../operations/settings/settings.md#settings-max_threads)和`max_final_threads`,`queries`设置是否成功的值。 示例: @@ -462,9 +462,9 @@ ClickHouse提取并执行与HTTP请求URL中的`query_param_name`值对应的值 ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_alter_threads=2¶m_name_1=max_threads¶m_name_2=max_alter_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' +$ curl -H 'XXX:TEST_HEADER_VALUE_DYNAMIC' 'http://localhost:8123/own?max_threads=1&max_final_threads=2¶m_name_1=max_threads¶m_name_2=max_final_threads&query_param=SELECT%20name,value%20FROM%20system.settings%20where%20name%20=%20%7Bname_1:String%7D%20OR%20name%20=%20%7Bname_2:String%7D' max_threads 1 -max_alter_threads 2 +max_final_threads 2 ``` ### static {#static} diff --git a/docs/zh/operations/system-tables/columns.md b/docs/zh/operations/system-tables/columns.md index 9a90561a07b..6d4299a9056 100644 --- a/docs/zh/operations/system-tables/columns.md +++ b/docs/zh/operations/system-tables/columns.md @@ -1,29 +1,89 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.columns {#system-columns} -# 系统。列 {#system-columns} +此系统表包含所有表中列的信息。 -包含有关所有表中列的信息。 +你可以使用这个表来获得类似于 [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) 查询的信息,但是可以同时获得多个表的信息。 -您可以使用此表获取类似于以下内容的信息 [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) 查询,但对于多个表一次。 +[临时表](../../sql-reference/statements/create/table.md#temporary-tables)中的列只在创建它们的会话中的 `system.columns` 中才可见,并且它们的 `database` 字段显示为空。 -该 `system.columns` 表包含以下列(列类型显示在括号中): +`system.columns` 表包含以下列 (括号中显示的是列类型): -- `database` (String) — Database name. -- `table` (String) — Table name. -- `name` (String) — Column name. -- `type` (String) — Column type. -- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`)为默认值,如果没有定义,则为空字符串。 -- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. -- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. -- `marks_bytes` (UInt64) — The size of marks, in bytes. -- `comment` (String) — Comment on the column, or an empty string if it is not defined. -- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. -- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. -- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. 
-- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. +- `database` ([String](../../sql-reference/data-types/string.md)) — 数据库名称。 +- `table` ([String](../../sql-reference/data-types/string.md)) — 表名。 +- `name` ([String](../../sql-reference/data-types/string.md)) — 列名。 +- `type` ([String](../../sql-reference/data-types/string.md)) — 列类型。 +- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 列在表中的顺序位置,从1开始。 +- `default_kind` ([String](../../sql-reference/data-types/string.md)) — 默认值的表达式类型(`DEFAULT`, `MATERIALIZED`, `ALIAS`) ,如果没有定义,则为空字符串。 +- `default_expression` ([String](../../sql-reference/data-types/string.md)) — 默认值的表达式,如果未定义则为空字符串。 +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 压缩数据的大小,以字节为单位。 +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 解压后的数据的大小,以字节为单位。 +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 标记的大小,以字节为单位。 +- `comment` ([String](../../sql-reference/data-types/string.md)) — 列注释,如果没有定义,则为空字符串。 +- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 列是否在分区表达式中的标志。 +- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 列是否在排序键表达式中的标志。 +- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 列是否在主键表达式中的标志。 +- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 列是否在采样键表达式中的标志。 +- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — 压缩编码的名称。 +- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 二进制数据、字符数据或文本数据和图像的最大长度(以字节为单位)。在 ClickHouse 中只对 `FixedString` 数据类型有意义。否则,将返回 `NULL` 值。 +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 近似数字型数据、精确数字型数据、整数型数据或货币数据的精度。在 ClickHouse 中,对于整数类型是比特率(bitness),对于 `Decimal` 类型是十进制精度。否则,将返回 `NULL` 值。 +- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 数字系统的基数是近似数字型数据、精确数字型数据、整数型数据或货币数据的精度。在 ClickHouse 中,对于整数类型是2,对于 `Decimal` 类型是10。否则,将返回 `NULL` 值。 +- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — 近似数字型数据、精确数字型数据、整数型数据或货币数据的比例。在 ClickHouse 中只对 `Decimal` 类型有意义。否则,将返回 `NULL` 值。 +- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — `DateTime64` 数据类型的小数精度。对于其他数据类型,将返回 `NULL` 值。 + +**示例** + +```sql +SELECT * FROM system.columns LIMIT 2 FORMAT Vertical; +``` + +```text +Row 1: +────── +database: INFORMATION_SCHEMA +table: COLUMNS +name: table_catalog +type: String +position: 1 +default_kind: +default_expression: +data_compressed_bytes: 0 +data_uncompressed_bytes: 0 +marks_bytes: 0 +comment: +is_in_partition_key: 0 +is_in_sorting_key: 0 +is_in_primary_key: 0 +is_in_sampling_key: 0 +compression_codec: +character_octet_length: ᴺᵁᴸᴸ +numeric_precision: ᴺᵁᴸᴸ +numeric_precision_radix: ᴺᵁᴸᴸ +numeric_scale: ᴺᵁᴸᴸ +datetime_precision: ᴺᵁᴸᴸ + +Row 2: +────── +database: INFORMATION_SCHEMA +table: COLUMNS +name: table_schema +type: String +position: 2 +default_kind: +default_expression: +data_compressed_bytes: 0 +data_uncompressed_bytes: 0 +marks_bytes: 0 +comment: +is_in_partition_key: 0 +is_in_sorting_key: 0 +is_in_primary_key: 0 +is_in_sampling_key: 0 
+compression_codec: +character_octet_length: ᴺᵁᴸᴸ +numeric_precision: ᴺᵁᴸᴸ +numeric_precision_radix: ᴺᵁᴸᴸ +numeric_scale: ᴺᵁᴸᴸ +datetime_precision: ᴺᵁᴸᴸ +``` [原文](https://clickhouse.com/docs/zh/operations/system-tables/columns) diff --git a/docs/zh/operations/system-tables/contributors.md b/docs/zh/operations/system-tables/contributors.md index e9374a7dc9c..fd876da3594 100644 --- a/docs/zh/operations/system-tables/contributors.md +++ b/docs/zh/operations/system-tables/contributors.md @@ -1,15 +1,10 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.contributors {#system-contributors} -# 系统。贡献者 {#system-contributors} - -包含有关贡献者的信息。 该顺序在查询执行时是随机的。 +此系统表包含有关贡献者的信息。排列顺序是在查询执行时随机生成的。 列: -- `name` (String) — Contributor (author) name from git log. +- `name` (String) — git 日志中的贡献者 (作者) 名字。 **示例** @@ -32,7 +27,7 @@ SELECT * FROM system.contributors LIMIT 10 └──────────────────┘ ``` -要在表中找出自己,请使用查询: +要在表中找到你自己,请这样查询: ``` sql SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova' @@ -43,3 +38,5 @@ SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova' │ Olga Khvostikova │ └──────────────────┘ ``` + +[原文](https://clickhouse.com/docs/zh/operations/system-tables/contributors) diff --git a/docs/zh/operations/system-tables/databases.md b/docs/zh/operations/system-tables/databases.md index 134b8ebc7ab..3fadb02446d 100644 --- a/docs/zh/operations/system-tables/databases.md +++ b/docs/zh/operations/system-tables/databases.md @@ -1,12 +1,39 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.databases {#system-databases} -# 系统。数据库 {#system-databases} +包含当前用户可用的数据库的相关信息。 -此表包含一个名为"字符串"的列 ‘name’ – the name of a database. +列: -服务器知道的每个数据库在表中都有相应的条目。 +- `name` ([String](../../sql-reference/data-types/string.md)) — 数据库的名称。 +- `engine` ([String](../../sql-reference/data-types/string.md)) — [数据库的引擎](../../engines/database-engines/index.md)。 +- `data_path` ([String](../../sql-reference/data-types/string.md)) — 数据的路径。 +- `metadata_path` ([String](../../sql-reference/data-types/enum.md)) — 元数据的路径。 +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — 数据库的 UUID。 +- `comment` ([String](../../sql-reference/data-types/enum.md)) — 数据库的注释。 -该系统表用于实现 `SHOW DATABASES` 查询。 +这个系统表的 `name` 列被用于实现 `SHOW DATABASES` 查询。 + +**示例** + +创建一个数据库。 + +``` sql +CREATE DATABASE test; +``` + +查询此用户所有可用的数据库。 + +``` sql +SELECT * FROM system.databases; +``` + +``` text +┌─name───────────────┬─engine─┬─data_path──────────────────┬─metadata_path───────────────────────────────────────────────────────┬─uuid─────────────────────────────────┬─comment─┐ +│ INFORMATION_SCHEMA │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │ │ +│ default │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/d31/d317b4bd-3595-4386-81ee-c2334694128a/ │ 24363899-31d7-42a0-a436-389931d752a0 │ │ +│ information_schema │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │ │ +│ system │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/1d1/1d1c869d-e465-4b1b-a51f-be033436ebf9/ │ 03e9f3d1-cc88-4a49-83e9-f3d1cc881a49 │ │ +└────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┴─────────┘ +``` + +[原文](https://clickhouse.com/docs/zh/operations/system-tables/databases) diff --git a/docs/zh/operations/system-tables/detached_parts.md 
b/docs/zh/operations/system-tables/detached_parts.md index ba35444c551..efcbb61d37e 100644 --- a/docs/zh/operations/system-tables/detached_parts.md +++ b/docs/zh/operations/system-tables/detached_parts.md @@ -1,14 +1,11 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.detached_parts {#system_tables-detached_parts} -# 系统。detached_parts {#system_tables-detached_parts} +包含关于 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 表的分离分区的信息。`reason` 列详细说明了该分区被分离的原因。 -包含有关分离部分的信息 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 桌子 该 `reason` 列指定分离部件的原因。 +对于用户分离的分区,原因是空的。你可以通过 [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) 命令添加这些分区。 -对于用户分离的部件,原因是空的。 这些部件可以附加 [ALTER TABLE ATTACH PARTITION\|PART](../../sql-reference/statements/alter.md#alter_attach-partition) 指挥部 +关于其他列的描述,请参见 [system.parts](../../operations/system-tables/parts.md#system_tables-parts)。 -有关其他列的说明,请参阅 [系统。零件](../../operations/system-tables/parts.md#system_tables-parts). +如果分区名称无效,一些列的值可能是`NULL`。你可以通过[ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter/partition.md#alter_drop-detached)来删除这些分区。 -如果部件名称无效,某些列的值可能为 `NULL`. 这些部分可以删除 [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter.md#alter_drop-detached). +[原文](https://clickhouse.com/docs/zh/operations/system-tables/detached_parts) diff --git a/docs/zh/operations/system-tables/disks.md b/docs/zh/operations/system-tables/disks.md index 8cd24d24550..2a6dcc9ae45 100644 --- a/docs/zh/operations/system-tables/disks.md +++ b/docs/zh/operations/system-tables/disks.md @@ -1,31 +1,27 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.disks {#system_tables-disks} -# 系统。磁盘 {#system_tables-disks} - -包含有关在定义的磁盘信息 [服务器配置](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). +包含在 [服务器配置](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure) 中定义的磁盘信息. 列: -- `name` ([字符串](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. -- `path` ([字符串](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system. -- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. -- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. -- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` 磁盘配置参数。 +- `name` ([字符串](../../sql-reference/data-types/string.md)) — 服务器配置中的磁盘名称. +- `path` ([字符串](../../sql-reference/data-types/string.md)) — 文件系统中挂载点的路径. +- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 磁盘上的可用空间,以字节为单位. +- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 磁盘容量,以字节为单位。 +- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 在磁盘上应保持空闲的磁盘空间的数量,以字节为单位。在磁盘配置的 `keep_free_space_bytes` 参数中定义。 -## 系统。storage_policies {#system_tables-storage_policies} +**示例** -包含有关存储策略和卷中定义的信息 [服务器配置](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). 
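Beyond a plain `SELECT *`, the byte-valued columns lend themselves to a quick capacity check. A minimal sketch, relying on the `formatReadableSize` helper available in current ClickHouse releases; the rows returned depend on the local disk configuration:

```sql
-- Show each configured disk with human-readable sizes and its free-space ratio
SELECT
    name,
    path,
    formatReadableSize(free_space) AS free,
    formatReadableSize(total_space) AS total,
    round(free_space / total_space, 3) AS free_ratio
FROM system.disks;
```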
+```sql +:) SELECT * FROM system.disks; +``` -列: +```text +┌─name────┬─path─────────────────┬───free_space─┬──total_space─┬─keep_free_space─┐ +│ default │ /var/lib/clickhouse/ │ 276392587264 │ 490652508160 │ 0 │ +└─────────┴──────────────────────┴──────────────┴──────────────┴─────────────────┘ -- `policy_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the storage policy. -- `volume_name` ([字符串](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. -- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration. -- `disks` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. -- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). -- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. +1 rows in set. Elapsed: 0.001 sec. +``` -如果存储策略包含多个卷,则每个卷的信息将存储在表的单独行中。 +[原文](https://clickhouse.com/docs/zh/operations/system-tables/disks) diff --git a/docs/zh/operations/system-tables/merge_tree_settings.md b/docs/zh/operations/system-tables/merge_tree_settings.md index a6ad6f78f8e..48d9a7dd9af 100644 --- a/docs/zh/operations/system-tables/merge_tree_settings.md +++ b/docs/zh/operations/system-tables/merge_tree_settings.md @@ -1,16 +1,55 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.merge_tree_settings {#system-merge_tree_settings} -# 系统。merge_tree_settings {#system-merge_tree_settings} - -包含有关以下设置的信息 `MergeTree` 桌子 +包含 `MergeTree` 表的设置 (Setting) 信息。 列: -- `name` (String) — Setting name. -- `value` (String) — Setting value. -- `description` (String) — Setting description. -- `type` (String) — Setting type (implementation specific string value). -- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. +- `name` (String) — 设置名称。 +- `value` (String) — 设置的值。 +- `description` (String) — 设置描述。 +- `type` (String) — 设置类型 (执行特定的字符串值)。 +- `changed` (UInt8) — 该设置是否在配置中明确定义或是明确改变。 + + +**示例** +```sql +:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; +``` + +```text +Row 1: +────── +name: index_granularity +value: 8192 +changed: 0 +description: How many rows correspond to one primary key value. +type: SettingUInt64 + +Row 2: +────── +name: min_bytes_for_wide_part +value: 0 +changed: 0 +description: Minimal uncompressed size in bytes to create part in wide format instead of compact +type: SettingUInt64 + +Row 3: +────── +name: min_rows_for_wide_part +value: 0 +changed: 0 +description: Minimal number of rows to create part in wide format instead of compact +type: SettingUInt64 + +Row 4: +────── +name: merge_max_block_size +value: 8192 +changed: 0 +description: How many rows in blocks should be formed for merge operations. +type: SettingUInt64 + +4 rows in set. Elapsed: 0.001 sec. 
+``` + +[原文](https://clickhouse.com/docs/zh/operations/system-tables/merge_tree_settings) diff --git a/docs/zh/operations/system-tables/metrics.md b/docs/zh/operations/system-tables/metrics.md index 34b7fa35681..5b5b4615f82 100644 --- a/docs/zh/operations/system-tables/metrics.md +++ b/docs/zh/operations/system-tables/metrics.md @@ -1,19 +1,14 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.metrics {#system_tables-metrics} -# 系统。指标 {#system_tables-metrics} - -包含可以立即计算或具有当前值的指标。 例如,同时处理的查询的数量或当前副本的延迟。 此表始终是最新的。 +此系统表包含可以即时计算或具有当前值的指标。例如,同时处理的查询数量或当前的复制延迟。这个表始终是最新的。 列: -- `metric` ([字符串](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. -- `description` ([字符串](../../sql-reference/data-types/string.md)) — Metric description. +- `metric` ([字符串](../../sql-reference/data-types/string.md)) — 指标名称. +- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — 指标的值. +- `description` ([字符串](../../sql-reference/data-types/string.md)) — 指标的描述. -支持的指标列表,您可以在 [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) ClickHouse的源文件。 +对于支持的指标列表,您可以查看 [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) ClickHouse 的源文件。 **示例** @@ -38,7 +33,7 @@ SELECT * FROM system.metrics LIMIT 10 **另请参阅** -- [系统。asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred. -- [系统。metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. -- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. +- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — 包含周期性的计算指标。 +- [system.events](../../operations/system-tables/events.md#system_tables-events) — 包含发生的一些事件。 +- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — 包含`system.metrics`表和`system.events`表的历史指标值。 +- [监控](../../operations/monitoring.md) — ClickHouse 监控的基本概念。 diff --git a/docs/zh/operations/system-tables/numbers.md b/docs/zh/operations/system-tables/numbers.md index c42c87053ca..fd67baa01a5 100644 --- a/docs/zh/operations/system-tables/numbers.md +++ b/docs/zh/operations/system-tables/numbers.md @@ -1,12 +1,32 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.numbers {#system-numbers} -# 系统。数字 {#system-numbers} +这个表有一个名为 `number` 的 UInt64 列,包含了几乎所有从 0 开始的自然数。 -此表包含一个名为UInt64的列 `number` 它包含几乎所有从零开始的自然数。 +你可以用这个表进行测试,或者如果你需要进行暴力搜索。 -您可以使用此表进行测试,或者如果您需要进行暴力搜索。 +从该表的读取是不并行的。 -从此表中读取的内容不是并行的。 +**示例** + +```sql +:) SELECT * FROM system.numbers LIMIT 10; +``` + +```text +┌─number─┐ +│ 0 │ +│ 1 │ +│ 2 │ +│ 3 │ +│ 4 │ +│ 5 │ +│ 6 │ +│ 7 │ +│ 8 │ +│ 9 │ +└────────┘ + +10 rows in set. Elapsed: 0.001 sec. 
+``` + +[原文](https://clickhouse.com/docs/zh/operations/system-tables/numbers) diff --git a/docs/zh/operations/system-tables/one.md b/docs/zh/operations/system-tables/one.md index a8dc64c18c7..79d2c0199d8 100644 --- a/docs/zh/operations/system-tables/one.md +++ b/docs/zh/operations/system-tables/one.md @@ -1,12 +1,23 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.one {#system-one} -# 系统。一 {#system-one} +此表包含一行只有一个值为 0 的 `dummy` UInt8 列的数据。 -此表包含一行,其中包含一行 `dummy` UInt8列包含值0。 +如果 `SELECT` 查询没有指定 `FROM` 子句,就会使用这个表来查询。 -如果使用此表 `SELECT` 查询不指定 `FROM` 条款 +这个表类似于其他数据库管理系统(DMBS)中的 `DUAL` 表。 -这类似于 `DUAL` 表在其他Dbms中找到。 +**示例** + +```sql +:) SELECT * FROM system.one LIMIT 10; +``` + +```text +┌─dummy─┐ +│ 0 │ +└───────┘ + +1 rows in set. Elapsed: 0.001 sec. +``` + +[原文](https://clickhouse.com/docs/zh/operations/system-tables/one) diff --git a/docs/zh/operations/system-tables/parts.md b/docs/zh/operations/system-tables/parts.md index e924ee27df3..dc98288305f 100644 --- a/docs/zh/operations/system-tables/parts.md +++ b/docs/zh/operations/system-tables/parts.md @@ -1,85 +1,167 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.parts {#system_tables-parts} -# 系统。零件 {#system_tables-parts} +此系统表包含 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 表分区的相关信息。 -包含有关的部分信息 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 桌子 - -每行描述一个数据部分。 +每一行描述一个数据分区。 列: -- `partition` (String) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter.md#query_language_queries_alter) 查询。 +- `partition` ([String](../../sql-reference/data-types/string.md)) – 分区名称。请参阅 [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) 查询的说明,来了解什么是分区。 格式: - `YYYYMM` 用于按月自动分区。 - - `any_string` 手动分区时。 + - `any_string` 手动分区时,是其他格式的字符串。 -- `name` (`String`) – Name of the data part. +- `name` ([String](../../sql-reference/data-types/string.md)) – 数据分区的名称。 -- `active` (`UInt8`) – Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's deleted. Inactive data parts remain after merging. +- `part_type` ([String](../../sql-reference/data-types/string.md)) — 数据分区的存储格式。 -- `marks` (`UInt64`) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` 通过索引粒度(通常为8192)(此提示不适用于自适应粒度)。 + 可能的值: -- `rows` (`UInt64`) – The number of rows. + - `Wide` — 每一列在文件系统中的一个单独文件中存储。 + - `Compact` — 所有列在文件系统中的一个文件中存储。 -- `bytes_on_disk` (`UInt64`) – Total size of all the data part files in bytes. + 数据存储格式由 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) 表的 `min_bytes_for_wide_part` 和 `min_rows_for_wide_part` 控制。 -- `data_compressed_bytes` (`UInt64`) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included. + - `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – 指示数据分区是否处于活动状态的标志。如果数据分区处于活动状态,则此数据正在被表使用。反之,则不活跃(deleted)。合并后仍会保留非活跃的数据分区。 -- `data_uncompressed_bytes` (`UInt64`) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. +- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 标记数。要获得数据分区中的大致行数:使用`marks`(标记数)乘以索引粒度(通常为 8192)。不适用于自适应颗粒度。 -- `marks_bytes` (`UInt64`) – The size of the file with marks. 
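As a rough illustration of the `marks` note above (approximate row count ≈ marks × index granularity), the estimate can be compared with the exact `rows` column; `default.hits` is a placeholder MergeTree table and 8192 is the default, non-adaptive granularity:

```sql
-- Compare marks * index_granularity with the exact row count of each active part
-- (default.hits is a placeholder table; 8192 is the default index_granularity,
--  and the estimate does not hold with adaptive granularity)
SELECT
    name,
    marks,
    marks * 8192 AS approx_rows,
    rows
FROM system.parts
WHERE database = 'default' AND table = 'hits' AND active;
```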
+- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 行数. -- `modification_time` (`DateTime`) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation.\| +- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 数据总大小(以字节为单位)。 -- `remove_time` (`DateTime`) – The time when the data part became inactive. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 数据分区中压缩数据的总大小。不包括所有辅助文件(例如,带有标记的文件)。 -- `refcount` (`UInt32`) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 数据分区中未压缩数据的总大小。不包括所有辅助文件(例如,带有标记的文件)。 -- `min_date` (`Date`) – The minimum value of the date key in the data part. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 带有标记的文件的大小。 -- `max_date` (`Date`) – The maximum value of the date key in the data part. +- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 数据分区中二级索引的压缩数据总大小。所有的辅助文件(例如,带有标记的文件)都不包括在内。 -- `min_time` (`DateTime`) – The minimum value of the date and time key in the data part. +- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 数据分区中二级索引的未压缩数据的总大小。所有的辅助文件(例如,带有标记的文件)都不包括在内。 -- `max_time`(`DateTime`) – The maximum value of the date and time key in the data part. +- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 带标记的二级索引的文件大小。 -- `partition_id` (`String`) – ID of the partition. +- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – 包含数据分区的目录被修改的时间。这通常对应于数据部分创建的时间。 -- `min_block_number` (`UInt64`) – The minimum number of data parts that make up the current part after merging. +- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – 数据分区变为非活动状态的时间。 -- `max_block_number` (`UInt64`) – The maximum number of data parts that make up the current part after merging. +- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) – 使用数据部分的位置数。大于 2 的值表示数据部分用于查询或是用于合并。 -- `level` (`UInt32`) – Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts. +- `min_date` ([Date](../../sql-reference/data-types/date.md)) – 数据部分中日期键的最小值。 -- `data_version` (`UInt64`) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`). +- `max_date` ([Date](../../sql-reference/data-types/date.md)) – 数据部分中日期键的最大值。 -- `primary_key_bytes_in_memory` (`UInt64`) – The amount of memory (in bytes) used by primary key values. +- `min_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – 数据部分中日期和时间键的最小值。 -- `primary_key_bytes_in_memory_allocated` (`UInt64`) – The amount of memory (in bytes) reserved for primary key values. +- `max_time`([DateTime](../../sql-reference/data-types/datetime.md)) – 数据部分中日期和时间键的最大值。 -- `is_frozen` (`UInt8`) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn't exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter.md#alter_freeze-partition) +- `partition_id` ([String](../../sql-reference/data-types/string.md)) – 分区的 ID。 -- `database` (`String`) – Name of the database. 
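Aggregating over active parts is a common way to read the size columns above; a minimal per-table disk-usage sketch:

```sql
-- Total on-disk size and row count of the active parts of each table
SELECT
    database,
    table,
    formatReadableSize(sum(bytes_on_disk)) AS size_on_disk,
    sum(rows) AS total_rows
FROM system.parts
WHERE active
GROUP BY database, table
ORDER BY sum(bytes_on_disk) DESC;
```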
+- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 合并后构成当前部分的最小数据部分数量。 -- `table` (`String`) – Name of the table. +- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 合并后构成当前部分的最大数据部分数量。 -- `engine` (`String`) – Name of the table engine without parameters. +- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) – 合并树的深度。值为 0 表示该分区是通过插入创建的,而不是通过合并创建的。 -- `path` (`String`) – Absolute path to the folder with data part files. +- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 用于确定应将哪些订正(mutations)应用于数据部分(版本高于 `data_version` 的订正(mutations))的数字。 -- `disk` (`String`) – Name of a disk that stores the data part. +- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 主键值使用的内存量(以字节为单位)。 -- `hash_of_all_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 的压缩文件。 +- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) – 为主键值保留的内存量(以字节为单位)。 -- `hash_of_uncompressed_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 未压缩的文件(带标记的文件,索引文件等。). +- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) – 显示分区数据备份存在的标志。1,备份存在。0,备份不存在。更多细节,见 [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition)。 -- `uncompressed_hash_of_compressed_files` (`String`) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) 压缩文件中的数据,就好像它们是未压缩的。 +- `database` ([String](../../sql-reference/data-types/string.md)) – 数据库的名称。 -- `bytes` (`UInt64`) – Alias for `bytes_on_disk`. +- `table` ([String](../../sql-reference/data-types/string.md)) – 表的名称。 -- `marks_size` (`UInt64`) – Alias for `marks_bytes`. +- `engine` ([String](../../sql-reference/data-types/string.md)) – 不带参数的表引擎名称。 + +- `path` ([String](../../sql-reference/data-types/string.md)) – 包含数据部分文件的文件夹的绝对路径。 + +- `disk` ([String](../../sql-reference/data-types/string.md)) – 存储数据部分的磁盘的名称。 + +- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – 压缩文件的 [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128)。 + +- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) – 未压缩文件(带有标记的文件、索引文件等)的 [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128)。 + +- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) – 压缩文件中的数据(没有压缩时)的 [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128)。 + +- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — [TTL DELETE 规则](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的日期和时间键的最小值。 + +- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — [TTL DELETE 规则](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的日期和时间键的最大值。 + +- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 表达式的数组。 每个表达式定义一个 [TTL MOVE 规则](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). + + !!! 
note "警告" + 保留 `move_ttl_info.expression` 数组主要是为了向后兼容,现在检查 `TTL MOVE` 规则最简单的方法是使用 `move_ttl_info.min` 和 `move_ttl_info.max` 字段。 + +- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — 日期值和时间值的数组。数组中的每个元素都描述了一个 [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的最小键值。 + +- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — 日期值和时间值的数组。数组中的每个元素都描述了一个 [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) 的最大键值。 + +- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – `bytes_on_disk`的别名。 + +- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – `marks_bytes`的别名。 + +**示例** + +``` sql +SELECT * FROM system.parts LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +partition: tuple() +name: all_1_4_1_6 +part_type: Wide +active: 1 +marks: 2 +rows: 6 +bytes_on_disk: 310 +data_compressed_bytes: 157 +data_uncompressed_bytes: 91 +secondary_indices_compressed_bytes: 58 +secondary_indices_uncompressed_bytes: 6 +secondary_indices_marks_bytes: 48 +marks_bytes: 144 +modification_time: 2020-06-18 13:01:49 +remove_time: 1970-01-01 00:00:00 +refcount: 1 +min_date: 1970-01-01 +max_date: 1970-01-01 +min_time: 1970-01-01 00:00:00 +max_time: 1970-01-01 00:00:00 +partition_id: all +min_block_number: 1 +max_block_number: 4 +level: 1 +data_version: 6 +primary_key_bytes_in_memory: 8 +primary_key_bytes_in_memory_allocated: 64 +is_frozen: 0 +database: default +table: months +engine: MergeTree +disk_name: default +path: /var/lib/clickhouse/data/default/months/all_1_4_1_6/ +hash_of_all_files: 2d0657a16d9430824d35e327fcbd87bf +hash_of_uncompressed_files: 84950cc30ba867c77a408ae21332ba29 +uncompressed_hash_of_compressed_files: 1ad78f1c6843bbfb99a2c931abe7df7d +delete_ttl_info_min: 1970-01-01 00:00:00 +delete_ttl_info_max: 1970-01-01 00:00:00 +move_ttl_info.expression: [] +move_ttl_info.min: [] +move_ttl_info.max: [] +``` + +**另请参阅** + +- [MergeTree(合并树)家族](../../engines/table-engines/mergetree-family/mergetree.md) +- [列和表的 TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) + +[原文](https://clickhouse.com/docs/zh/operations/system-tables/parts) diff --git a/docs/zh/operations/system-tables/settings.md b/docs/zh/operations/system-tables/settings.md index c717c8c9562..144eb0179c4 100644 --- a/docs/zh/operations/system-tables/settings.md +++ b/docs/zh/operations/system-tables/settings.md @@ -1,27 +1,22 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.settings {#system-tables-system-settings} -# 系统。设置 {#system-tables-system-settings} - -包含有关当前用户的会话设置的信息。 +包含当前用户会话设置的相关信息。 列: -- `name` ([字符串](../../sql-reference/data-types/string.md)) — Setting name. -- `value` ([字符串](../../sql-reference/data-types/string.md)) — Setting value. -- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. -- `description` ([字符串](../../sql-reference/data-types/string.md)) — Short setting description. -- `min` ([可为空](../../sql-reference/data-types/nullable.md)([字符串](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [制约因素](../../operations/settings/constraints-on-settings.md#constraints-on-settings). 
如果设置没有最小值,则包含 [NULL](../../sql-reference/syntax.md#null-literal). -- `max` ([可为空](../../sql-reference/data-types/nullable.md)([字符串](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [制约因素](../../operations/settings/constraints-on-settings.md#constraints-on-settings). 如果设置没有最大值,则包含 [NULL](../../sql-reference/syntax.md#null-literal). -- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: - - `0` — Current user can change the setting. - - `1` — Current user can't change the setting. +- `name` ([字符串](../../sql-reference/data-types/string.md)) — 设置名称。 +- `value` ([字符串](../../sql-reference/data-types/string.md)) — 设置的值。 +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 显示该设置是否从其默认值修改。 +- `description` ([字符串](../../sql-reference/data-types/string.md)) — 该设置的简要描述。 +- `min` ([可为空](../../sql-reference/data-types/nullable.md)([字符串](../../sql-reference/data-types/string.md))) — 该设置的最小值,如果有最小值,则是通过[约束](../../operations/settings/constraints-on-settings.md#constraints-on-settings)设置的。如果该设置没有最小值,则包含 [NULL](../../sql-reference/syntax.md#null-literal). +- `max` ([可为空](../../sql-reference/data-types/nullable.md)([字符串](../../sql-reference/data-types/string.md))) — 该设置的最大值, 如果有最大值,则是通过[约束](../../operations/settings/constraints-on-settings.md#constraints-on-settings)设置的。如果该设置没有最大值,则包含 [NULL](../../sql-reference/syntax.md#null-literal). +- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 当前用户是否可以修改该设置: + - `0` — 当前用户可以修改此设置. + - `1` — 当前用户不能修改此设置. **示例** -下面的示例演示如何获取有关名称包含的设置的信息 `min_i`. +下面的例子显示了如何获得设置名称中包含`min_i`的设置信息。 ``` sql SELECT * @@ -37,10 +32,10 @@ WHERE name LIKE '%min_i%' └─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘ ``` -使用 `WHERE changed` 可以是有用的,例如,当你想检查: +比如,当你想要检查以下情况时,使用 `WHERE changed` 会很有用: -- 配置文件中的设置是否正确加载并正在使用。 -- 在当前会话中更改的设置。 +- 配置文件中的设置是否正确加载,并正在使用。 +- 在当前会话中更改过的设置。 @@ -52,4 +47,6 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing' - [设置](../../operations/settings/index.md#session-settings-intro) - [查询权限](../../operations/settings/permissions-for-queries.md#settings_readonly) -- [对设置的限制](../../operations/settings/constraints-on-settings.md) +- [对设置的约束](../../operations/settings/constraints-on-settings.md) + +[原文](https://clickhouse.com/docs/zh/operations/system-tables/settings) diff --git a/docs/zh/operations/system-tables/storage_policies.md b/docs/zh/operations/system-tables/storage_policies.md index 550af6b2b27..e2531649493 100644 --- a/docs/zh/operations/system-tables/storage_policies.md +++ b/docs/zh/operations/system-tables/storage_policies.md @@ -1,19 +1,17 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.storage_policies {#system_tables-storage_policies} -# 系统。storage_policies {#system_tables-storage_policies} - -包含有关存储策略和卷中定义的信息 [服务器配置](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure). +包含有关 [服务器配置](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes_configure) 中定义的存储策略和卷信息。 列: -- `policy_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the storage policy. 
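A small sketch of how the columns of this table are usually read together, listing the volumes of every policy in priority order; the actual rows depend on the local storage configuration:

```sql
-- Volumes and disks of each storage policy, ordered by fill priority
SELECT
    policy_name,
    volume_name,
    volume_priority,
    disks,
    formatReadableSize(max_data_part_size) AS max_part_size,
    move_factor
FROM system.storage_policies
ORDER BY policy_name, volume_priority;
```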
-- `volume_name` ([字符串](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. -- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration. -- `disks` ([数组(字符串)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. -- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). -- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. +- `policy_name` ([String](../../sql-reference/data-types/string.md)) — 存储策略的名称。 +- `volume_name` ([String](../../sql-reference/data-types/string.md)) — 存储策略中定义的卷名称。 +- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 配置中的卷顺序号,数据根据这个优先级填充卷,比如插入和合并期间的数据将被写入优先级较低的卷 (还需考虑其他规则: TTL, `max_data_part_size`, `move_factor`)。 +- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — 存储策略中定义的磁盘名。 +- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 可以存储在卷磁盘上数据部分的最大大小 (0 - 不限制)。 +- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — 磁盘空闲的比率。当比率超过配置的值,ClickHouse 将把数据向下一个卷移动。 +- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 设置中 `prefer_not_to_merge` 的值. 当这个设置启用时,不允许在此卷上合并数据。这将允许控制 ClickHouse 如何与运行速度较慢的磁盘一起工作。 -如果存储策略包含多个卷,则每个卷的信息将存储在表的单独行中。 +如果存储策略包含多个卷,则每个卷的信息将在表中作为单独一行存储。 + +[原文](https://clickhouse.com/docs/zh/operations/system-tables/storage_policies) diff --git a/docs/zh/operations/system-tables/tables.md b/docs/zh/operations/system-tables/tables.md index 6a719a92ca0..03ea9f93d26 100644 --- a/docs/zh/operations/system-tables/tables.md +++ b/docs/zh/operations/system-tables/tables.md @@ -1,58 +1,128 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.tables {#system-tables} -# 系统。表 {#system-tables} +包含服务器知道的每个表的元数据。 [分离的](../../sql-reference/statements/detach.md)表不在 `system.tables` 显示。 -包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`。 +[临时表](../../sql-reference/statements/create/table.md#temporary-tables)只在创建它们的会话中的 `system.tables` 中才可见。它们的数据库字段显示为空,并且 `is_temporary` 标志显示为开启。 -此表包含以下列(列类型显示在括号中): +此表包含以下列 (列类型显示在括号中): -- `database` (String) — 表所在的数据库表名。 +- `database` ([String](../../sql-reference/data-types/string.md)) — 表所在的数据库名。 -- `name` (String) — 表名。 +- `name` ([String](../../sql-reference/data-types/string.md)) — 表名。 -- `engine` (String) — 表引擎名 (不包含参数)。 +- `engine` ([String](../../sql-reference/data-types/string.md)) — 表引擎名 (不包含参数)。 -- `is_temporary` (UInt8)-指示表是否是临时的标志。 +- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - 指示表是否是临时的标志。 -- `data_path` (String)-文件系统中表数据的路径。 +- `data_path` ([String](../../sql-reference/data-types/string.md)) - 表数据在文件系统中的路径。 -- `metadata_path` (String)-文件系统中表元数据的路径。 +- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - 表元数据在文件系统中的路径。 -- `metadata_modification_time` (DateTime)-表元数据的最新修改时间。 +- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - 表元数据的最新修改时间。 -- `dependencies_database` (数组(字符串))-数据库依赖关系。 +- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - 数据库依赖关系。 -- `dependencies_table` (数组(字符串))-表依赖关系 
([MaterializedView](../../engines/table-engines/special/materializedview.md) 基于当前表的表)。 +- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - 表依赖关系 (基于当前表的 [物化视图](../../engines/table-engines/special/materializedview.md) 表) 。 -- `create_table_query` (String)-用于创建表的SQL语句。 +- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - 用于创建表的 SQL 语句。 -- `engine_full` (String)-表引擎的参数。 +- `engine_full` ([String](../../sql-reference/data-types/string.md)) - 表引擎的参数。 -- `partition_key` (String)-表中指定的分区键表达式。 +- `as_select` ([String](../../sql-reference/data-types/string.md)) - 视图的 `SELECT` 语句。 -- `sorting_key` (String)-表中指定的排序键表达式。 +- `partition_key` ([String](../../sql-reference/data-types/string.md)) - 表中指定的分区键表达式。 -- `primary_key` (String)-表中指定的主键表达式。 +- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - 表中指定的排序键表达式。 -- `sampling_key` (String)-表中指定的采样键表达式。 +- `primary_key` ([String](../../sql-reference/data-types/string.md)) - 表中指定的主键表达式。 -- `storage_policy` (字符串)-存储策略: +- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - 表中指定的采样键表达式。 + +- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - 存储策略: - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) - - [分布](../../engines/table-engines/special/distributed.md#distributed) + - [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则行数为`Null`(包括底层 `Buffer` 表)。 +- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - 总行数,如果无法快速确定表中的确切行数,则行数返回为 `NULL` (包括底层 `Buffer` 表) 。 -- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则字节数为`Null` (即**不** 包括任何底层存储)。 +- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - 总字节数,如果无法快速确定存储表的确切字节数,则字节数返回为 `NULL` ( **不** 包括任何底层存储) 。 - - 如果表将数据存在磁盘上,返回实际使用的磁盘空间(压缩后)。 + - 如果表将数据存在磁盘上,返回实际使用的磁盘空间 (压缩后) 。 - 如果表在内存中存储数据,返回在内存中使用的近似字节数。 -- `lifetime_rows` (Nullbale(UInt64))-服务启动后插入的总行数(只针对`Buffer`表)。 +- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - 服务启动后插入的总行数(只针对 `Buffer` 表) 。 + + +- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - 服务启动后插入的总字节数(只针对 `Buffer` 表) 。 + + +- `comment` ([String](../../sql-reference/data-types/string.md)) - 表的注释。 + +- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — 标志,表示表本身是否在磁盘上存储数据,或者访问其他来源。 `system.tables` 表被用于 `SHOW TABLES` 的查询实现中。 +**示例** + +```sql +SELECT * FROM system.tables LIMIT 2 FORMAT Vertical; +``` + +```text +Row 1: +────── +database: base +name: t1 +uuid: 81b1c20a-b7c6-4116-a2ce-7583fb6b6736 +engine: MergeTree +is_temporary: 0 +data_paths: ['/var/lib/clickhouse/store/81b/81b1c20a-b7c6-4116-a2ce-7583fb6b6736/'] +metadata_path: /var/lib/clickhouse/store/461/461cf698-fd0b-406d-8c01-5d8fd5748a91/t1.sql +metadata_modification_time: 2021-01-25 19:14:32 +dependencies_database: [] +dependencies_table: [] +create_table_query: CREATE TABLE base.t1 (`n` UInt64) ENGINE = MergeTree ORDER BY n SETTINGS index_granularity = 8192 +engine_full: MergeTree ORDER BY n SETTINGS index_granularity = 8192 +as_select: SELECT database AS table_catalog +partition_key: +sorting_key: n +primary_key: 
n +sampling_key: +storage_policy: default +total_rows: 1 +total_bytes: 99 +lifetime_rows: ᴺᵁᴸᴸ +lifetime_bytes: ᴺᵁᴸᴸ +comment: +has_own_data: 0 + +Row 2: +────── +database: default +name: 53r93yleapyears +uuid: 00000000-0000-0000-0000-000000000000 +engine: MergeTree +is_temporary: 0 +data_paths: ['/var/lib/clickhouse/data/default/53r93yleapyears/'] +metadata_path: /var/lib/clickhouse/metadata/default/53r93yleapyears.sql +metadata_modification_time: 2020-09-23 09:05:36 +dependencies_database: [] +dependencies_table: [] +create_table_query: CREATE TABLE default.`53r93yleapyears` (`id` Int8, `febdays` Int8) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192 +engine_full: MergeTree ORDER BY id SETTINGS index_granularity = 8192 +as_select: SELECT name AS catalog_name +partition_key: +sorting_key: id +primary_key: id +sampling_key: +storage_policy: default +total_rows: 2 +total_bytes: 155 +lifetime_rows: ᴺᵁᴸᴸ +lifetime_bytes: ᴺᵁᴸᴸ +comment: +has_own_data: 0 +``` + + [原文](https://clickhouse.com/docs/zh/operations/system-tables/tables) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 536bb37199d..4d491a06795 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -46,7 +46,7 @@ void ClusterCopier::init() reloadTaskDescription(); task_cluster->loadTasks(*task_cluster_current_config); - getContext()->setClustersConfig(task_cluster_current_config, task_cluster->clusters_prefix); + getContext()->setClustersConfig(task_cluster_current_config, false, task_cluster->clusters_prefix); /// Set up shards and their priority task_cluster->random_engine.seed(task_cluster->random_device()); diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index a017355cfdd..706e273e2b4 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index afd6a36ea15..d144b4d332e 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -379,11 +380,11 @@ int Keeper::main(const std::vector & /*args*/) socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); servers->emplace_back( + listen_host, port_name, - std::make_unique( - new KeeperTCPHandlerFactory(*this, false), server_pool, socket, new Poco::Net::TCPServerParams)); - - LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString()); + "Keeper (tcp): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory(*this, false), server_pool, socket)); }); const char * secure_port_name = "keeper_server.tcp_port_secure"; @@ -395,10 +396,11 @@ int Keeper::main(const std::vector & /*args*/) socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); servers->emplace_back( + listen_host, secure_port_name, - std::make_unique( - new KeeperTCPHandlerFactory(*this, true), server_pool, socket, new Poco::Net::TCPServerParams)); - LOG_INFO(log, "Listening for connections to Keeper with secure protocol (tcp_secure): {}", address.toString()); + "Keeper with secure protocol (tcp_secure): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory(*this, true), server_pool, socket)); #else UNUSED(port); throw Exception{"SSL support for TCP protocol is disabled because Poco library was built 
without NetSSL support.", @@ -408,7 +410,10 @@ int Keeper::main(const std::vector & /*args*/) } for (auto & server : *servers) + { server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } zkutil::EventPtr unused_event = std::make_shared(); zkutil::ZooKeeperNodeCache unused_cache([] { return nullptr; }); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 8da8641bcdc..53e295b7fbb 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -792,9 +792,9 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp int mainEntryClickHouseLocal(int argc, char ** argv) { - DB::LocalServer app; try { + DB::LocalServer app; app.init(argc, argv); return app.run(); } diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index caccc726923..947e7ab1768 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index 710614130c3..1c28da2a072 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -1,13 +1,8 @@ #include "ODBCBlockOutputStream.h" -#include #include -#include -#include -#include -#include "getIdentifierQuote.h" -#include -#include +#include +#include #include #include @@ -45,7 +40,7 @@ void ODBCSink::consume(Chunk chunk) std::string query = getInsertQuery(db_name, table_name, block.getColumnsWithTypeAndName(), quoting) + values_buf.str(); execute(connection_holder, - [&](nanodbc::connection & connection) { execute(connection, query); }); + [&](nanodbc::connection & connection) { execute(connection, query); }); } } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 59e6e7adb91..adc6c911031 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,7 @@ #include "MetricsTransmitter.h" #include #include +#include #include #include #include @@ -128,6 +130,11 @@ namespace CurrentMetrics extern const Metric MaxPushedDDLEntryID; } +namespace ProfileEvents +{ + extern const Event MainConfigLoads; +} + namespace fs = std::filesystem; #if USE_JEMALLOC @@ -345,16 +352,53 @@ Poco::Net::SocketAddress Server::socketBindListen(Poco::Net::ServerSocket & sock return address; } -void Server::createServer(const std::string & listen_host, const char * port_name, bool listen_try, CreateServerFunc && func) const +std::vector getListenHosts(const Poco::Util::AbstractConfiguration & config) +{ + auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); + if (listen_hosts.empty()) + { + listen_hosts.emplace_back("::1"); + listen_hosts.emplace_back("127.0.0.1"); + } + return listen_hosts; +} + +bool getListenTry(const Poco::Util::AbstractConfiguration & config) +{ + bool listen_try = config.getBool("listen_try", false); + if (!listen_try) + listen_try = DB::getMultipleValuesFromConfig(config, "", "listen_host").empty(); + return listen_try; +} + + +void Server::createServer( + Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + bool listen_try, + bool start_server, + std::vector & servers, + CreateServerFunc && func) const { /// For testing purposes, user may omit tcp_port or http_port or 
https_port in configuration file. - if (!config().has(port_name)) + if (config.getString(port_name, "").empty()) return; - auto port = config().getInt(port_name); + /// If we already have an active server for this listen_host/port_name, don't create it again + for (const auto & server : servers) + if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) + return; + + auto port = config.getInt(port_name); try { - func(port); + servers.push_back(func(port)); + if (start_server) + { + servers.back().start(); + LOG_INFO(&logger(), "Listening for {}", servers.back().getDescription()); + } global_context->registerServerPort(port_name, port); } catch (const Poco::Exception &) @@ -516,6 +560,27 @@ if (ThreadFuzzer::instance().isEffective()) config().getUInt("thread_pool_queue_size", 10000) ); + Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024)); + std::mutex servers_lock; + std::vector servers; + std::vector servers_to_start_before_tables; + /// This object will periodically calculate some metrics. + AsynchronousMetrics async_metrics( + global_context, config().getUInt("asynchronous_metrics_update_period_s", 1), + [&]() -> std::vector + { + std::vector metrics; + metrics.reserve(servers_to_start_before_tables.size()); + for (const auto & server : servers_to_start_before_tables) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + + std::lock_guard lock(servers_lock); + for (const auto & server : servers) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + return metrics; + } + ); + ConnectionCollector::init(global_context, config().getUInt("max_threads_for_connection_collector", 10)); bool has_zookeeper = config().has("zookeeper"); @@ -847,7 +912,7 @@ if (ThreadFuzzer::instance().isEffective()) // in a lot of places. For now, disable updating log configuration without server restart. //setTextLog(global_context->getTextLog()); updateLevels(*config, logger()); - global_context->setClustersConfig(config); + global_context->setClustersConfig(config, has_zookeeper); global_context->setMacros(std::make_unique(*config, "macros", log)); global_context->setExternalAuthenticatorsConfig(*config); @@ -876,12 +941,17 @@ if (ThreadFuzzer::instance().isEffective()) global_context->reloadZooKeeperIfChanged(config); global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); + + std::lock_guard lock(servers_lock); + updateServers(*config, server_pool, async_metrics, servers); } global_context->updateStorageConfiguration(*config); global_context->updateInterserverCredentials(*config); CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs"); + + ProfileEvents::increment(ProfileEvents::MainConfigLoads); }, /* already_loaded = */ false); /// Reload it right now (initial loading) @@ -993,24 +1063,8 @@ if (ThreadFuzzer::instance().isEffective()) /// try set up encryption. There are some errors in config, error will be printed and server wouldn't start. 
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs"); - Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); - - Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024)); - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); - - auto servers_to_start_before_tables = std::make_shared>(); - - std::vector listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host"); - - bool listen_try = config().getBool("listen_try", false); - if (listen_hosts.empty()) - { - listen_hosts.emplace_back("::1"); - listen_hosts.emplace_back("127.0.0.1"); - listen_try = true; - } + const auto listen_hosts = getListenHosts(config()); + const auto listen_try = getListenTry(config()); if (config().has("keeper_server")) { @@ -1033,39 +1087,46 @@ if (ThreadFuzzer::instance().isEffective()) { /// TCP Keeper const char * port_name = "keeper_server.tcp_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers_to_start_before_tables->emplace_back( - port_name, - std::make_unique( - new KeeperTCPHandlerFactory(*this, false), server_pool, socket, new Poco::Net::TCPServerParams)); - - LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString()); - }); + createServer( + config(), listen_host, port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Keeper (tcp): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory(*this, false), server_pool, socket)); + }); const char * secure_port_name = "keeper_server.tcp_port_secure"; - createServer(listen_host, secure_port_name, listen_try, [&](UInt16 port) - { + createServer( + config(), listen_host, secure_port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers_to_start_before_tables->emplace_back( - secure_port_name, - std::make_unique( - new KeeperTCPHandlerFactory(*this, true), server_pool, socket, new Poco::Net::TCPServerParams)); - LOG_INFO(log, "Listening for connections to Keeper with secure protocol (tcp_secure): {}", address.toString()); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + secure_port_name, + "Keeper with secure protocol (tcp_secure): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory(*this, true), server_pool, socket)); #else - UNUSED(port); - throw Exception{"SSL support 
for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + UNUSED(port); + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; #endif - }); + }); } #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); @@ -1073,14 +1134,19 @@ if (ThreadFuzzer::instance().isEffective()) } - for (auto & server : *servers_to_start_before_tables) + for (auto & server : servers_to_start_before_tables) + { server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } SCOPE_EXIT({ /// Stop reloading of the main config. This must be done before `global_context->shutdown()` because /// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart. main_config_reloader.reset(); + async_metrics.stop(); + /** Ask to cancel background jobs all table engines, * and also query_log. * It is important to do early, not in destructor of Context, because @@ -1092,11 +1158,11 @@ if (ThreadFuzzer::instance().isEffective()) LOG_DEBUG(log, "Shut down storages."); - if (!servers_to_start_before_tables->empty()) + if (!servers_to_start_before_tables.empty()) { LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); int current_connections = 0; - for (auto & server : *servers_to_start_before_tables) + for (auto & server : servers_to_start_before_tables) { server.stop(); current_connections += server.currentConnections(); @@ -1108,7 +1174,7 @@ if (ThreadFuzzer::instance().isEffective()) LOG_INFO(log, "Closed all listening sockets."); if (current_connections > 0) - current_connections = waitServersToFinish(*servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); @@ -1262,223 +1328,18 @@ if (ThreadFuzzer::instance().isEffective()) LOG_INFO(log, "TaskStats is not implemented for this OS. IO accounting will be disabled."); #endif - auto servers = std::make_shared>(); { - /// This object will periodically calculate some metrics. 
- AsynchronousMetrics async_metrics( - global_context, config().getUInt("asynchronous_metrics_update_period_s", 1), servers_to_start_before_tables, servers); attachSystemTablesAsync(global_context, *DatabaseCatalog::instance().getSystemDatabase(), async_metrics); - for (const auto & listen_host : listen_hosts) { - /// HTTP - const char * port_name = "http_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - - servers->emplace_back( - port_name, - std::make_unique( - context(), createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); - - LOG_INFO(log, "Listening for http://{}", address.toString()); - }); - - /// HTTPS - port_name = "https_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back( - port_name, - std::make_unique( - context(), createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); - - LOG_INFO(log, "Listening for https://{}", address.toString()); -#else - UNUSED(port); - throw Exception{"HTTPS protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - }); - - /// TCP - port_name = "tcp_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers->emplace_back(port_name, std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - - LOG_INFO(log, "Listening for connections with native protocol (tcp): {}", address.toString()); - }); - - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - port_name = "tcp_with_proxy_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers->emplace_back(port_name, std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - - LOG_INFO(log, "Listening for connections with native protocol (tcp) with PROXY: {}", address.toString()); - }); - - /// TCP with SSL - port_name = "tcp_port_secure"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers->emplace_back(port_name, std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), - server_pool, - socket, - new 
Poco::Net::TCPServerParams)); - LOG_INFO(log, "Listening for connections with secure native protocol (tcp_secure): {}", address.toString()); -#else - UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - }); - - /// Interserver IO HTTP - port_name = "interserver_http_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back( - port_name, - std::make_unique( - context(), - createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - socket, - http_params)); - - LOG_INFO(log, "Listening for replica communication (interserver): http://{}", address.toString()); - }); - - port_name = "interserver_https_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back( - port_name, - std::make_unique( - context(), - createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params)); - - LOG_INFO(log, "Listening for secure replica communication (interserver): https://{}", address.toString()); -#else - UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - }); - - port_name = "mysql_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - servers->emplace_back(port_name, std::make_unique( - new MySQLHandlerFactory(*this), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - - LOG_INFO(log, "Listening for MySQL compatibility protocol: {}", address.toString()); - }); - - port_name = "postgresql_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - servers->emplace_back(port_name, std::make_unique( - new PostgreSQLHandlerFactory(*this), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - - LOG_INFO(log, "Listening for PostgreSQL compatibility protocol: " + address.toString()); - }); - -#if USE_GRPC - port_name = "grpc_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::SocketAddress server_address(listen_host, port); - servers->emplace_back(port_name, std::make_unique(*this, makeSocketAddress(listen_host, port, log))); - LOG_INFO(log, "Listening for gRPC protocol: " + server_address.toString()); - }); -#endif - - /// Prometheus (if defined and not setup yet with http_port) - port_name = "prometheus.port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto 
address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back( - port_name, - std::make_unique( - context(), - createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), - server_pool, - socket, - http_params)); - - LOG_INFO(log, "Listening for Prometheus: http://{}", address.toString()); - }); + std::lock_guard lock(servers_lock); + createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); + if (servers.empty()) + throw Exception( + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", + ErrorCodes::NO_ELEMENTS_IN_CONFIG); } - if (servers->empty()) - throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); - - /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread. async_metrics.start(); { @@ -1557,9 +1418,24 @@ if (ThreadFuzzer::instance().isEffective()) &CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID)); } - for (auto & server : *servers) - server.start(); - LOG_INFO(log, "Ready for connections."); + { + std::lock_guard lock(servers_lock); + for (auto & server : servers) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } + LOG_INFO(log, "Ready for connections."); + } + + try + { + global_context->startClusterDiscovery(); + } + catch (...) + { + tryLogCurrentException(log, "Caught exception while starting cluster discovery"); + } SCOPE_EXIT_SAFE({ LOG_DEBUG(log, "Received termination signal."); @@ -1568,10 +1444,13 @@ if (ThreadFuzzer::instance().isEffective()) is_cancelled = true; int current_connections = 0; - for (auto & server : *servers) { - server.stop(); - current_connections += server.currentConnections(); + std::lock_guard lock(servers_lock); + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } } if (current_connections) @@ -1584,7 +1463,7 @@ if (ThreadFuzzer::instance().isEffective()) global_context->getProcessList().killAllQueries(); if (current_connections) - current_connections = waitServersToFinish(*servers, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections. But {} remain." 
@@ -1620,4 +1499,273 @@ if (ThreadFuzzer::instance().isEffective()) return Application::EXIT_OK; } + +void Server::createServers( + Poco::Util::AbstractConfiguration & config, + const std::vector & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + for (const auto & listen_host : listen_hosts) + { + /// HTTP + const char * port_name = "http_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + }); + + /// HTTPS + port_name = "https_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); +#else + UNUSED(port); + throw Exception{"HTTPS protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }); + + /// TCP + port_name = "tcp_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + port_name = "tcp_with_proxy_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), + 
server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + + /// TCP with SSL + port_name = "tcp_port_secure"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); +#else + UNUSED(port); + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }); + + /// Interserver IO HTTP + port_name = "interserver_http_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params)); + }); + + port_name = "interserver_https_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params)); +#else + UNUSED(port); + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }); + + port_name = "mysql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + + port_name = "postgresql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return 
ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + +#if USE_GRPC + port_name = "grpc_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); + }); +#endif + + /// Prometheus (if defined and not setup yet with http_port) + port_name = "prometheus.port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + }); + } + +} + +void Server::updateServers( + Poco::Util::AbstractConfiguration & config, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers) +{ + Poco::Logger * log = &logger(); + /// Gracefully shutdown servers when their port is removed from config + const auto listen_hosts = getListenHosts(config); + const auto listen_try = getListenTry(config); + + for (auto & server : servers) + if (!server.isStopping()) + { + bool has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + bool has_port = !config.getString(server.getPortName(), "").empty(); + if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber()) + { + server.stop(); + LOG_INFO(log, "Stopped listening for {}", server.getDescription()); + } + } + + createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers: */ true); + + /// Remove servers once all their connections are closed + while (std::any_of(servers.begin(), servers.end(), [](const auto & server) { return server.isStopping(); })) + { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + std::erase_if(servers, [&log](auto & server) + { + if (!server.isStopping()) + return false; + auto is_finished = server.currentConnections() == 0; + if (is_finished) + LOG_DEBUG(log, "Server finished: {}", server.getDescription()); + else + LOG_TRACE(log, "Waiting server to finish: {}", server.getDescription()); + return is_finished; + }); + } +} + } diff --git a/programs/server/Server.h b/programs/server/Server.h index 45e5fccd51d..b4f2ea3bb79 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -24,6 +24,8 @@ namespace Poco namespace DB { +class AsynchronousMetrics; +class ProtocolServerAdapter; class Server : public BaseDaemon, public IServer { @@ -67,8 +69,30 @@ private: ContextMutablePtr global_context; Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; - using CreateServerFunc = std::function; - void createServer(const std::string & listen_host, const char 
* port_name, bool listen_try, CreateServerFunc && func) const; + using CreateServerFunc = std::function; + void createServer( + Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + bool listen_try, + bool start_server, + std::vector & servers, + CreateServerFunc && func) const; + + void createServers( + Poco::Util::AbstractConfiguration & config, + const std::vector & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false); + + void updateServers( + Poco::Util::AbstractConfiguration & config, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers); }; } diff --git a/programs/server/config.xml b/programs/server/config.xml index 9a2a6d7729f..d88773a3fc4 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -152,6 +152,7 @@ This setting could be used to switch replication to another network interface (the server may be connected to multiple networks via multiple addresses) --> + @@ -177,6 +178,7 @@ --> + @@ -293,6 +295,10 @@ 10000 + + + 0.9 diff --git a/release b/release index 6e6970d7b00..3eb5591fe2c 100755 --- a/release +++ b/release @@ -87,7 +87,7 @@ if [ -z "$NO_BUILD" ] ; then # Build (only binary packages). debuild --preserve-env -e PATH \ -e DEB_CC=$DEB_CC -e DEB_CXX=$DEB_CXX -e CMAKE_FLAGS="$CMAKE_FLAGS" \ - -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS} + -b ${DEBUILD_NOSIGN_OPTIONS} ${DEBUILD_NODEPS_OPTIONS} ${DEB_ARCH_FLAG} fi if [ -n "$MAKE_RPM" ]; then diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index da1fd94239a..9cad53e667b 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes { extern const int UNKNOWN_ELEMENT_IN_CONFIG; extern const int UNKNOWN_SETTING; + extern const int AUTHENTICATION_FAILED; } @@ -401,9 +402,20 @@ void AccessControl::addStoragesFromMainConfig( } -UUID AccessControl::login(const Credentials & credentials, const Poco::Net::IPAddress & address) const +UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const { - return MultipleAccessStorage::login(credentials, address, *external_authenticators); + try + { + return MultipleAccessStorage::authenticate(credentials, address, *external_authenticators); + } + catch (...) + { + tryLogCurrentException(getLogger(), "from: " + address.toString() + ", user: " + credentials.getUserName() + ": Authentication failed"); + + /// We use the same message for all authentication failures because we don't want to give away any unnecessary information for security reasons, + /// only the log will show the exact reason. 
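The rewritten AccessControl::authenticate() keeps the detailed failure reason in the server log but reports one uniform message to the client. A self-contained sketch of that log-verbose/respond-generic pattern, with placeholder exception and check functions:

#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical credential check that can fail for different reasons.
bool checkPassword(const std::string & user, const std::string & password)
{
    if (user != "alice")
        throw std::runtime_error("there is no user with such name");
    if (password != "secret")
        throw std::runtime_error("password is incorrect");
    return true;
}

// Authenticate: keep the detailed reason in the server log only, and surface a
// uniform error to the caller so probing clients cannot distinguish the cases.
bool authenticate(const std::string & user, const std::string & password)
{
    try
    {
        return checkPassword(user, password);
    }
    catch (const std::exception & e)
    {
        std::cerr << "log: user " << user << ": authentication failed: " << e.what() << '\n';
        throw std::runtime_error(user + ": Authentication failed: password is incorrect or there is no user with such name");
    }
}

int main()
{
    try { authenticate("bob", "whatever"); }
    catch (const std::exception & e) { std::cout << "client sees: " << e.what() << '\n'; }
}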
+ throw Exception(credentials.getUserName() + ": Authentication failed: password is incorrect or there is no user with such name", ErrorCodes::AUTHENTICATION_FAILED); + } } void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index d891432266e..77709313d3e 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -113,7 +113,7 @@ public: bool isSettingNameAllowed(const std::string_view & name) const; void checkSettingNameIsAllowed(const std::string_view & name) const; - UUID login(const Credentials & credentials, const Poco::Net::IPAddress & address) const; + UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getContextAccess( diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 794c0a0d5d5..6bc9aeec4c2 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -54,7 +54,7 @@ namespace const Poco::SHA1Engine::Digest & digest = engine.digest(); Poco::SHA1Engine::Digest calculated_password_sha1(sha1_size); - for (size_t i = 0; i < sha1_size; i++) + for (size_t i = 0; i < sha1_size; ++i) calculated_password_sha1[i] = scrambled_password[i] ^ digest[i]; auto calculated_password_double_sha1 = Util::encodeSHA1(calculated_password_sha1); diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 1428e546d34..5215139b50c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -426,19 +426,24 @@ std::vector DiskAccessStorage::findAllImpl(AccessEntityType type) const return res; } -bool DiskAccessStorage::existsImpl(const UUID & id) const +bool DiskAccessStorage::exists(const UUID & id) const { std::lock_guard lock{mutex}; return entries_by_id.count(id); } -AccessEntityPtr DiskAccessStorage::readImpl(const UUID & id) const +AccessEntityPtr DiskAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const { std::lock_guard lock{mutex}; auto it = entries_by_id.find(id); if (it == entries_by_id.end()) - throwNotFound(id); + { + if (throw_if_not_exists) + throwNotFound(id); + else + return nullptr; + } const auto & entry = it->second; if (!entry.entity) @@ -447,43 +452,56 @@ AccessEntityPtr DiskAccessStorage::readImpl(const UUID & id) const } -String DiskAccessStorage::readNameImpl(const UUID & id) const +std::optional DiskAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const { std::lock_guard lock{mutex}; auto it = entries_by_id.find(id); if (it == entries_by_id.end()) - throwNotFound(id); - return String{it->second.name}; + { + if (throw_if_not_exists) + throwNotFound(id); + else + return std::nullopt; + } + return it->second.name; } -bool DiskAccessStorage::canInsertImpl(const AccessEntityPtr &) const -{ - return !readonly; -} - - -UUID DiskAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists) +std::optional DiskAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) { Notifications notifications; SCOPE_EXIT({ notify(notifications); }); UUID id = generateRandomID(); std::lock_guard lock{mutex}; - insertNoLock(id, new_entity, replace_if_exists, notifications); - return id; + if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, notifications)) + return id; + + return std::nullopt; } 
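A pattern that repeats through the storage changes below: the old `fooImpl()` plus `tryFoo()` pairs become one implementation that takes a `throw_if_not_exists` (or `throw_if_exists`) flag and returns an empty `std::optional` on the non-throwing path. A reduced illustration with an invented map-backed storage:

#include <iostream>
#include <map>
#include <optional>
#include <stdexcept>
#include <string>

class TinyStorage
{
public:
    // One implementation serves both the throwing and the "try" variant.
    std::optional<std::string> readName(int id, bool throw_if_not_exists) const
    {
        auto it = names.find(id);
        if (it == names.end())
        {
            if (throw_if_not_exists)
                throw std::runtime_error("ID " + std::to_string(id) + " not found");
            return std::nullopt;
        }
        return it->second;
    }

    // The convenience wrappers just pick the flag value.
    std::string getName(int id) const { return *readName(id, /* throw_if_not_exists = */ true); }
    std::optional<std::string> tryGetName(int id) const { return readName(id, /* throw_if_not_exists = */ false); }

    void add(int id, std::string name) { names.emplace(id, std::move(name)); }

private:
    std::map<int, std::string> names;
};

int main()
{
    TinyStorage storage;
    storage.add(1, "default");

    std::cout << storage.getName(1) << '\n';
    std::cout << storage.tryGetName(2).value_or("<missing>") << '\n';
}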
-void DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, Notifications & notifications) +bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, Notifications & notifications) { const String & name = new_entity->getName(); AccessEntityType type = new_entity->getType(); + /// Check that we can insert. if (readonly) throwReadonlyCannotInsert(type, name); - /// Check that we can insert. + auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; + auto it_by_name = entries_by_name.find(name); + bool name_collision = (it_by_name != entries_by_name.end()); + + if (name_collision && !replace_if_exists) + { + if (throw_if_exists) + throwNameCollisionCannotInsert(type, name); + else + return false; + } + auto it_by_id = entries_by_id.find(id); if (it_by_id != entries_by_id.end()) { @@ -491,18 +509,11 @@ void DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName()); } - auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; - auto it_by_name = entries_by_name.find(name); - bool name_collision = (it_by_name != entries_by_name.end()); - - if (name_collision && !replace_if_exists) - throwNameCollisionCannotInsert(type, name); - scheduleWriteLists(type); writeAccessEntityToDisk(id, *new_entity); if (name_collision && replace_if_exists) - removeNoLock(it_by_name->second->id, notifications); + removeNoLock(it_by_name->second->id, /* throw_if_not_exists = */ false, notifications); /// Do insertion. auto & entry = entries_by_id[id]; @@ -512,24 +523,30 @@ void DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne entry.entity = new_entity; entries_by_name[entry.name] = &entry; prepareNotifications(id, entry, false, notifications); + return true; } -void DiskAccessStorage::removeImpl(const UUID & id) +bool DiskAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists) { Notifications notifications; SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; - removeNoLock(id, notifications); + return removeNoLock(id, throw_if_not_exists, notifications); } -void DiskAccessStorage::removeNoLock(const UUID & id, Notifications & notifications) +bool DiskAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists, Notifications & notifications) { auto it = entries_by_id.find(id); if (it == entries_by_id.end()) - throwNotFound(id); + { + if (throw_if_not_exists) + throwNotFound(id); + else + return false; + } Entry & entry = it->second; AccessEntityType type = entry.type; @@ -545,28 +562,35 @@ void DiskAccessStorage::removeNoLock(const UUID & id, Notifications & notificati auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; entries_by_name.erase(entry.name); entries_by_id.erase(it); + return true; } -void DiskAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func) +bool DiskAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { Notifications notifications; SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; - updateNoLock(id, update_func, notifications); + return updateNoLock(id, update_func, throw_if_not_exists, notifications); } -void DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, Notifications & notifications) +bool 
DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, Notifications & notifications) { auto it = entries_by_id.find(id); if (it == entries_by_id.end()) - throwNotFound(id); + { + if (throw_if_not_exists) + throwNotFound(id); + else + return false; + } Entry & entry = it->second; if (readonly) throwReadonlyCannotUpdate(entry.type, entry.name); + if (!entry.entity) entry.entity = readAccessEntityFromDisk(id); auto old_entity = entry.entity; @@ -576,7 +600,7 @@ void DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_ throwBadCast(id, new_entity->getType(), new_entity->getName(), old_entity->getType()); if (*new_entity == *old_entity) - return; + return true; const String & new_name = new_entity->getName(); const String & old_name = old_entity->getName(); @@ -602,6 +626,7 @@ void DiskAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_ } prepareNotifications(id, entry, false, notifications); + return true; } @@ -675,7 +700,7 @@ scope_guard DiskAccessStorage::subscribeForChangesImpl(AccessEntityType type, co }; } -bool DiskAccessStorage::hasSubscriptionImpl(const UUID & id) const +bool DiskAccessStorage::hasSubscription(const UUID & id) const { std::lock_guard lock{mutex}; auto it = entries_by_id.find(id); @@ -687,7 +712,7 @@ bool DiskAccessStorage::hasSubscriptionImpl(const UUID & id) const return false; } -bool DiskAccessStorage::hasSubscriptionImpl(AccessEntityType type) const +bool DiskAccessStorage::hasSubscription(AccessEntityType type) const { std::lock_guard lock{mutex}; const auto & handlers = handlers_by_type[static_cast(type)]; diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 853a18590f0..20390dabfa0 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -24,22 +24,22 @@ public: bool isPathEqual(const String & directory_path_) const; void setReadOnly(bool readonly_) { readonly = readonly_; } - bool isReadOnly() const { return readonly; } + bool isReadOnly() const override { return readonly; } + + bool exists(const UUID & id) const override; + bool hasSubscription(const UUID & id) const override; + bool hasSubscription(AccessEntityType type) const override; private: std::optional findImpl(AccessEntityType type, const String & name) const override; std::vector findAllImpl(AccessEntityType type) const override; - bool existsImpl(const UUID & id) const override; - AccessEntityPtr readImpl(const UUID & id) const override; - String readNameImpl(const UUID & id) const override; - bool canInsertImpl(const AccessEntityPtr & entity) const override; - UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override; - void removeImpl(const UUID & id) override; - void updateImpl(const UUID & id, const UpdateFunc & update_func) override; + AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional readNameImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; + bool removeImpl(const UUID & id, bool throw_if_not_exists) override; + bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; - bool 
hasSubscriptionImpl(const UUID & id) const override; - bool hasSubscriptionImpl(AccessEntityType type) const override; void clear(); bool readLists(); @@ -50,9 +50,9 @@ private: void listsWritingThreadFunc(); void stopListsWritingThread(); - void insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, Notifications & notifications); - void removeNoLock(const UUID & id, Notifications & notifications); - void updateNoLock(const UUID & id, const UpdateFunc & update_func, Notifications & notifications); + bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, Notifications & notifications); + bool removeNoLock(const UUID & id, bool throw_if_not_exists, Notifications & notifications); + bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, Notifications & notifications); AccessEntityPtr readAccessEntityFromDisk(const UUID & id) const; void writeAccessEntityToDisk(const UUID & id, const IAccessEntity & entity) const; diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 51c2525d923..0a7fc630d97 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -21,8 +21,8 @@ namespace ErrorCodes extern const int ACCESS_STORAGE_READONLY; extern const int WRONG_PASSWORD; extern const int IP_ADDRESS_NOT_ALLOWED; - extern const int AUTHENTICATION_FAILED; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } @@ -32,101 +32,6 @@ namespace { return "ID(" + toString(id) + ")"; } - - String formatTypeWithNameOrID(const IAccessStorage & storage, const UUID & id) - { - auto entity = storage.tryRead(id); - if (entity) - return entity->formatTypeWithName(); - return outputID(id); - } - - - template - bool tryCall(const Func & function) - { - try - { - function(); - return true; - } - catch (...) 
- { - return false; - } - } - - - class ErrorsTracker - { - public: - explicit ErrorsTracker(size_t count_) { succeed.reserve(count_); } - - template - bool tryCall(const Func & func) - { - try - { - func(); - } - catch (Exception & e) - { - if (!exception) - exception.emplace(e); - succeed.push_back(false); - return false; - } - catch (Poco::Exception & e) - { - if (!exception) - exception.emplace(Exception::CreateFromPocoTag{}, e); - succeed.push_back(false); - return false; - } - catch (std::exception & e) - { - if (!exception) - exception.emplace(Exception::CreateFromSTDTag{}, e); - succeed.push_back(false); - return false; - } - succeed.push_back(true); - return true; - } - - bool errors() const { return exception.has_value(); } - - void showErrors(const char * format, Fn auto && get_name_function) - { - if (!exception) - return; - - Strings succeeded_names_list; - Strings failed_names_list; - for (size_t i = 0; i != succeed.size(); ++i) - { - String name = get_name_function(i); - if (succeed[i]) - succeeded_names_list.emplace_back(name); - else - failed_names_list.emplace_back(name); - } - String succeeded_names = boost::algorithm::join(succeeded_names_list, ", "); - String failed_names = boost::algorithm::join(failed_names_list, ", "); - if (succeeded_names.empty()) - succeeded_names = "none"; - - String error_message = format; - boost::replace_all(error_message, "{succeeded_names}", succeeded_names); - boost::replace_all(error_message, "{failed_names}", failed_names); - exception->addMessage(error_message); - exception->rethrow(); - } - - private: - std::vector succeed; - std::optional exception; - }; } @@ -175,228 +80,332 @@ std::vector IAccessStorage::getIDs(AccessEntityType type, const Strings & } -bool IAccessStorage::exists(const UUID & id) const -{ - return existsImpl(id); -} - - -AccessEntityPtr IAccessStorage::tryReadBase(const UUID & id) const -{ - AccessEntityPtr entity; - auto func = [&] { entity = readImpl(id); }; - if (!tryCall(func)) - return nullptr; - return entity; -} - - String IAccessStorage::readName(const UUID & id) const { - return readNameImpl(id); + return *readNameImpl(id, /* throw_if_not_exists = */ true); } -Strings IAccessStorage::readNames(const std::vector & ids) const +std::optional IAccessStorage::readName(const UUID & id, bool throw_if_not_exists) const { - Strings res; - res.reserve(ids.size()); - for (const auto & id : ids) - res.emplace_back(readName(id)); - return res; + return readNameImpl(id, throw_if_not_exists); } -std::optional IAccessStorage::tryReadName(const UUID & id) const -{ - String name; - auto func = [&] { name = readNameImpl(id); }; - if (!tryCall(func)) - return {}; - return name; -} - - -Strings IAccessStorage::tryReadNames(const std::vector & ids) const +Strings IAccessStorage::readNames(const std::vector & ids, bool throw_if_not_exists) const { Strings res; res.reserve(ids.size()); for (const auto & id : ids) { - if (auto name = tryReadName(id)) + if (auto name = readNameImpl(id, throw_if_not_exists)) res.emplace_back(std::move(name).value()); } return res; } -UUID IAccessStorage::insert(const AccessEntityPtr & entity) +std::optional IAccessStorage::tryReadName(const UUID & id) const { - return insertImpl(entity, false); + return readName(id, /* throw_if_not_exists = */ false); } -std::vector IAccessStorage::insert(const std::vector & multiple_entities) +Strings IAccessStorage::tryReadNames(const std::vector & ids) const { - ErrorsTracker tracker(multiple_entities.size()); + return readNames(ids, /* throw_if_not_exists = */ 
false); +} - std::vector ids; - for (const auto & entity : multiple_entities) + +std::optional IAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const +{ + if (auto entity = read(id, throw_if_not_exists)) + return entity->getName(); + return std::nullopt; +} + + +UUID IAccessStorage::insert(const AccessEntityPtr & entity) +{ + return *insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true); +} + + +std::optional IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) +{ + return insertImpl(entity, replace_if_exists, throw_if_exists); +} + + +std::vector IAccessStorage::insert(const std::vector & multiple_entities, bool replace_if_exists, bool throw_if_exists) +{ + if (multiple_entities.empty()) + return {}; + + if (multiple_entities.size() == 1) { - UUID id; - auto func = [&] { id = insertImpl(entity, /* replace_if_exists = */ false); }; - if (tracker.tryCall(func)) - ids.push_back(id); + if (auto id = insert(multiple_entities[0], replace_if_exists, throw_if_exists)) + return {*id}; + return {}; } - if (tracker.errors()) + std::vector successfully_inserted; + try { - auto get_name_function = [&](size_t i) { return multiple_entities[i]->formatTypeWithName(); }; - tracker.showErrors("Couldn't insert {failed_names}. Successfully inserted: {succeeded_names}", get_name_function); + std::vector ids; + for (const auto & entity : multiple_entities) + { + if (auto id = insertImpl(entity, replace_if_exists, throw_if_exists)) + { + successfully_inserted.push_back(entity); + ids.push_back(*id); + } + } + return ids; + } + catch (Exception & e) + { + /// Try to add more information to the error message. + if (!successfully_inserted.empty()) + { + String successfully_inserted_str; + for (const auto & entity : successfully_inserted) + { + if (!successfully_inserted_str.empty()) + successfully_inserted_str += ", "; + successfully_inserted_str += entity->formatTypeWithName(); + } + e.addMessage("After successfully inserting {}/{}: {}", successfully_inserted.size(), multiple_entities.size(), successfully_inserted_str); + } + e.rethrow(); + __builtin_unreachable(); } - - return ids; } std::optional IAccessStorage::tryInsert(const AccessEntityPtr & entity) { - UUID id; - auto func = [&] { id = insertImpl(entity, /* replace_if_exists = */ false); }; - if (!tryCall(func)) - return {}; - return id; + return insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ false); } std::vector IAccessStorage::tryInsert(const std::vector & multiple_entities) { - std::vector ids; - for (const auto & entity : multiple_entities) - { - UUID id; - auto func = [&] { id = insertImpl(entity, /* replace_if_exists = */ false); }; - if (tryCall(func)) - ids.push_back(id); - } - return ids; + return insert(multiple_entities, /* replace_if_exists = */ false, /* throw_if_exists = */ false); } UUID IAccessStorage::insertOrReplace(const AccessEntityPtr & entity) { - return insertImpl(entity, /* replace_if_exists = */ true); + return *insert(entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false); } std::vector IAccessStorage::insertOrReplace(const std::vector & multiple_entities) { - ErrorsTracker tracker(multiple_entities.size()); - - std::vector ids; - for (const auto & entity : multiple_entities) - { - UUID id; - auto func = [&] { id = insertImpl(entity, /* replace_if_exists = */ true); }; - if (tracker.tryCall(func)) - ids.push_back(id); - } - - if (tracker.errors()) - { - auto get_name_function = [&](size_t i) { 
return multiple_entities[i]->formatTypeWithName(); }; - tracker.showErrors("Couldn't insert {failed_names}. Successfully inserted: {succeeded_names}", get_name_function); - } - - return ids; + return insert(multiple_entities, /* replace_if_exists = */ true, /* throw_if_exists = */ false); } -void IAccessStorage::remove(const UUID & id) +std::optional IAccessStorage::insertImpl(const AccessEntityPtr & entity, bool, bool) { - removeImpl(id); + if (isReadOnly()) + throwReadonlyCannotInsert(entity->getType(), entity->getName()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "insertImpl() is not implemented in {}", getStorageType()); } -void IAccessStorage::remove(const std::vector & ids) +bool IAccessStorage::remove(const UUID & id, bool throw_if_not_exists) { - ErrorsTracker tracker(ids.size()); + return removeImpl(id, throw_if_not_exists); +} - for (const auto & id : ids) + +std::vector IAccessStorage::remove(const std::vector & ids, bool throw_if_not_exists) +{ + if (ids.empty()) + return {}; + if (ids.size() == 1) + return remove(ids[0], throw_if_not_exists) ? ids : std::vector{}; + + Strings removed_names; + try { - auto func = [&] { removeImpl(id); }; - tracker.tryCall(func); + std::vector removed_ids; + std::vector readonly_ids; + + /// First we call remove() for non-readonly entities. + for (const auto & id : ids) + { + if (isReadOnly(id)) + readonly_ids.push_back(id); + else + { + auto name = tryReadName(id); + if (remove(id, throw_if_not_exists)) + { + removed_ids.push_back(id); + if (name) + removed_names.push_back(std::move(name).value()); + } + } + } + + /// For readonly entities we're still going to call remove() because + /// isReadOnly(id) could change and even if it's not then a storage-specific + /// implementation of removeImpl() will probably generate a better error message. + for (const auto & id : readonly_ids) + { + auto name = tryReadName(id); + if (remove(id, throw_if_not_exists)) + { + removed_ids.push_back(id); + if (name) + removed_names.push_back(std::move(name).value()); + } + } + + return removed_ids; } - - if (tracker.errors()) + catch (Exception & e) { - auto get_name_function = [&](size_t i) { return formatTypeWithNameOrID(*this, ids[i]); }; - tracker.showErrors("Couldn't remove {failed_names}. Successfully removed: {succeeded_names}", get_name_function); + /// Try to add more information to the error message. 
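When a batch removal fails partway, the code above enriches the in-flight exception with the entities that were already removed before rethrowing. A compact stand-alone version of that bookkeeping (the names and the failing condition are fabricated for the example):

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Hypothetical single-item removal that fails for one particular name.
void removeOne(const std::string & name)
{
    if (name == "broken")
        throw std::runtime_error("cannot remove '" + name + "'");
}

// Remove a batch; if one item throws, report how far we got before failing.
std::vector<std::string> removeAll(const std::vector<std::string> & names)
{
    std::vector<std::string> removed;
    try
    {
        for (const auto & name : names)
        {
            removeOne(name);
            removed.push_back(name);
        }
        return removed;
    }
    catch (const std::exception & e)
    {
        std::string done;
        for (const auto & name : removed)
            done += (done.empty() ? "" : ", ") + name;
        throw std::runtime_error(std::string(e.what()) + " (after successfully removing "
                                 + std::to_string(removed.size()) + "/" + std::to_string(names.size())
                                 + ": " + done + ")");
    }
}

int main()
{
    try { removeAll({"alice", "bob", "broken", "carol"}); }
    catch (const std::exception & e) { std::cout << e.what() << '\n'; }
}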
+ if (!removed_names.empty()) + { + String removed_names_str; + for (const auto & name : removed_names) + { + if (!removed_names_str.empty()) + removed_names_str += ", "; + removed_names_str += backQuote(name); + } + e.addMessage("After successfully removing {}/{}: {}", removed_names.size(), ids.size(), removed_names_str); + } + e.rethrow(); + __builtin_unreachable(); } } bool IAccessStorage::tryRemove(const UUID & id) { - auto func = [&] { removeImpl(id); }; - return tryCall(func); + return remove(id, /* throw_if_not_exists = */ false); } std::vector IAccessStorage::tryRemove(const std::vector & ids) { - std::vector removed_ids; - for (const auto & id : ids) - { - auto func = [&] { removeImpl(id); }; - if (tryCall(func)) - removed_ids.push_back(id); - } - return removed_ids; + return remove(ids, /* throw_if_not_exists = */ false); } -void IAccessStorage::update(const UUID & id, const UpdateFunc & update_func) +bool IAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists) { - updateImpl(id, update_func); + if (isReadOnly(id)) + { + auto entity = read(id, throw_if_not_exists); + if (!entity) + return false; + throwReadonlyCannotRemove(entity->getType(), entity->getName()); + } + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "removeImpl() is not implemented in {}", getStorageType()); } -void IAccessStorage::update(const std::vector & ids, const UpdateFunc & update_func) +bool IAccessStorage::update(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { - ErrorsTracker tracker(ids.size()); + return updateImpl(id, update_func, throw_if_not_exists); +} - for (const auto & id : ids) + +std::vector IAccessStorage::update(const std::vector & ids, const UpdateFunc & update_func, bool throw_if_not_exists) +{ + if (ids.empty()) + return {}; + if (ids.size() == 1) + return update(ids[0], update_func, throw_if_not_exists) ? ids : std::vector{}; + + Strings names_of_updated; + try { - auto func = [&] { updateImpl(id, update_func); }; - tracker.tryCall(func); + std::vector ids_of_updated; + std::vector readonly_ids; + + /// First we call update() for non-readonly entities. + for (const auto & id : ids) + { + if (isReadOnly(id)) + readonly_ids.push_back(id); + else + { + auto name = tryReadName(id); + if (update(id, update_func, throw_if_not_exists)) + { + ids_of_updated.push_back(id); + if (name) + names_of_updated.push_back(std::move(name).value()); + } + } + } + + /// For readonly entities we're still going to call update() because + /// isReadOnly(id) could change and even if it's not then a storage-specific + /// implementation of updateImpl() will probably generate a better error message. + for (const auto & id : readonly_ids) + { + auto name = tryReadName(id); + if (update(id, update_func, throw_if_not_exists)) + { + ids_of_updated.push_back(id); + if (name) + names_of_updated.push_back(std::move(name).value()); + } + } + + return ids_of_updated; } - - if (tracker.errors()) + catch (Exception & e) { - auto get_name_function = [&](size_t i) { return formatTypeWithNameOrID(*this, ids[i]); }; - tracker.showErrors("Couldn't update {failed_names}. Successfully updated: {succeeded_names}", get_name_function); + /// Try to add more information to the error message. 
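The base class also gains default removeImpl()/updateImpl() bodies: a read-only storage produces a specific "read-only" error, anything else falls back to "not implemented", so derived storages only override what they actually support. A rough sketch of that fallback idea with invented class names:

#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical base class: mutation is optional for derived storages.
class Storage
{
public:
    virtual ~Storage() = default;
    virtual std::string type() const = 0;
    virtual bool isReadOnly() const { return false; }

    virtual bool remove(const std::string & name)
    {
        // Default: pick the most informative error we can.
        if (isReadOnly())
            throw std::runtime_error("cannot remove '" + name + "': storage " + type() + " is read-only");
        throw std::runtime_error("remove() is not implemented in " + type());
    }
};

// A storage that never overrides remove() but marks itself read-only.
class UsersXmlStorage : public Storage
{
public:
    std::string type() const override { return "users_xml"; }
    bool isReadOnly() const override { return true; }
};

int main()
{
    UsersXmlStorage storage;
    try { storage.remove("alice"); }
    catch (const std::exception & e) { std::cout << e.what() << '\n'; }
}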
+ if (!names_of_updated.empty()) + { + String names_of_updated_str; + for (const auto & name : names_of_updated) + { + if (!names_of_updated_str.empty()) + names_of_updated_str += ", "; + names_of_updated_str += backQuote(name); + } + e.addMessage("After successfully updating {}/{}: {}", names_of_updated.size(), ids.size(), names_of_updated_str); + } + e.rethrow(); + __builtin_unreachable(); } } bool IAccessStorage::tryUpdate(const UUID & id, const UpdateFunc & update_func) { - auto func = [&] { updateImpl(id, update_func); }; - return tryCall(func); + return update(id, update_func, /* throw_if_not_exists = */ false); } std::vector IAccessStorage::tryUpdate(const std::vector & ids, const UpdateFunc & update_func) { - std::vector updated_ids; - for (const auto & id : ids) + return update(ids, update_func, /* throw_if_not_exists = */ false); +} + + +bool IAccessStorage::updateImpl(const UUID & id, const UpdateFunc &, bool throw_if_not_exists) +{ + if (isReadOnly(id)) { - auto func = [&] { updateImpl(id, update_func); }; - if (tryCall(func)) - updated_ids.push_back(id); + auto entity = read(id, throw_if_not_exists); + if (!entity) + return false; + throwReadonlyCannotUpdate(entity->getType(), entity->getName()); } - return updated_ids; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "updateImpl() is not implemented in {}", getStorageType()); } @@ -421,18 +430,6 @@ scope_guard IAccessStorage::subscribeForChanges(const std::vector & ids, c } -bool IAccessStorage::hasSubscription(AccessEntityType type) const -{ - return hasSubscriptionImpl(type); -} - - -bool IAccessStorage::hasSubscription(const UUID & id) const -{ - return hasSubscriptionImpl(id); -} - - void IAccessStorage::notify(const Notifications & notifications) { for (const auto & [fn, id, new_entity] : notifications) @@ -440,50 +437,53 @@ void IAccessStorage::notify(const Notifications & notifications) } -UUID IAccessStorage::login( - const Credentials & credentials, - const Poco::Net::IPAddress & address, - const ExternalAuthenticators & external_authenticators, - bool replace_exception_with_cannot_authenticate) const -{ - try - { - return loginImpl(credentials, address, external_authenticators); - } - catch (...) 
- { - if (!replace_exception_with_cannot_authenticate) - throw; - - tryLogCurrentException(getLogger(), "from: " + address.toString() + ", user: " + credentials.getUserName() + ": Authentication failed"); - throwCannotAuthenticate(credentials.getUserName()); - } -} - - -UUID IAccessStorage::loginImpl( +UUID IAccessStorage::authenticate( const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const +{ + return *authenticateImpl(credentials, address, external_authenticators, /* throw_if_user_not_exists = */ true); +} + + +std::optional IAccessStorage::authenticate( + const Credentials & credentials, + const Poco::Net::IPAddress & address, + const ExternalAuthenticators & external_authenticators, + bool throw_if_user_not_exists) const +{ + return authenticateImpl(credentials, address, external_authenticators, throw_if_user_not_exists); +} + + +std::optional IAccessStorage::authenticateImpl( + const Credentials & credentials, + const Poco::Net::IPAddress & address, + const ExternalAuthenticators & external_authenticators, + bool throw_if_user_not_exists) const { if (auto id = find(credentials.getUserName())) { if (auto user = tryRead(*id)) { - if (!isAddressAllowedImpl(*user, address)) + if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); - if (!areCredentialsValidImpl(*user, credentials, external_authenticators)) + if (!areCredentialsValid(*user, credentials, external_authenticators)) throwInvalidCredentials(); - return *id; + return id; } } - throwNotFound(AccessEntityType::USER, credentials.getUserName()); + + if (throw_if_user_not_exists) + throwNotFound(AccessEntityType::USER, credentials.getUserName()); + else + return std::nullopt; } -bool IAccessStorage::areCredentialsValidImpl( +bool IAccessStorage::areCredentialsValid( const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const @@ -498,24 +498,12 @@ bool IAccessStorage::areCredentialsValidImpl( } -bool IAccessStorage::isAddressAllowedImpl(const User & user, const Poco::Net::IPAddress & address) const +bool IAccessStorage::isAddressAllowed(const User & user, const Poco::Net::IPAddress & address) const { return user.allowed_client_hosts.contains(address); } -UUID IAccessStorage::getIDOfLoggedUser(const String & user_name) const -{ - return getIDOfLoggedUserImpl(user_name); -} - - -UUID IAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const -{ - return getID(user_name); -} - - UUID IAccessStorage::generateRandomID() { static Poco::UUIDGenerator generator; @@ -615,11 +603,4 @@ void IAccessStorage::throwInvalidCredentials() throw Exception("Invalid credentials", ErrorCodes::WRONG_PASSWORD); } -void IAccessStorage::throwCannotAuthenticate(const String & user_name) -{ - /// We use the same message for all authentication failures because we don't want to give away any unnecessary information for security reasons, - /// only the log will show the exact reason. - throw Exception(user_name + ": Authentication failed: password is incorrect or there is no user with such name", ErrorCodes::AUTHENTICATION_FAILED); -} - } diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index ccbb1ffe5bc..6e533a439d1 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -34,6 +34,12 @@ public: /// Returns a JSON with the parameters of the storage. It's up to the storage type to fill the JSON. 
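authenticateImpl() above returns an optional ID and runs three checks in order: the user exists, the client address is allowed, the credentials are valid; a missing user either throws or yields an empty result depending on `throw_if_user_not_exists`. A simplified stand-alone approximation of that control flow (user records and checks are invented):

#include <iostream>
#include <map>
#include <optional>
#include <stdexcept>
#include <string>

struct User
{
    int id;
    std::string password;
    std::string allowed_host;   // simplified: a single allowed client host
};

// Hypothetical storage: user name -> record.
const std::map<std::string, User> users = {{"alice", {42, "secret", "127.0.0.1"}}};

std::optional<int> authenticate(const std::string & name, const std::string & password,
                                const std::string & address, bool throw_if_user_not_exists)
{
    auto it = users.find(name);
    if (it == users.end())
    {
        if (throw_if_user_not_exists)
            throw std::runtime_error("there is no user with name '" + name + "'");
        return std::nullopt;    // let the caller try the next storage
    }

    if (it->second.allowed_host != address)
        throw std::runtime_error("connections from " + address + " are not allowed");
    if (it->second.password != password)
        throw std::runtime_error("invalid credentials");

    return it->second.id;
}

int main()
{
    std::cout << *authenticate("alice", "secret", "127.0.0.1", true) << '\n';
    std::cout << authenticate("bob", "x", "127.0.0.1", false).has_value() << '\n';
}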
virtual String getStorageParamsJSON() const { return "{}"; } + /// Returns true if this storage is readonly. + virtual bool isReadOnly() const { return false; } + + /// Returns true if this entity is readonly. + virtual bool isReadOnly(const UUID &) const { return isReadOnly(); } + /// Returns the identifiers of all the entities of a specified type contained in the storage. std::vector findAll(AccessEntityType type) const; @@ -63,14 +69,14 @@ public: std::vector getIDs(const Strings & names) const { return getIDs(EntityClassT::TYPE, names); } /// Returns whether there is an entity with such identifier in the storage. - bool exists(const UUID & id) const; + virtual bool exists(const UUID & id) const = 0; /// Reads an entity. Throws an exception if not found. template - std::shared_ptr read(const UUID & id) const; + std::shared_ptr read(const UUID & id, bool throw_if_not_exists = true) const; template - std::shared_ptr read(const String & name) const; + std::shared_ptr read(const String & name, bool throw_if_not_exists = true) const; /// Reads an entity. Returns nullptr if not found. template @@ -81,18 +87,16 @@ public: /// Reads only name of an entity. String readName(const UUID & id) const; - Strings readNames(const std::vector & ids) const; + std::optional readName(const UUID & id, bool throw_if_not_exists) const; + Strings readNames(const std::vector & ids, bool throw_if_not_exists = true) const; std::optional tryReadName(const UUID & id) const; Strings tryReadNames(const std::vector & ids) const; - /// Returns true if a specified entity can be inserted into this storage. - /// This function doesn't check whether there are no entities with such name in the storage. - bool canInsert(const AccessEntityPtr & entity) const { return canInsertImpl(entity); } - /// Inserts an entity to the storage. Returns ID of a new entry in the storage. /// Throws an exception if the specified name already exists. UUID insert(const AccessEntityPtr & entity); - std::vector insert(const std::vector & multiple_entities); + std::optional insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); + std::vector insert(const std::vector & multiple_entities, bool replace_if_exists = false, bool throw_if_exists = true); /// Inserts an entity to the storage. Returns ID of a new entry in the storage. std::optional tryInsert(const AccessEntityPtr & entity); @@ -104,8 +108,8 @@ public: std::vector insertOrReplace(const std::vector & multiple_entities); /// Removes an entity from the storage. Throws an exception if couldn't remove. - void remove(const UUID & id); - void remove(const std::vector & ids); + bool remove(const UUID & id, bool throw_if_not_exists = true); + std::vector remove(const std::vector & ids, bool throw_if_not_exists = true); /// Removes an entity from the storage. Returns false if couldn't remove. bool tryRemove(const UUID & id); @@ -116,8 +120,8 @@ public: using UpdateFunc = std::function; /// Updates an entity stored in the storage. Throws an exception if couldn't update. - void update(const UUID & id, const UpdateFunc & update_func); - void update(const std::vector & ids, const UpdateFunc & update_func); + bool update(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists = true); + std::vector update(const std::vector & ids, const UpdateFunc & update_func, bool throw_if_not_exists = true); /// Updates an entity stored in the storage. Returns false if couldn't update. 
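In the header above, insert()/tryInsert()/insertOrReplace() now all funnel into one primitive parameterized by `replace_if_exists` and `throw_if_exists`. A toy in-memory storage showing how the three public behaviours map onto those two flags (the replace branch is simplified and is not the real storage logic):

#include <iostream>
#include <map>
#include <optional>
#include <stdexcept>
#include <string>

class TinyStorage
{
public:
    // Core primitive: both behaviours on name collision are expressed via flags.
    std::optional<int> insert(const std::string & name, bool replace_if_exists, bool throw_if_exists)
    {
        auto it = ids.find(name);
        if (it != ids.end())
        {
            if (replace_if_exists)
                return it->second;              // this sketch keeps the old ID; a real storage may re-create the entry
            if (throw_if_exists)
                throw std::runtime_error("'" + name + "' already exists");
            return std::nullopt;                // silent failure for the try-variant
        }
        int id = next_id++;
        ids.emplace(name, id);
        return id;
    }

    // Public convenience wrappers, mirroring the header's trio.
    int insertStrict(const std::string & name) { return *insert(name, false, /* throw_if_exists = */ true); }
    std::optional<int> tryInsert(const std::string & name) { return insert(name, false, false); }
    int insertOrReplace(const std::string & name) { return *insert(name, /* replace_if_exists = */ true, false); }

private:
    std::map<std::string, int> ids;
    int next_id = 1;
};

int main()
{
    TinyStorage storage;
    std::cout << storage.insertStrict("alice") << '\n';
    std::cout << storage.tryInsert("alice").has_value() << '\n';   // prints 0: already there
    std::cout << storage.insertOrReplace("alice") << '\n';
}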
bool tryUpdate(const UUID & id, const UpdateFunc & update_func); @@ -139,35 +143,27 @@ public: scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const; scope_guard subscribeForChanges(const std::vector & ids, const OnChangedHandler & handler) const; - bool hasSubscription(AccessEntityType type) const; - bool hasSubscription(const UUID & id) const; + virtual bool hasSubscription(AccessEntityType type) const = 0; + virtual bool hasSubscription(const UUID & id) const = 0; /// Finds a user, check the provided credentials and returns the ID of the user if they are valid. /// Throws an exception if no such user or credentials are invalid. - UUID login(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool replace_exception_with_cannot_authenticate = true) const; - - /// Returns the ID of a user who has logged in (maybe on another node). - /// The function assumes that the password has been already checked somehow, so we can skip checking it now. - UUID getIDOfLoggedUser(const String & user_name) const; + UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const; + std::optional authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists) const; protected: virtual std::optional findImpl(AccessEntityType type, const String & name) const = 0; virtual std::vector findAllImpl(AccessEntityType type) const = 0; - virtual bool existsImpl(const UUID & id) const = 0; - virtual AccessEntityPtr readImpl(const UUID & id) const = 0; - virtual String readNameImpl(const UUID & id) const = 0; - virtual bool canInsertImpl(const AccessEntityPtr & entity) const = 0; - virtual UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) = 0; - virtual void removeImpl(const UUID & id) = 0; - virtual void updateImpl(const UUID & id, const UpdateFunc & update_func) = 0; + virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const = 0; + virtual std::optional readNameImpl(const UUID & id, bool throw_if_not_exists) const; + virtual std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); + virtual bool removeImpl(const UUID & id, bool throw_if_not_exists); + virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); virtual scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const = 0; virtual scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const = 0; - virtual bool hasSubscriptionImpl(const UUID & id) const = 0; - virtual bool hasSubscriptionImpl(AccessEntityType type) const = 0; - virtual UUID loginImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const; - virtual bool areCredentialsValidImpl(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const; - virtual bool isAddressAllowedImpl(const User & user, const Poco::Net::IPAddress & address) const; - virtual UUID getIDOfLoggedUserImpl(const String & user_name) const; + virtual std::optional authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & 
external_authenticators, bool throw_if_user_not_exists) const; + virtual bool areCredentialsValid(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const; + virtual bool isAddressAllowed(const User & user, const Poco::Net::IPAddress & address) const; static UUID generateRandomID(); Poco::Logger * getLogger() const; @@ -184,30 +180,28 @@ protected: [[noreturn]] void throwReadonlyCannotRemove(AccessEntityType type, const String & name) const; [[noreturn]] static void throwAddressNotAllowed(const Poco::Net::IPAddress & address); [[noreturn]] static void throwInvalidCredentials(); - [[noreturn]] static void throwCannotAuthenticate(const String & user_name); using Notification = std::tuple; using Notifications = std::vector; static void notify(const Notifications & notifications); private: - AccessEntityPtr tryReadBase(const UUID & id) const; - const String storage_name; mutable std::atomic log = nullptr; }; template -std::shared_ptr IAccessStorage::read(const UUID & id) const +std::shared_ptr IAccessStorage::read(const UUID & id, bool throw_if_not_exists) const { - auto entity = readImpl(id); + auto entity = readImpl(id, throw_if_not_exists); if constexpr (std::is_same_v) return entity; else { - auto ptr = typeid_cast>(entity); - if (ptr) + if (!entity) + return nullptr; + if (auto ptr = typeid_cast>(entity)) return ptr; throwBadCast(id, entity->getType(), entity->getName(), EntityClassT::TYPE); } @@ -215,26 +209,27 @@ std::shared_ptr IAccessStorage::read(const UUID & id) const template -std::shared_ptr IAccessStorage::read(const String & name) const +std::shared_ptr IAccessStorage::read(const String & name, bool throw_if_not_exists) const { - return read(getID(name)); + if (auto id = find(name)) + return read(*id, throw_if_not_exists); + if (throw_if_not_exists) + throwNotFound(EntityClassT::TYPE, name); + else + return nullptr; } template std::shared_ptr IAccessStorage::tryRead(const UUID & id) const { - auto entity = tryReadBase(id); - if (!entity) - return nullptr; - return typeid_cast>(entity); + return read(id, false); } template std::shared_ptr IAccessStorage::tryRead(const String & name) const { - auto id = find(name); - return id ? 
tryRead(*id) : nullptr; + return read(name, false); } } diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index c89d8c1f953..77c3281e5ab 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -426,52 +426,24 @@ std::vector LDAPAccessStorage::findAllImpl(AccessEntityType type) const } -bool LDAPAccessStorage::existsImpl(const UUID & id) const +bool LDAPAccessStorage::exists(const UUID & id) const { std::scoped_lock lock(mutex); return memory_storage.exists(id); } -AccessEntityPtr LDAPAccessStorage::readImpl(const UUID & id) const +AccessEntityPtr LDAPAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const { std::scoped_lock lock(mutex); - return memory_storage.read(id); + return memory_storage.read(id, throw_if_not_exists); } -String LDAPAccessStorage::readNameImpl(const UUID & id) const +std::optional LDAPAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const { std::scoped_lock lock(mutex); - return memory_storage.readName(id); -} - - -bool LDAPAccessStorage::canInsertImpl(const AccessEntityPtr &) const -{ - return false; -} - - -UUID LDAPAccessStorage::insertImpl(const AccessEntityPtr & entity, bool) -{ - throwReadonlyCannotInsert(entity->getType(), entity->getName()); -} - - -void LDAPAccessStorage::removeImpl(const UUID & id) -{ - std::scoped_lock lock(mutex); - auto entity = read(id); - throwReadonlyCannotRemove(entity->getType(), entity->getName()); -} - - -void LDAPAccessStorage::updateImpl(const UUID & id, const UpdateFunc &) -{ - std::scoped_lock lock(mutex); - auto entity = read(id); - throwReadonlyCannotUpdate(entity->getType(), entity->getName()); + return memory_storage.readName(id, throw_if_not_exists); } @@ -489,20 +461,24 @@ scope_guard LDAPAccessStorage::subscribeForChangesImpl(AccessEntityType type, co } -bool LDAPAccessStorage::hasSubscriptionImpl(const UUID & id) const +bool LDAPAccessStorage::hasSubscription(const UUID & id) const { std::scoped_lock lock(mutex); return memory_storage.hasSubscription(id); } -bool LDAPAccessStorage::hasSubscriptionImpl(AccessEntityType type) const +bool LDAPAccessStorage::hasSubscription(AccessEntityType type) const { std::scoped_lock lock(mutex); return memory_storage.hasSubscription(type); } -UUID LDAPAccessStorage::loginImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const +std::optional LDAPAccessStorage::authenticateImpl( + const Credentials & credentials, + const Poco::Net::IPAddress & address, + const ExternalAuthenticators & external_authenticators, + bool /* throw_if_user_not_exists */) const { std::scoped_lock lock(mutex); LDAPClient::SearchResultsList external_roles; @@ -511,16 +487,19 @@ UUID LDAPAccessStorage::loginImpl(const Credentials & credentials, const Poco::N { auto user = memory_storage.read(*id); - if (!isAddressAllowedImpl(*user, address)) + if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); + if (typeid_cast(&credentials)) + return id; + if (!areLDAPCredentialsValidNoLock(*user, credentials, external_authenticators, external_roles)) throwInvalidCredentials(); // Just in case external_roles are changed. This will be no-op if they are not. 
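// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: the login()/getIDOfLoggedUser()
// pair is folded into authenticate(), which has a throwing overload and a
// non-throwing one. The function below is a made-up wrapper around the
// non-throwing overload declared in IAccessStorage above.
// ---------------------------------------------------------------------------
#include <optional>
#include <Access/Credentials.h>
#include <Access/ExternalAuthenticators.h>
#include <Access/IAccessStorage.h>
#include <Poco/Net/IPAddress.h>

/// std::nullopt means "no such user in this storage"; invalid credentials or a
/// disallowed address still throw, as in the LDAP implementation above.
std::optional<DB::UUID> probeUser(
    const DB::IAccessStorage & storage,
    const DB::Credentials & credentials,
    const Poco::Net::IPAddress & address,
    const DB::ExternalAuthenticators & external_authenticators)
{
    return storage.authenticate(
        credentials, address, external_authenticators, /* throw_if_user_not_exists = */ false);
}
// ---------------------------------------------------------------------------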
updateAssignedRolesNoLock(*id, user->getName(), external_roles); - return *id; + return id; } else { @@ -530,9 +509,16 @@ UUID LDAPAccessStorage::loginImpl(const Credentials & credentials, const Poco::N user->auth_data = AuthenticationData(AuthenticationType::LDAP); user->auth_data.setLDAPServerName(ldap_server_name); - if (!isAddressAllowedImpl(*user, address)) + if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); + if (typeid_cast(&credentials)) + { + // TODO: mapped external roles are not available here. Without a password we can't authenticate and retrieve roles from LDAP server. + assignRolesNoLock(*user, external_roles); + return memory_storage.insert(user); + } + if (!areLDAPCredentialsValidNoLock(*user, credentials, external_authenticators, external_roles)) throwInvalidCredentials(); @@ -541,31 +527,4 @@ UUID LDAPAccessStorage::loginImpl(const Credentials & credentials, const Poco::N return memory_storage.insert(user); } } - -UUID LDAPAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const -{ - std::scoped_lock lock(mutex); - auto id = memory_storage.find(user_name); - if (id) - { - return *id; - } - else - { - // User does not exist, so we create one, and add it pretending that the authentication is successful. - auto user = std::make_shared(); - user->setName(user_name); - user->auth_data = AuthenticationData(AuthenticationType::LDAP); - user->auth_data.setLDAPServerName(ldap_server_name); - - LDAPClient::SearchResultsList external_roles; - - // TODO: mapped external roles are not available here. Without a password we can't authenticate and retrieve roles from LDAP server. - - assignRolesNoLock(*user, external_roles); - - return memory_storage.insert(user); - } -} - } diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index feb6ee4d92a..cc93c523516 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -40,23 +40,19 @@ public: public: // IAccessStorage implementations. virtual const char * getStorageType() const override; virtual String getStorageParamsJSON() const override; + virtual bool isReadOnly() const override { return true; } + virtual bool exists(const UUID & id) const override; + virtual bool hasSubscription(const UUID & id) const override; + virtual bool hasSubscription(AccessEntityType type) const override; private: // IAccessStorage implementations. 
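// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: read-only backends (LDAP,
// users.xml) now simply override isReadOnly() instead of providing throwing
// insertImpl()/removeImpl()/updateImpl() stubs, so generic code can test
// writability up front. StoragePtr and the function below are made up; the real
// MultipleAccessStorage::insertImpl() later in this diff additionally prefers a
// storage that already holds an entity with the same name.
// ---------------------------------------------------------------------------
#include <memory>
#include <vector>
#include <Access/IAccessStorage.h>

using StoragePtr = std::shared_ptr<DB::IAccessStorage>;

/// Returns the first nested storage that accepts writes, or nullptr if all of
/// them are read-only.
StoragePtr pickStorageForInsertion(const std::vector<StoragePtr> & storages)
{
    for (const auto & storage : storages)
        if (!storage->isReadOnly())
            return storage;
    return nullptr;
}
// ---------------------------------------------------------------------------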
virtual std::optional findImpl(AccessEntityType type, const String & name) const override; virtual std::vector findAllImpl(AccessEntityType type) const override; - virtual bool existsImpl(const UUID & id) const override; - virtual AccessEntityPtr readImpl(const UUID & id) const override; - virtual String readNameImpl(const UUID & id) const override; - virtual bool canInsertImpl(const AccessEntityPtr &) const override; - virtual UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override; - virtual void removeImpl(const UUID & id) override; - virtual void updateImpl(const UUID & id, const UpdateFunc & update_func) override; + virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; + virtual std::optional readNameImpl(const UUID & id, bool throw_if_not_exists) const override; virtual scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; virtual scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; - virtual bool hasSubscriptionImpl(const UUID & id) const override; - virtual bool hasSubscriptionImpl(AccessEntityType type) const override; - virtual UUID loginImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const override; - virtual UUID getIDOfLoggedUserImpl(const String & user_name) const override; + virtual std::optional authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists) const override; private: void setConfiguration(AccessControl * access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix); diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index c666520c069..49d01074f6a 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -448,7 +448,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) vals = nullptr; }); - for (std::size_t i = 0; vals[i]; i++) + for (size_t i = 0; vals[i]; ++i) { if (vals[i]->bv_val && vals[i]->bv_len > 0) result.emplace(vals[i]->bv_val, vals[i]->bv_len); @@ -473,7 +473,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) referrals = nullptr; }); - for (std::size_t i = 0; referrals[i]; i++) + for (size_t i = 0; referrals[i]; ++i) { LOG_WARNING(&Poco::Logger::get("LDAPClient"), "Received reference during LDAP search but not following it: {}", referrals[i]); } diff --git a/src/Access/MemoryAccessStorage.cpp b/src/Access/MemoryAccessStorage.cpp index d3c99204bd3..f7989693d87 100644 --- a/src/Access/MemoryAccessStorage.cpp +++ b/src/Access/MemoryAccessStorage.cpp @@ -38,64 +38,72 @@ std::vector MemoryAccessStorage::findAllImpl(AccessEntityType type) const } -bool MemoryAccessStorage::existsImpl(const UUID & id) const +bool MemoryAccessStorage::exists(const UUID & id) const { std::lock_guard lock{mutex}; return entries_by_id.count(id); } -AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id) const +AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const { std::lock_guard lock{mutex}; auto it = entries_by_id.find(id); if (it == entries_by_id.end()) - throwNotFound(id); + { + if (throw_if_not_exists) + throwNotFound(id); + else + return nullptr; + } const Entry & entry = it->second; return entry.entity; } -String 
MemoryAccessStorage::readNameImpl(const UUID & id) const -{ - return readImpl(id)->getName(); -} - - -UUID MemoryAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists) +std::optional MemoryAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) { Notifications notifications; SCOPE_EXIT({ notify(notifications); }); UUID id = generateRandomID(); std::lock_guard lock{mutex}; - insertNoLock(id, new_entity, replace_if_exists, notifications); - return id; + if (insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, notifications)) + return id; + + return std::nullopt; } -void MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, Notifications & notifications) +bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, Notifications & notifications) { const String & name = new_entity->getName(); AccessEntityType type = new_entity->getType(); /// Check that we can insert. - auto it = entries_by_id.find(id); - if (it != entries_by_id.end()) + auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; + auto it_by_name = entries_by_name.find(name); + bool name_collision = (it_by_name != entries_by_name.end()); + + if (name_collision && !replace_if_exists) { - const auto & existing_entry = it->second; + if (throw_if_exists) + throwNameCollisionCannotInsert(type, name); + else + return false; + } + + auto it_by_id = entries_by_id.find(id); + if (it_by_id != entries_by_id.end()) + { + const auto & existing_entry = it_by_id->second; throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName()); } - auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; - auto it2 = entries_by_name.find(name); - if (it2 != entries_by_name.end()) + if (name_collision && replace_if_exists) { - const auto & existing_entry = *(it2->second); - if (replace_if_exists) - removeNoLock(existing_entry.id, notifications); - else - throwNameCollisionCannotInsert(type, name); + const auto & existing_entry = *(it_by_name->second); + removeNoLock(existing_entry.id, /* throw_if_not_exists = */ false, notifications); } /// Do insertion. 
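// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: the possible outcomes of the
// name-collision handling in insertNoLock() above. `storage` and `entity` are
// placeholders for a MemoryAccessStorage and any access entity.
// ---------------------------------------------------------------------------
#include <Access/IAccessStorage.h>

void insertionOutcomes(DB::IAccessStorage & storage, const DB::AccessEntityPtr & entity)
{
    [[maybe_unused]] auto first = storage.insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ false);
    /// `first` contains the freshly generated UUID.

    [[maybe_unused]] auto second = storage.insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ false);
    /// Same name again: nothing is inserted and `second` is std::nullopt.

    [[maybe_unused]] auto third = storage.insert(entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false);
    /// The existing entry is removed first, so `third` contains a new UUID for the replacement.

    /// With throw_if_exists = true the second call would instead have thrown via
    /// throwNameCollisionCannotInsert() (ACCESS_ENTITY_ALREADY_EXISTS).
}
// ---------------------------------------------------------------------------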
@@ -104,24 +112,30 @@ void MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & entry.entity = new_entity; entries_by_name[name] = &entry; prepareNotifications(entry, false, notifications); + return true; } -void MemoryAccessStorage::removeImpl(const UUID & id) +bool MemoryAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists) { Notifications notifications; SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; - removeNoLock(id, notifications); + return removeNoLock(id, throw_if_not_exists, notifications); } -void MemoryAccessStorage::removeNoLock(const UUID & id, Notifications & notifications) +bool MemoryAccessStorage::removeNoLock(const UUID & id, bool throw_if_not_exists, Notifications & notifications) { auto it = entries_by_id.find(id); if (it == entries_by_id.end()) - throwNotFound(id); + { + if (throw_if_not_exists) + throwNotFound(id); + else + return false; + } Entry & entry = it->second; const String & name = entry.entity->getName(); @@ -133,24 +147,30 @@ void MemoryAccessStorage::removeNoLock(const UUID & id, Notifications & notifica auto & entries_by_name = entries_by_name_and_type[static_cast(type)]; entries_by_name.erase(name); entries_by_id.erase(it); + return true; } -void MemoryAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func) +bool MemoryAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { Notifications notifications; SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; - updateNoLock(id, update_func, notifications); + return updateNoLock(id, update_func, throw_if_not_exists, notifications); } -void MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, Notifications & notifications) +bool MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, Notifications & notifications) { auto it = entries_by_id.find(id); if (it == entries_by_id.end()) - throwNotFound(id); + { + if (throw_if_not_exists) + throwNotFound(id); + else + return false; + } Entry & entry = it->second; auto old_entity = entry.entity; @@ -160,7 +180,7 @@ void MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & updat throwBadCast(id, new_entity->getType(), new_entity->getName(), old_entity->getType()); if (*new_entity == *old_entity) - return; + return true; entry.entity = new_entity; @@ -176,6 +196,7 @@ void MemoryAccessStorage::updateNoLock(const UUID & id, const UpdateFunc & updat } prepareNotifications(entry, false, notifications); + return true; } @@ -235,7 +256,7 @@ void MemoryAccessStorage::setAllNoLock(const std::vector ids_to_remove = std::move(not_used_ids); boost::range::copy(conflicting_ids, std::inserter(ids_to_remove, ids_to_remove.end())); for (const auto & id : ids_to_remove) - removeNoLock(id, notifications); + removeNoLock(id, /* throw_if_not_exists = */ false, notifications); /// Insert or update entities. 
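// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: an UpdateFunc maps the old
// entity to a new one, and with throw_if_not_exists = false a missing entity is
// reported as `false` rather than an exception, mirroring updateNoLock() above.
// Assumes the usual IAccessEntity::clone()/setName() helpers; the function name
// is made up.
// ---------------------------------------------------------------------------
#include <string>
#include <Access/IAccessStorage.h>

bool renameQuietly(DB::IAccessStorage & storage, const DB::UUID & id, const std::string & new_name)
{
    return storage.update(
        id,
        [&](const DB::AccessEntityPtr & old_entity) -> DB::AccessEntityPtr
        {
            auto changed = old_entity->clone();
            changed->setName(new_name);
            return changed;
        },
        /* throw_if_not_exists = */ false);
}
// ---------------------------------------------------------------------------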
for (const auto & [id, entity] : all_entities) @@ -246,11 +267,16 @@ void MemoryAccessStorage::setAllNoLock(const std::vectorsecond.entity) != *entity) { const AccessEntityPtr & changed_entity = entity; - updateNoLock(id, [&changed_entity](const AccessEntityPtr &) { return changed_entity; }, notifications); + updateNoLock(id, + [&changed_entity](const AccessEntityPtr &) { return changed_entity; }, + /* throw_if_not_exists = */ true, + notifications); } } else - insertNoLock(id, entity, false, notifications); + { + insertNoLock(id, entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true, notifications); + } } } @@ -304,7 +330,7 @@ scope_guard MemoryAccessStorage::subscribeForChangesImpl(const UUID & id, const } -bool MemoryAccessStorage::hasSubscriptionImpl(const UUID & id) const +bool MemoryAccessStorage::hasSubscription(const UUID & id) const { std::lock_guard lock{mutex}; auto it = entries_by_id.find(id); @@ -317,7 +343,7 @@ bool MemoryAccessStorage::hasSubscriptionImpl(const UUID & id) const } -bool MemoryAccessStorage::hasSubscriptionImpl(AccessEntityType type) const +bool MemoryAccessStorage::hasSubscription(AccessEntityType type) const { std::lock_guard lock{mutex}; const auto & handlers = handlers_by_type[static_cast(type)]; diff --git a/src/Access/MemoryAccessStorage.h b/src/Access/MemoryAccessStorage.h index ea7b0193471..3e31f155ba7 100644 --- a/src/Access/MemoryAccessStorage.h +++ b/src/Access/MemoryAccessStorage.h @@ -23,20 +23,19 @@ public: void setAll(const std::vector & all_entities); void setAll(const std::vector> & all_entities); + bool exists(const UUID & id) const override; + bool hasSubscription(const UUID & id) const override; + bool hasSubscription(AccessEntityType type) const override; + private: std::optional findImpl(AccessEntityType type, const String & name) const override; std::vector findAllImpl(AccessEntityType type) const override; - bool existsImpl(const UUID & id) const override; - AccessEntityPtr readImpl(const UUID & id) const override; - String readNameImpl(const UUID & id) const override; - bool canInsertImpl(const AccessEntityPtr &) const override { return true; } - UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override; - void removeImpl(const UUID & id) override; - void updateImpl(const UUID & id, const UpdateFunc & update_func) override; + AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; + bool removeImpl(const UUID & id, bool throw_if_not_exists) override; + bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; - bool hasSubscriptionImpl(const UUID & id) const override; - bool hasSubscriptionImpl(AccessEntityType type) const override; struct Entry { @@ -45,9 +44,9 @@ private: mutable std::list handlers_by_id; }; - void insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, Notifications & notifications); - void removeNoLock(const UUID & id, Notifications & notifications); - void updateNoLock(const UUID & id, const UpdateFunc & update_func, Notifications & notifications); + bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool 
throw_if_exists, Notifications & notifications); + bool removeNoLock(const UUID & id, bool throw_if_not_exists, Notifications & notifications); + bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, Notifications & notifications); void setAllNoLock(const std::vector> & all_entities, Notifications & notifications); void prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const; diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index 61bc84e8ab2..11540dd1b77 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -13,8 +13,8 @@ namespace DB { namespace ErrorCodes { - extern const int ACCESS_STORAGE_FOR_INSERTION_NOT_FOUND; extern const int ACCESS_ENTITY_ALREADY_EXISTS; + extern const int ACCESS_STORAGE_FOR_INSERTION_NOT_FOUND; } using Storage = IAccessStorage; @@ -129,7 +129,7 @@ std::vector MultipleAccessStorage::findAllImpl(AccessEntityType type) cons } -bool MultipleAccessStorage::existsImpl(const UUID & id) const +bool MultipleAccessStorage::exists(const UUID & id) const { return findStorage(id) != nullptr; } @@ -180,39 +180,59 @@ ConstStoragePtr MultipleAccessStorage::getStorage(const UUID & id) const return const_cast(this)->getStorage(id); } -AccessEntityPtr MultipleAccessStorage::readImpl(const UUID & id) const +AccessEntityPtr MultipleAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const { - return getStorage(id)->read(id); + if (auto storage = findStorage(id)) + return storage->read(id, throw_if_not_exists); + + if (throw_if_not_exists) + throwNotFound(id); + else + return nullptr; } -String MultipleAccessStorage::readNameImpl(const UUID & id) const +std::optional MultipleAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const { - return getStorage(id)->readName(id); + if (auto storage = findStorage(id)) + return storage->readName(id, throw_if_not_exists); + + if (throw_if_not_exists) + throwNotFound(id); + else + return std::nullopt; } -bool MultipleAccessStorage::canInsertImpl(const AccessEntityPtr & entity) const +bool MultipleAccessStorage::isReadOnly() const { auto storages = getStoragesInternal(); for (const auto & storage : *storages) { - if (storage->canInsert(entity)) - return true; + if (!storage->isReadOnly()) + return false; } + return true; +} + + +bool MultipleAccessStorage::isReadOnly(const UUID & id) const +{ + auto storage = findStorage(id); + if (storage) + return storage->isReadOnly(id); return false; } -UUID MultipleAccessStorage::insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) +std::optional MultipleAccessStorage::insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) { - auto storages = getStoragesInternal(); - std::shared_ptr storage_for_insertion; + + auto storages = getStoragesInternal(); for (const auto & storage : *storages) { - if (storage->canInsert(entity) || - storage->find(entity->getType(), entity->getName())) + if (!storage->isReadOnly() || storage->find(entity->getType(), entity->getName())) { storage_for_insertion = storage; break; @@ -220,49 +240,73 @@ UUID MultipleAccessStorage::insertImpl(const AccessEntityPtr & entity, bool repl } if (!storage_for_insertion) - throw Exception("Not found a storage to insert " + entity->formatTypeWithName(), ErrorCodes::ACCESS_STORAGE_FOR_INSERTION_NOT_FOUND); + { + throw Exception( + ErrorCodes::ACCESS_STORAGE_FOR_INSERTION_NOT_FOUND, + "Could not insert {} because 
there is no writeable access storage in {}", + entity->formatTypeWithName(), + getStorageName()); + } - auto id = replace_if_exists ? storage_for_insertion->insertOrReplace(entity) : storage_for_insertion->insert(entity); - std::lock_guard lock{mutex}; - ids_cache.set(id, storage_for_insertion); + auto id = storage_for_insertion->insert(entity, replace_if_exists, throw_if_exists); + if (id) + { + std::lock_guard lock{mutex}; + ids_cache.set(*id, storage_for_insertion); + } return id; } -void MultipleAccessStorage::removeImpl(const UUID & id) +bool MultipleAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists) { - getStorage(id)->remove(id); + if (auto storage = findStorage(id)) + return storage->remove(id, throw_if_not_exists); + + if (throw_if_not_exists) + throwNotFound(id); + else + return false; } -void MultipleAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func) +bool MultipleAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { - auto storage_for_updating = getStorage(id); + auto storage_for_updating = findStorage(id); + if (!storage_for_updating) + { + if (throw_if_not_exists) + throwNotFound(id); + else + return false; + } /// If the updating involves renaming check that the renamed entity will be accessible by name. auto storages = getStoragesInternal(); if ((storages->size() > 1) && (storages->front() != storage_for_updating)) { - auto old_entity = storage_for_updating->read(id); - auto new_entity = update_func(old_entity); - if (new_entity->getName() != old_entity->getName()) + if (auto old_entity = storage_for_updating->tryRead(id)) { - for (const auto & storage : *storages) + auto new_entity = update_func(old_entity); + if (new_entity->getName() != old_entity->getName()) { - if (storage == storage_for_updating) - break; - if (storage->find(new_entity->getType(), new_entity->getName())) + for (const auto & storage : *storages) { - throw Exception( - old_entity->formatTypeWithName() + ": cannot rename to " + backQuote(new_entity->getName()) + " because " - + new_entity->formatTypeWithName() + " already exists in " + storage->getStorageName(), - ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS); + if (storage == storage_for_updating) + break; + if (storage->find(new_entity->getType(), new_entity->getName())) + { + throw Exception( + old_entity->formatTypeWithName() + ": cannot rename to " + backQuote(new_entity->getName()) + " because " + + new_entity->formatTypeWithName() + " already exists in " + storage->getStorageName(), + ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS); + } } } } } - storage_for_updating->update(id, update_func); + return storage_for_updating->update(id, update_func, throw_if_not_exists); } @@ -275,7 +319,7 @@ scope_guard MultipleAccessStorage::subscribeForChangesImpl(const UUID & id, cons } -bool MultipleAccessStorage::hasSubscriptionImpl(const UUID & id) const +bool MultipleAccessStorage::hasSubscription(const UUID & id) const { auto storages = getStoragesInternal(); for (const auto & storage : *storages) @@ -307,7 +351,7 @@ scope_guard MultipleAccessStorage::subscribeForChangesImpl(AccessEntityType type } -bool MultipleAccessStorage::hasSubscriptionImpl(AccessEntityType type) const +bool MultipleAccessStorage::hasSubscription(AccessEntityType type) const { std::lock_guard lock{mutex}; const auto & handlers = handlers_by_type[static_cast(type)]; @@ -405,57 +449,24 @@ void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock } -UUID 
MultipleAccessStorage::loginImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const +std::optional MultipleAccessStorage::authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists) const { auto storages = getStoragesInternal(); for (const auto & storage : *storages) { - try + auto id = storage->authenticate(credentials, address, external_authenticators, /* throw_if_user_not_exists = */ false); + if (id) { - auto id = storage->login(credentials, address, external_authenticators, /* replace_exception_with_cannot_authenticate = */ false); std::lock_guard lock{mutex}; - ids_cache.set(id, storage); + ids_cache.set(*id, storage); return id; } - catch (...) - { - if (!storage->find(AccessEntityType::USER, credentials.getUserName())) - { - /// The authentication failed because there no users with such name in the `storage` - /// thus we can try to search in other nested storages. - continue; - } - throw; - } } - throwNotFound(AccessEntityType::USER, credentials.getUserName()); -} - -UUID MultipleAccessStorage::getIDOfLoggedUserImpl(const String & user_name) const -{ - auto storages = getStoragesInternal(); - for (const auto & storage : *storages) - { - try - { - auto id = storage->getIDOfLoggedUser(user_name); - std::lock_guard lock{mutex}; - ids_cache.set(id, storage); - return id; - } - catch (...) - { - if (!storage->find(AccessEntityType::USER, user_name)) - { - /// The authentication failed because there no users with such name in the `storage` - /// thus we can try to search in other nested storages. - continue; - } - throw; - } - } - throwNotFound(AccessEntityType::USER, user_name); + if (throw_if_user_not_exists) + throwNotFound(AccessEntityType::USER, credentials.getUserName()); + else + return std::nullopt; } } diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index 462f97d6fa9..36c316c71f4 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -21,6 +21,8 @@ public: ~MultipleAccessStorage() override; const char * getStorageType() const override { return STORAGE_TYPE; } + bool isReadOnly() const override; + bool isReadOnly(const UUID & id) const override; void setStorages(const std::vector & storages); void addStorage(const StoragePtr & new_storage); @@ -34,22 +36,21 @@ public: ConstStoragePtr getStorage(const UUID & id) const; StoragePtr getStorage(const UUID & id); + bool exists(const UUID & id) const override; + bool hasSubscription(const UUID & id) const override; + bool hasSubscription(AccessEntityType type) const override; + protected: std::optional findImpl(AccessEntityType type, const String & name) const override; std::vector findAllImpl(AccessEntityType type) const override; - bool existsImpl(const UUID & id) const override; - AccessEntityPtr readImpl(const UUID & id) const override; - String readNameImpl(const UUID &id) const override; - bool canInsertImpl(const AccessEntityPtr & entity) const override; - UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override; - void removeImpl(const UUID & id) override; - void updateImpl(const UUID & id, const UpdateFunc & update_func) override; + AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional readNameImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional insertImpl(const 
AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; + bool removeImpl(const UUID & id, bool throw_if_not_exists) override; + bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; - bool hasSubscriptionImpl(const UUID & id) const override; - bool hasSubscriptionImpl(AccessEntityType type) const override; - UUID loginImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const override; - UUID getIDOfLoggedUserImpl(const String & user_name) const override; + std::optional authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists) const override; private: using Storages = std::vector; diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index 93b8a5c992a..a564fa65223 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -91,7 +91,7 @@ static void retryOnZooKeeperUserError(size_t attempts, Func && function) } } -UUID ReplicatedAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists) +std::optional ReplicatedAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) { const UUID id = generateRandomID(); const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(new_entity->getType()); @@ -99,7 +99,11 @@ UUID ReplicatedAccessStorage::insertImpl(const AccessEntityPtr & new_entity, boo LOG_DEBUG(getLogger(), "Inserting entity of type {} named {} with id {}", type_info.name, name, toString(id)); auto zookeeper = get_zookeeper(); - retryOnZooKeeperUserError(10, [&]{ insertZooKeeper(zookeeper, id, new_entity, replace_if_exists); }); + bool ok = false; + retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); }); + + if (!ok) + return std::nullopt; Notifications notifications; SCOPE_EXIT({ notify(notifications); }); @@ -109,8 +113,12 @@ UUID ReplicatedAccessStorage::insertImpl(const AccessEntityPtr & new_entity, boo } -void ReplicatedAccessStorage::insertZooKeeper( - const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists) +bool ReplicatedAccessStorage::insertZooKeeper( + const zkutil::ZooKeeperPtr & zookeeper, + const UUID & id, + const AccessEntityPtr & new_entity, + bool replace_if_exists, + bool throw_if_exists) { const String & name = new_entity->getName(); const AccessEntityType type = new_entity->getType(); @@ -131,6 +139,7 @@ void ReplicatedAccessStorage::insertZooKeeper( Coordination::Responses responses; const Coordination::Error res = zookeeper->tryMulti(ops, responses); + if (res == Coordination::Error::ZNODEEXISTS) { if (responses[0]->error == Coordination::Error::ZNODEEXISTS) @@ -166,33 +175,47 @@ void ReplicatedAccessStorage::insertZooKeeper( /// If this fails, then we'll just retry from the start. zookeeper->multi(replace_ops); + + /// Everything's fine, the new entity has been inserted instead of an existing entity. 
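// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: the common shape that
// insertImpl()/removeImpl()/updateImpl() take in ReplicatedAccessStorage after
// this change. The ZooKeeper helpers now return true/false ("done" / "nothing
// to do") and only real coordination errors escape as exceptions, which the
// file-local retryOnZooKeeperUserError() retries. Both std::function parameters
// are placeholders.
// ---------------------------------------------------------------------------
#include <functional>

bool writeThroughZooKeeper(
    const std::function<bool()> & do_in_zookeeper,   /// e.g. insertZooKeeper(...)
    const std::function<void()> & apply_locally)     /// refresh the local cache and notify subscribers
{
    bool ok = false;
    retryOnZooKeeperUserError(10, [&] { ok = do_in_zookeeper(); });

    if (!ok)
        return false;   /// e.g. name collision / missing entity with the throw_* flag set to false

    apply_locally();
    return true;
}
// ---------------------------------------------------------------------------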
+ return true; } else { - throwNameCollisionCannotInsert(type, name); + /// Couldn't insert the new entity because there is an existing entity with such name. + if (throw_if_exists) + throwNameCollisionCannotInsert(type, name); + else + return false; } } - else - { - zkutil::KeeperMultiException::check(res, ops, responses); - } + + /// If this fails, then we'll just retry from the start. + zkutil::KeeperMultiException::check(res, ops, responses); + + /// Everything's fine, the new entity has been inserted. + return true; } -void ReplicatedAccessStorage::removeImpl(const UUID & id) +bool ReplicatedAccessStorage::removeImpl(const UUID & id, bool throw_if_not_exists) { LOG_DEBUG(getLogger(), "Removing entity {}", toString(id)); auto zookeeper = get_zookeeper(); - retryOnZooKeeperUserError(10, [&] { removeZooKeeper(zookeeper, id); }); + bool ok = false; + retryOnZooKeeperUserError(10, [&] { ok = removeZooKeeper(zookeeper, id, throw_if_not_exists); }); + + if (!ok) + return false; Notifications notifications; SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; removeEntityNoLock(id, notifications); + return true; } -void ReplicatedAccessStorage::removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id) +bool ReplicatedAccessStorage::removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists) { const String entity_uuid = toString(id); const String entity_path = zookeeper_path + "/uuid/" + entity_uuid; @@ -201,7 +224,13 @@ void ReplicatedAccessStorage::removeZooKeeper(const zkutil::ZooKeeperPtr & zooke Coordination::Stat entity_stat; const bool uuid_exists = zookeeper->tryGet(entity_path, entity_definition, &entity_stat); if (!uuid_exists) - throwNotFound(id); + { + /// Couldn't remove, there is no such entity. + if (throw_if_not_exists) + throwNotFound(id); + else + return false; + } const AccessEntityPtr entity = deserializeAccessEntity(entity_definition, entity_path); const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(entity->getType()); @@ -212,26 +241,35 @@ void ReplicatedAccessStorage::removeZooKeeper(const zkutil::ZooKeeperPtr & zooke Coordination::Requests ops; ops.emplace_back(zkutil::makeRemoveRequest(entity_path, entity_stat.version)); ops.emplace_back(zkutil::makeRemoveRequest(entity_name_path, -1)); + /// If this fails, then we'll just retry from the start. zookeeper->multi(ops); + + /// Everything's fine, the entity has been removed. 
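// ---------------------------------------------------------------------------
// Illustrative sketch, not part of this patch: the optimistic pattern that
// removeZooKeeper()/updateZooKeeper() above rely on. The version obtained from
// tryGet() goes into the remove request, so multi() fails if the node changed
// in the meantime and the whole operation is retried from the start. Names and
// the exact include are assumptions for the example.
// ---------------------------------------------------------------------------
#include <string>
#include <Common/ZooKeeper/ZooKeeper.h>

bool removeIfUnchanged(const zkutil::ZooKeeperPtr & zookeeper, const std::string & path)
{
    std::string value;
    Coordination::Stat stat;
    if (!zookeeper->tryGet(path, value, &stat))
        return false;   /// the node is already gone

    Coordination::Requests ops;
    ops.emplace_back(zkutil::makeRemoveRequest(path, stat.version));
    zookeeper->multi(ops);   /// throws if the stored version no longer matches
    return true;
}
// ---------------------------------------------------------------------------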
+ return true; } -void ReplicatedAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func) +bool ReplicatedAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { LOG_DEBUG(getLogger(), "Updating entity {}", toString(id)); auto zookeeper = get_zookeeper(); - retryOnZooKeeperUserError(10, [&] { updateZooKeeper(zookeeper, id, update_func); }); + bool ok = false; + retryOnZooKeeperUserError(10, [&] { ok = updateZooKeeper(zookeeper, id, update_func, throw_if_not_exists); }); + + if (!ok) + return false; Notifications notifications; SCOPE_EXIT({ notify(notifications); }); std::lock_guard lock{mutex}; refreshEntityNoLock(zookeeper, id, notifications); + return true; } -void ReplicatedAccessStorage::updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func) +bool ReplicatedAccessStorage::updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) { const String entity_uuid = toString(id); const String entity_path = zookeeper_path + "/uuid/" + entity_uuid; @@ -240,7 +278,12 @@ void ReplicatedAccessStorage::updateZooKeeper(const zkutil::ZooKeeperPtr & zooke Coordination::Stat stat; const bool uuid_exists = zookeeper->tryGet(entity_path, old_entity_definition, &stat); if (!uuid_exists) - throwNotFound(id); + { + if (throw_if_not_exists) + throwNotFound(id); + else + return false; + } const AccessEntityPtr old_entity = deserializeAccessEntity(old_entity_definition, entity_path); const AccessEntityPtr new_entity = update_func(old_entity); @@ -276,7 +319,11 @@ void ReplicatedAccessStorage::updateZooKeeper(const zkutil::ZooKeeperPtr & zooke } else { + /// If this fails, then we'll just retry from the start. zkutil::KeeperMultiException::check(res, ops, responses); + + /// Everything's fine, the entity has been updated. + return true; } } @@ -525,30 +572,29 @@ std::vector ReplicatedAccessStorage::findAllImpl(AccessEntityType type) co } -bool ReplicatedAccessStorage::existsImpl(const UUID & id) const +bool ReplicatedAccessStorage::exists(const UUID & id) const { std::lock_guard lock{mutex}; return entries_by_id.count(id); } -AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id) const +AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const { std::lock_guard lock{mutex}; const auto it = entries_by_id.find(id); if (it == entries_by_id.end()) - throwNotFound(id); + { + if (throw_if_not_exists) + throwNotFound(id); + else + return nullptr; + } const Entry & entry = it->second; return entry.entity; } -String ReplicatedAccessStorage::readNameImpl(const UUID & id) const -{ - return readImpl(id)->getName(); -} - - void ReplicatedAccessStorage::prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const { const AccessEntityPtr entity = remove ? 
nullptr : entry.entity; @@ -598,7 +644,7 @@ scope_guard ReplicatedAccessStorage::subscribeForChangesImpl(const UUID & id, co } -bool ReplicatedAccessStorage::hasSubscriptionImpl(const UUID & id) const +bool ReplicatedAccessStorage::hasSubscription(const UUID & id) const { std::lock_guard lock{mutex}; const auto & it = entries_by_id.find(id); @@ -611,7 +657,7 @@ bool ReplicatedAccessStorage::hasSubscriptionImpl(const UUID & id) const } -bool ReplicatedAccessStorage::hasSubscriptionImpl(AccessEntityType type) const +bool ReplicatedAccessStorage::hasSubscription(AccessEntityType type) const { std::lock_guard lock{mutex}; const auto & handlers = handlers_by_type[static_cast(type)]; diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index 54dbfbf5b7d..8fdd24b6d54 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -32,6 +32,10 @@ public: virtual void startup(); virtual void shutdown(); + bool exists(const UUID & id) const override; + bool hasSubscription(const UUID & id) const override; + bool hasSubscription(AccessEntityType type) const override; + private: String zookeeper_path; zkutil::GetZooKeeper get_zookeeper; @@ -41,13 +45,13 @@ private: ThreadFromGlobalPool worker_thread; ConcurrentBoundedQueue refresh_queue; - UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override; - void removeImpl(const UUID & id) override; - void updateImpl(const UUID & id, const UpdateFunc & update_func) override; + std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; + bool removeImpl(const UUID & id, bool throw_if_not_exists) override; + bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override; - void insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists); - void removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id); - void updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func); + bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists); + bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists); + bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists); void runWorkerThread(); void resetAfterError(); @@ -71,16 +75,11 @@ private: std::optional findImpl(AccessEntityType type, const String & name) const override; std::vector findAllImpl(AccessEntityType type) const override; - bool existsImpl(const UUID & id) const override; - AccessEntityPtr readImpl(const UUID & id) const override; - String readNameImpl(const UUID & id) const override; - bool canInsertImpl(const AccessEntityPtr &) const override { return true; } + AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; void prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const; scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; - bool hasSubscriptionImpl(const UUID & id) const override; - bool hasSubscriptionImpl(AccessEntityType type) const override; mutable std::mutex 
mutex; std::unordered_map entries_by_id; diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index be59fc13136..9d21ba91f5d 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int READONLY; extern const int QUERY_IS_PROHIBITED; extern const int SETTING_CONSTRAINT_VIOLATION; + extern const int UNKNOWN_SETTING; } @@ -200,7 +201,23 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh }; if (reaction == THROW_ON_VIOLATION) - access_control->checkSettingNameIsAllowed(setting_name); + { + try + { + access_control->checkSettingNameIsAllowed(setting_name); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::UNKNOWN_SETTING) + { + if (const auto hints = current_settings.getHints(change.name); !hints.empty()) + { + e.addMessage(fmt::format("Maybe you meant {}", toString(hints))); + } + } + throw; + } + } else if (!access_control->isSettingNameAllowed(setting_name)) return false; diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 7b4ff2d3296..5bd2da97445 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -610,41 +610,21 @@ std::vector UsersConfigAccessStorage::findAllImpl(AccessEntityType type) c } -bool UsersConfigAccessStorage::existsImpl(const UUID & id) const +bool UsersConfigAccessStorage::exists(const UUID & id) const { return memory_storage.exists(id); } -AccessEntityPtr UsersConfigAccessStorage::readImpl(const UUID & id) const +AccessEntityPtr UsersConfigAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const { - return memory_storage.read(id); + return memory_storage.read(id, throw_if_not_exists); } -String UsersConfigAccessStorage::readNameImpl(const UUID & id) const +std::optional UsersConfigAccessStorage::readNameImpl(const UUID & id, bool throw_if_not_exists) const { - return memory_storage.readName(id); -} - - -UUID UsersConfigAccessStorage::insertImpl(const AccessEntityPtr & entity, bool) -{ - throwReadonlyCannotInsert(entity->getType(), entity->getName()); -} - - -void UsersConfigAccessStorage::removeImpl(const UUID & id) -{ - auto entity = read(id); - throwReadonlyCannotRemove(entity->getType(), entity->getName()); -} - - -void UsersConfigAccessStorage::updateImpl(const UUID & id, const UpdateFunc &) -{ - auto entity = read(id); - throwReadonlyCannotUpdate(entity->getType(), entity->getName()); + return memory_storage.readName(id, throw_if_not_exists); } @@ -660,13 +640,13 @@ scope_guard UsersConfigAccessStorage::subscribeForChangesImpl(AccessEntityType t } -bool UsersConfigAccessStorage::hasSubscriptionImpl(const UUID & id) const +bool UsersConfigAccessStorage::hasSubscription(const UUID & id) const { return memory_storage.hasSubscription(id); } -bool UsersConfigAccessStorage::hasSubscriptionImpl(AccessEntityType type) const +bool UsersConfigAccessStorage::hasSubscription(AccessEntityType type) const { return memory_storage.hasSubscription(type); } diff --git a/src/Access/UsersConfigAccessStorage.h b/src/Access/UsersConfigAccessStorage.h index 8f87e5ad928..470990f0706 100644 --- a/src/Access/UsersConfigAccessStorage.h +++ b/src/Access/UsersConfigAccessStorage.h @@ -27,6 +27,7 @@ public: const char * getStorageType() const override { return STORAGE_TYPE; } String getStorageParamsJSON() const override; + bool isReadOnly() const override { return true; } String getPath() const; bool isPathEqual(const String & 
path_) const; @@ -41,22 +42,19 @@ public: void startPeriodicReloading(); void stopPeriodicReloading(); + bool exists(const UUID & id) const override; + bool hasSubscription(const UUID & id) const override; + bool hasSubscription(AccessEntityType type) const override; + private: void parseFromConfig(const Poco::Util::AbstractConfiguration & config); std::optional findImpl(AccessEntityType type, const String & name) const override; std::vector findAllImpl(AccessEntityType type) const override; - bool existsImpl(const UUID & id) const override; - AccessEntityPtr readImpl(const UUID & id) const override; - String readNameImpl(const UUID & id) const override; - bool canInsertImpl(const AccessEntityPtr &) const override { return false; } - UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override; - void removeImpl(const UUID & id) override; - void updateImpl(const UUID & id, const UpdateFunc & update_func) override; + AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional readNameImpl(const UUID & id, bool throw_if_not_exists) const override; scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override; scope_guard subscribeForChangesImpl(AccessEntityType type, const OnChangedHandler & handler) const override; - bool hasSubscriptionImpl(const UUID & id) const override; - bool hasSubscriptionImpl(AccessEntityType type) const override; MemoryAccessStorage memory_storage; CheckSettingNameFunction check_setting_name_function; diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index 0de6272d23e..064b7b00c86 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -90,7 +90,7 @@ private: throw; } - for (i = 0; i < old_size; i++) + for (i = 0; i < old_size; ++i) { nested_func->merge(&new_state[i * nested_size_of_data], &old_state[i * nested_size_of_data], diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h index 77dcede4c20..94d64d47b51 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h @@ -54,6 +54,8 @@ public: template class AggregateFunctionBitmapL2 final : public IAggregateFunctionDataHelper> { +private: + static constexpr auto STATE_VERSION_1_MIN_REVISION = 54455; public: AggregateFunctionBitmapL2(const DataTypePtr & type) : IAggregateFunctionDataHelper>({type}, {}) @@ -105,9 +107,38 @@ public: } } - void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { this->data(place).rbs.write(buf); } + bool isVersioned() const override { return true; } - void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { this->data(place).rbs.read(buf); } + size_t getDefaultVersion() const override { return 1; } + + size_t getVersionFromRevision(size_t revision) const override + { + if (revision >= STATE_VERSION_1_MIN_REVISION) + return 1; + else + return 0; + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional version) const override + { + if (!version) + version = getDefaultVersion(); + + if (*version >= 1) + DB::writeBoolText(this->data(place).init, buf); + + this->data(place).rbs.write(buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & 
buf, std::optional version, Arena *) const override + { + if (!version) + version = getDefaultVersion(); + + if (*version >= 1) + DB::readBoolText(this->data(place).init, buf); + this->data(place).rbs.read(buf); + } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index 878cbc3219f..eee91904b9b 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -421,6 +421,9 @@ public: */ UInt8 rb_contains(UInt64 x) const { + if (!std::is_same_v && x > rb_max()) + return 0; + if (isSmall()) return small.find(x) != small.end(); else @@ -432,6 +435,9 @@ public: */ void rb_remove(UInt64 x) { + if (!std::is_same_v && x > rb_max()) + return; + if (isSmall()) toLarge(); diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index 665e505aa4e..b858c6b628c 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -271,7 +271,7 @@ public: { lower_bound = std::min(lower_bound, other.lower_bound); upper_bound = std::max(upper_bound, other.upper_bound); - for (size_t i = 0; i < other.size; i++) + for (size_t i = 0; i < other.size; ++i) add(other.points[i].mean, other.points[i].weight, max_bins); } diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp index 4ac6a2dce21..d752900c018 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/AggregateFunctionIf.cpp @@ -56,7 +56,7 @@ static bool ALWAYS_INLINE inline is_all_zeros(const UInt8 * flags, size_t size) i += 8; } - for (; i < size; i++) + for (; i < size; ++i) if (flags[i]) return false; diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index c3b80cae080..801f3d5e28d 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -7,18 +7,20 @@ #include -#define TOP_K_MAX_SIZE 0xFFFFFF +static inline constexpr UInt64 TOP_K_MAX_SIZE = 0xFFFFFF; namespace DB { + struct Settings; namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ARGUMENT_OUT_OF_BOUND; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -42,19 +44,22 @@ class AggregateFunctionTopKDateTime : public AggregateFunctionTopK -static IAggregateFunction * createWithExtraTypes(const DataTypePtr & argument_type, UInt64 threshold, UInt64 load_factor, const Array & params) +static IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params) { - WhichDataType which(argument_type); + if (argument_types.empty()) + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Got empty arguments list"); + + WhichDataType which(argument_types[0]); if (which.idx == TypeIndex::Date) - return new AggregateFunctionTopKDate(threshold, load_factor, {argument_type}, params); + return new AggregateFunctionTopKDate(threshold, load_factor, argument_types, params); if (which.idx == TypeIndex::DateTime) - return new AggregateFunctionTopKDateTime(threshold, load_factor, {argument_type}, params); + return new AggregateFunctionTopKDateTime(threshold, 
load_factor, argument_types, params); /// Check that we can use plain version of AggregateFunctionTopKGeneric - if (argument_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion()) - return new AggregateFunctionTopKGeneric(threshold, load_factor, argument_type, params); + if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion()) + return new AggregateFunctionTopKGeneric(threshold, load_factor, argument_types, params); else - return new AggregateFunctionTopKGeneric(threshold, load_factor, argument_type, params); + return new AggregateFunctionTopKGeneric(threshold, load_factor, argument_types, params); } @@ -78,40 +83,37 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const if (!params.empty()) { if (params.size() > 2) - throw Exception("Aggregate function " + name + " requires two parameters or less.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Aggregate function '{}' requires two parameters or less", name); - UInt64 k = applyVisitor(FieldVisitorConvertToNumber(), params[0]); if (params.size() == 2) { load_factor = applyVisitor(FieldVisitorConvertToNumber(), params[1]); if (load_factor < 1) - throw Exception("Too small parameter 'load_factor' for aggregate function " + name + ". Minimum: 1", - ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Too small parameter 'load_factor' for aggregate function '{}' (got {}, minimum is 1)", name, load_factor); } - if (k > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || k * load_factor > TOP_K_MAX_SIZE) - throw Exception("Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE), - ErrorCodes::ARGUMENT_OUT_OF_BOUND); + threshold = applyVisitor(FieldVisitorConvertToNumber(), params[0]); - if (k == 0) - throw Exception("Parameter 0 is illegal for aggregate function " + name, - ErrorCodes::ARGUMENT_OUT_OF_BOUND); + if (threshold > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || threshold * load_factor > TOP_K_MAX_SIZE) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Too large parameter(s) for aggregate function '{}' (maximum is {})", name, toString(TOP_K_MAX_SIZE)); - threshold = k; + if (threshold == 0) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name); } AggregateFunctionPtr res(createWithNumericType( *argument_types[0], threshold, load_factor, argument_types, params)); if (!res) - res = AggregateFunctionPtr(createWithExtraTypes(argument_types[0], threshold, load_factor, params)); + res = AggregateFunctionPtr(createWithExtraTypes(argument_types, threshold, load_factor, params)); if (!res) - throw Exception("Illegal type " + argument_types[0]->getName() + - " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument for aggregate function '{}'", argument_types[0]->getName(), name); return res; } diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index eb84288a1ae..98774254695 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -132,8 +132,8 @@ private: public: AggregateFunctionTopKGeneric( - UInt64 threshold_, UInt64 load_factor, const DataTypePtr & input_data_type_, const Array & params) - : 
IAggregateFunctionDataHelper>({input_data_type_}, params) + UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params) + : IAggregateFunctionDataHelper>(argument_types_, params) , threshold(threshold_), reserved(load_factor * threshold), input_data_type(this->argument_types[0]) {} String getName() const override { return is_weighted ? "topKWeighted" : "topK"; } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 1a27f036af7..fd2100cc334 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -181,6 +182,13 @@ public: Arena * arena, ssize_t if_argument_pos = -1) const = 0; + /// The version of "addBatch" that handles sparse columns as arguments. + virtual void addBatchSparse( + AggregateDataPtr * places, + size_t place_offset, + const IColumn ** columns, + Arena * arena) const = 0; + virtual void mergeBatch( size_t batch_size, AggregateDataPtr * places, @@ -193,6 +201,10 @@ public: virtual void addBatchSinglePlace( size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos = -1) const = 0; + /// The version of "addBatchSinglePlace" that handles sparse columns as arguments. + virtual void addBatchSparseSinglePlace( + AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0; + /** The same for single place when need to aggregate only filtered data. * Instead of using an if-column, the condition is combined inside the null_map */ @@ -367,6 +379,22 @@ public: } } + void addBatchSparse( + AggregateDataPtr * places, + size_t place_offset, + const IColumn ** columns, + Arena * arena) const override + { + const auto & column_sparse = assert_cast(*columns[0]); + const auto * values = &column_sparse.getValuesColumn(); + size_t batch_size = column_sparse.size(); + auto offset_it = column_sparse.begin(); + + for (size_t i = 0; i < batch_size; ++i, ++offset_it) + static_cast(this)->add(places[offset_it.getCurrentRow()] + place_offset, + &values, offset_it.getValueIndex(), arena); + } + void mergeBatch( size_t batch_size, AggregateDataPtr * places, @@ -398,6 +426,19 @@ public: } } + void addBatchSparseSinglePlace( + AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override + { + /// TODO: add values and defaults separately if order of adding isn't important.
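The addBatchSparse implementation above touches each row exactly once: the iterator over the sparse column yields, for every row, an index into the values column, where index 0 is the shared default and index i + 1 belongs to the row stored at offsets[i]. A minimal standalone sketch of that dispatch loop is below; SparseColumn, the int64_t payload and the plain vector of per-row accumulators are illustrative stand-ins, not the actual IColumn or aggregate-state API.

#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified sparse layout: values[0] is the shared default,
// offsets[i] is the row that holds values[i + 1].
struct SparseColumn
{
    std::vector<int64_t> values;   // size k + 1
    std::vector<size_t> offsets;   // size k, strictly increasing row numbers
    size_t rows = 0;
};

// Walk all rows once, resolving each row to its value index without a per-row
// binary search (the offsets are consumed in order, like the real iterator).
void addBatchSparse(const SparseColumn & col, std::vector<int64_t> & places)
{
    size_t next_offset = 0;
    for (size_t row = 0; row < col.rows; ++row)
    {
        size_t value_index = 0;
        if (next_offset < col.offsets.size() && col.offsets[next_offset] == row)
            value_index = 1 + next_offset++;

        // Stand-in for IAggregateFunction::add(): accumulate into this row's place.
        places[row] += col.values[value_index];
    }
}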
+ const auto & column_sparse = assert_cast(*columns[0]); + const auto * values = &column_sparse.getValuesColumn(); + size_t batch_size = column_sparse.size(); + auto offset_it = column_sparse.begin(); + + for (size_t i = 0; i < batch_size; ++i, ++offset_it) + static_cast(this)->add(place, &values, offset_it.getValueIndex(), arena); + } + void addBatchSinglePlaceNotNull( size_t batch_size, AggregateDataPtr place, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bca9bd9d280..7124961821e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -107,7 +107,7 @@ if (USE_AWS_S3) endif() if (USE_AZURE_BLOB_STORAGE) - add_headers_and_sources(dbms Disks/BlobStorage) + add_headers_and_sources(dbms Disks/AzureBlobStorage) endif() if (USE_HDFS) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 58bc239f003..75e0588f786 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -4,10 +4,12 @@ #include #include #include +#include +#include #include -#include -#include +#include +#include #include #include #include "Common/Exception.h" @@ -52,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -105,6 +108,99 @@ namespace ProfileEvents namespace DB { +static void incrementProfileEventsBlock(Block & dst, const Block & src) +{ + if (!dst) + { + dst = src; + return; + } + + assertBlocksHaveEqualStructure(src, dst, "ProfileEvents"); + + std::unordered_map name_pos; + for (size_t i = 0; i < dst.columns(); ++i) + name_pos[dst.getByPosition(i).name] = i; + + size_t dst_rows = dst.rows(); + MutableColumns mutable_columns = dst.mutateColumns(); + + auto & dst_column_host_name = typeid_cast(*mutable_columns[name_pos["host_name"]]); + auto & dst_array_current_time = typeid_cast(*mutable_columns[name_pos["current_time"]]).getData(); + auto & dst_array_thread_id = typeid_cast(*mutable_columns[name_pos["thread_id"]]).getData(); + auto & dst_array_type = typeid_cast(*mutable_columns[name_pos["type"]]).getData(); + auto & dst_column_name = typeid_cast(*mutable_columns[name_pos["name"]]); + auto & dst_array_value = typeid_cast(*mutable_columns[name_pos["value"]]).getData(); + + const auto & src_column_host_name = typeid_cast(*src.getByName("host_name").column); + const auto & src_array_current_time = typeid_cast(*src.getByName("current_time").column).getData(); + const auto & src_array_thread_id = typeid_cast(*src.getByName("thread_id").column).getData(); + const auto & src_column_name = typeid_cast(*src.getByName("name").column); + const auto & src_array_value = typeid_cast(*src.getByName("value").column).getData(); + + struct Id + { + StringRef name; + StringRef host_name; + UInt64 thread_id; + + bool operator<(const Id & rhs) const + { + return std::tie(name, host_name, thread_id) + < std::tie(rhs.name, rhs.host_name, rhs.thread_id); + } + }; + std::map rows_by_name; + for (size_t src_row = 0; src_row < src.rows(); ++src_row) + { + Id id{ + src_column_name.getDataAt(src_row), + src_column_host_name.getDataAt(src_row), + src_array_thread_id[src_row], + }; + rows_by_name[id] = src_row; + } + + /// Merge src into dst. 
+ for (size_t dst_row = 0; dst_row < dst_rows; ++dst_row) + { + Id id{ + dst_column_name.getDataAt(dst_row), + dst_column_host_name.getDataAt(dst_row), + dst_array_thread_id[dst_row], + }; + + if (auto it = rows_by_name.find(id); it != rows_by_name.end()) + { + size_t src_row = it->second; + dst_array_current_time[dst_row] = src_array_current_time[src_row]; + + switch (dst_array_type[dst_row]) + { + case ProfileEvents::Type::INCREMENT: + dst_array_value[dst_row] += src_array_value[src_row]; + break; + case ProfileEvents::Type::GAUGE: + dst_array_value[dst_row] = src_array_value[src_row]; + break; + } + + rows_by_name.erase(it); + } + } + + /// Copy rows from src that dst does not contains. + for (const auto & [id, pos] : rows_by_name) + { + for (size_t col = 0; col < src.columns(); ++col) + { + mutable_columns[col]->insert((*src.getByPosition(col).column)[pos]); + } + } + + dst.setColumns(std::move(mutable_columns)); +} + std::atomic_flag exit_on_signal = ATOMIC_FLAG_INIT; @@ -465,7 +561,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) try { - processParsedSingleQuery(full_query, query_to_execute, parsed_query); + processParsedSingleQuery(full_query, query_to_execute, parsed_query, echo_queries); } catch (Exception & e) { @@ -753,7 +849,7 @@ void ClientBase::onProfileEvents(Block & block) } else { - profile_events.last_block = block; + incrementProfileEventsBlock(profile_events.last_block, block); } } profile_events.watch.restart(); @@ -1635,7 +1731,13 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, /// Check unrecognized options without positional options. auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); if (!unrecognized_options.empty()) + { + auto hints = this->getHints(unrecognized_options[0]); + if (!hints.empty()) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'. Maybe you meant {}", unrecognized_options[0], toString(hints)); + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); + } /// Check positional options (options after ' -- ', ex: clickhouse-client -- ). 
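incrementProfileEventsBlock above keeps one rolling block of profile events per (name, host_name, thread_id) key: rows of type INCREMENT are summed into the existing row, GAUGE rows overwrite it with the latest value, and rows that only exist in the incoming block are appended. The same merge policy, condensed to plain structs and a std::map instead of Block and IColumn (every name here is illustrative, and the current_time handling is left out), could look like this:

#include <cstdint>
#include <map>
#include <string>
#include <tuple>
#include <vector>

enum class EventType { Increment, Gauge };

struct EventRow
{
    std::string name;
    std::string host;
    uint64_t thread_id = 0;
    EventType type = EventType::Increment;
    int64_t value = 0;
};

// Merge 'src' into 'dst' in place, keyed by (name, host, thread_id).
void mergeProfileEvents(std::vector<EventRow> & dst, const std::vector<EventRow> & src)
{
    std::map<std::tuple<std::string, std::string, uint64_t>, size_t> index;
    for (size_t i = 0; i < dst.size(); ++i)
        index[{dst[i].name, dst[i].host, dst[i].thread_id}] = i;

    for (const auto & row : src)
    {
        auto it = index.find({row.name, row.host, row.thread_id});
        if (it == index.end())
            dst.push_back(row);                        // unseen counter: append as-is
        else if (dst[it->second].type == EventType::Increment)
            dst[it->second].value += row.value;        // counters accumulate
        else
            dst[it->second].value = row.value;         // gauges keep the latest value
    }
}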
unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); @@ -1713,6 +1815,25 @@ void ClientBase::init(int argc, char ** argv) ; addOptions(options_description); + + auto getter = [](const auto & op) + { + String op_long_name = op->long_name(); + return "--" + String(op_long_name); + }; + + if (options_description.main_description) + { + const auto & main_options = options_description.main_description->options(); + std::transform(main_options.begin(), main_options.end(), std::back_inserter(cmd_options), getter); + } + + if (options_description.external_description) + { + const auto & external_options = options_description.external_description->options(); + std::transform(external_options.begin(), external_options.end(), std::back_inserter(cmd_options), getter); + } + parseAndCheckOptions(options_description, options, common_arguments); po::notify(options); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 4c5d29b390b..1926df5afea 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -1,5 +1,6 @@ #pragma once +#include "Common/NamePrompter.h" #include #include #include @@ -37,7 +38,7 @@ void interruptSignalHandler(int signum); class InternalTextLogs; -class ClientBase : public Poco::Util::Application +class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase> { public: @@ -48,6 +49,8 @@ public: void init(int argc, char ** argv); + std::vector getAllRegisteredNames() const override { return cmd_options; } + protected: void runInteractive(); void runNonInteractive(); @@ -145,6 +148,7 @@ protected: std::vector queries_files; /// If not empty, queries will be read from these files std::vector interleave_queries_files; /// If not empty, run queries from these files before processing every file from 'queries_files'. + std::vector cmd_options; bool stdin_is_a_tty = false; /// stdin is a terminal. bool stdout_is_a_tty = false; /// stdout is a terminal. 
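Collecting every registered option name into cmd_options is what lets parseAndCheckOptions answer an unknown switch with a "Maybe you meant ..." hint through IHints::getHints. The sketch below reproduces the idea with a plain Levenshtein distance; the real client goes through NamePrompter, so suggestOption, editDistance and the distance cut-off of 2 are assumptions made for this illustration only.

#include <algorithm>
#include <string>
#include <vector>

// Classic dynamic-programming edit distance between two strings.
size_t editDistance(const std::string & a, const std::string & b)
{
    std::vector<size_t> prev(b.size() + 1), cur(b.size() + 1);
    for (size_t j = 0; j <= b.size(); ++j)
        prev[j] = j;

    for (size_t i = 1; i <= a.size(); ++i)
    {
        cur[0] = i;
        for (size_t j = 1; j <= b.size(); ++j)
            cur[j] = std::min({prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (a[i - 1] != b[j - 1] ? 1 : 0)});
        std::swap(prev, cur);
    }
    return prev[b.size()];
}

// Return the closest registered option within 'max_distance', or an empty string.
std::string suggestOption(const std::string & unknown, const std::vector<std::string> & registered, size_t max_distance = 2)
{
    std::string best;
    size_t best_distance = max_distance + 1;
    for (const auto & candidate : registered)
    {
        size_t d = editDistance(unknown, candidate);
        if (d < best_distance)
        {
            best_distance = d;
            best = candidate;
        }
    }
    return best;   // e.g. suggestOption("--querry", {"--query", "--queries-file"}) == "--query"
}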
diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index e1c1481c5b4..3a5d4f4cf33 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -1,8 +1,8 @@ #include "ClientBaseHelpers.h" -#include -#include +#include +#include #include #include diff --git a/src/Client/IConnections.cpp b/src/Client/IConnections.cpp index dc57cae61a4..1ab7ba6e5d8 100644 --- a/src/Client/IConnections.cpp +++ b/src/Client/IConnections.cpp @@ -25,7 +25,12 @@ struct PocoSocketWrapper : public Poco::Net::SocketImpl void IConnections::DrainCallback::operator()(int fd, Poco::Timespan, const std::string fd_description) const { if (!PocoSocketWrapper(fd).poll(drain_timeout, Poco::Net::Socket::SELECT_READ)) - throw Exception(ErrorCodes::SOCKET_TIMEOUT, "Read timeout while draining from {}", fd_description); + { + throw Exception(ErrorCodes::SOCKET_TIMEOUT, + "Read timeout ({} ms) while draining from {}", + drain_timeout.totalMilliseconds(), + fd_description); + } } } diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 9eaa9ce883a..8ad853950b2 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -214,15 +214,15 @@ bool LocalConnection::poll(size_t) if (next_packet_type) return true; - if (send_progress && (state->after_send_progress.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) - { - state->after_send_progress.restart(); - next_packet_type = Protocol::Server::Progress; - return true; - } - if (!state->is_finished) { + if (send_progress && (state->after_send_progress.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) + { + state->after_send_progress.restart(); + next_packet_type = Protocol::Server::Progress; + return true; + } + try { pollImpl(); @@ -282,6 +282,18 @@ bool LocalConnection::poll(size_t) } } + if (state->is_finished && !state->sent_profile_info) + { + state->sent_profile_info = true; + + if (state->executor) + { + next_packet_type = Protocol::Server::ProfileInfo; + state->profile_info = state->executor->getProfileInfo(); + return true; + } + } + if (state->is_finished) { finishQuery(); @@ -349,6 +361,16 @@ Packet LocalConnection::receivePacket() next_packet_type.reset(); break; } + case Protocol::Server::ProfileInfo: + { + if (state->profile_info) + { + packet.profile_info = std::move(*state->profile_info); + state->profile_info.reset(); + } + next_packet_type.reset(); + break; + } case Protocol::Server::TableColumns: { if (state->columns_description) diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index fbd054506e7..92c2af30c80 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -35,6 +35,7 @@ struct LocalQueryState /// Current block to be sent next. std::optional block; std::optional columns_description; + std::optional profile_info; /// Is request cancelled bool is_cancelled = false; @@ -43,6 +44,7 @@ struct LocalQueryState bool sent_totals = false; bool sent_extremes = false; bool sent_progress = false; + bool sent_profile_info = false; /// To output progress, the difference after the previous sending of progress. 
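The LocalConnection changes above reorder poll() so that progress is only reported while the query is still running, and add a sent_profile_info flag so the ProfileInfo packet is produced exactly once after the query finishes, before the final end-of-stream. A toy model of that sequencing (the string packet names and the QueryState fields here are simplifications, not the real protocol types):

#include <string>

struct QueryState
{
    bool finished = false;
    bool sent_profile_info = false;
};

// Decide which packet to report next, mirroring the order used in poll().
std::string nextPacket(QueryState & state)
{
    if (!state.finished)
        return "Progress";            // rate-limited by interactive_delay in the real code

    if (!state.sent_profile_info)
    {
        state.sent_profile_info = true;   // the flag guarantees a single ProfileInfo packet
        return "ProfileInfo";
    }

    return "EndOfStream";             // nothing left to report for this query
}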
Progress progress; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index c3000443a9c..37a372dfb45 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -395,17 +395,17 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead read_list.push_back(*connection->socket); } + auto timeout = is_draining ? drain_timeout : receive_timeout; int n = Poco::Net::Socket::select( read_list, write_list, except_list, - is_draining ? drain_timeout : receive_timeout); + timeout); /// We treat any error as timeout for simplicity. /// And we also check if read_list is still empty just in case. if (n <= 0 || read_list.empty()) { - auto err_msg = fmt::format("Timeout exceeded while reading from {}", dumpAddressesUnlocked()); for (ReplicaState & state : replica_states) { Connection * connection = state.connection; @@ -415,7 +415,10 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead invalidateReplica(state); } } - throw Exception(err_msg, ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, + "Timeout ({} ms) exceeded while reading from {}", + timeout.totalMilliseconds(), + dumpAddressesUnlocked()); } } diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index b5efff928bb..cfc4f74ac26 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -133,6 +133,11 @@ public: void get(size_t n, Field & res) const override; + bool isDefaultAt(size_t) const override + { + throw Exception("Method isDefaultAt is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED); + } + StringRef getDataAt(size_t n) const override; void insertData(const char * pos, size_t length) override; @@ -208,6 +213,16 @@ public: throw Exception("Method hasEqualValues is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED); } + double getRatioOfDefaultRows(double) const override + { + throw Exception("Method getRatioOfDefaultRows is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED); + } + + void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override + { + throw Exception("Method getIndicesOfNonDefaultRows is not supported for ColumnAggregateFunction", ErrorCodes::NOT_IMPLEMENTED); + } + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 5e40b89cc7e..929c0153a0a 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -182,6 +182,13 @@ StringRef ColumnArray::getDataAt(size_t n) const } +bool ColumnArray::isDefaultAt(size_t n) const +{ + const auto & offsets_data = getOffsets(); + return offsets_data[n] == offsets_data[static_cast(n) - 1]; +} + + void ColumnArray::insertData(const char * pos, size_t length) { /** Similarly - only for arrays of fixed length values. 
@@ -576,7 +583,8 @@ void ColumnArray::expand(const IColumn::Filter & mask, bool inverted) } if (from != -1) - throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);} + throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR); +} template ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint) const @@ -868,6 +876,16 @@ ColumnPtr ColumnArray::compress() const }); } +double ColumnArray::getRatioOfDefaultRows(double sample_ratio) const +{ + return getRatioOfDefaultRowsImpl(sample_ratio); +} + +void ColumnArray::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const +{ + return getIndicesOfNonDefaultRowsImpl(indices, from, limit); +} + ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 48c54eca319..4d15e123770 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -60,6 +60,7 @@ public: Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; StringRef getDataAt(size_t n) const override; + bool isDefaultAt(size_t n) const override; void insertData(const char * pos, size_t length) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; const char * deserializeAndInsertFromArena(const char * pos) override; @@ -143,6 +144,10 @@ public: return false; } + double getRatioOfDefaultRows(double sample_ratio) const override; + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override; + bool isCollationSupported() const override { return getData().isCollationSupported(); } private: diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 92bdca7cd72..e7bbed53b8d 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -82,6 +82,7 @@ public: Field operator[](size_t) const override { throwMustBeDecompressed(); } void get(size_t, Field &) const override { throwMustBeDecompressed(); } StringRef getDataAt(size_t) const override { throwMustBeDecompressed(); } + bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } void insertData(const char *, size_t) override { throwMustBeDecompressed(); } @@ -113,6 +114,8 @@ public: void gather(ColumnGathererStream &) override { throwMustBeDecompressed(); } void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); } size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); } + double getRatioOfDefaultRows(double) const override { throwMustBeDecompressed(); } + void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); } protected: size_t rows; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 1faf2a999b2..1ddc8789e7d 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -115,6 +116,11 @@ public: return data->getFloat32(0); } + bool isDefaultAt(size_t) const override + { + return data->isDefaultAt(0); + } + bool isNullAt(size_t) const override { return data->isNullAt(0); @@ -239,6 +245,27 @@ public: return false; } + double getRatioOfDefaultRows(double) const override + { + return data->isDefaultAt(0) ? 
1.0 : 0.0; + } + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override + { + if (!data->isDefaultAt(0)) + { + size_t to = limit && from + limit < size() ? from + limit : size(); + indices.reserve(indices.size() + to - from); + for (size_t i = from; i < to; ++i) + indices.push_back(i); + } + } + + SerializationInfoPtr getSerializationInfo() const override + { + return data->getSerializationInfo(); + } + bool isNullable() const override { return isColumnNullable(*data); } bool onlyNull() const override { return data->isNullAt(0); } bool isNumeric() const override { return data->isNumeric(); } diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 70e2b4a6d96..99085f0f976 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -331,7 +331,8 @@ void ColumnDecimal::gather(ColumnGathererStream & gatherer) template ColumnPtr ColumnDecimal::compress() const { - size_t source_size = data.size() * sizeof(T); + const size_t data_size = data.size(); + const size_t source_size = data_size * sizeof(T); /// Don't compress small blocks. if (source_size < 4096) /// A wild guess. @@ -342,8 +343,9 @@ ColumnPtr ColumnDecimal::compress() const if (!compressed) return ColumnCompressed::wrap(this->getPtr()); - return ColumnCompressed::create(data.size(), compressed->size(), - [compressed = std::move(compressed), column_size = data.size(), scale = this->scale] + const size_t compressed_size = compressed->size(); + return ColumnCompressed::create(data_size, compressed_size, + [compressed = std::move(compressed), column_size = data_size, scale = this->scale] { auto res = ColumnDecimal::create(column_size, scale); ColumnCompressed::decompressBuffer( diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index ee13c7e7b2b..18d4526e0f3 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -177,8 +177,17 @@ public: return false; } - ColumnPtr compress() const override; + double getRatioOfDefaultRows(double sample_ratio) const override + { + return this->template getRatioOfDefaultRowsImpl(sample_ratio); + } + void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override + { + return this->template getIndicesOfNonDefaultRowsImpl(indices, from, limit); + } + + ColumnPtr compress() const override; void insertValue(const T value) { data.push_back(value); } Container & getData() { return data; } diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 1eb2d4d5b1f..0828f8ebd89 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -51,6 +51,12 @@ MutableColumnPtr ColumnFixedString::cloneResized(size_t size) const return new_col_holder; } +bool ColumnFixedString::isDefaultAt(size_t index) const +{ + assert(index < size()); + return memoryIsZero(chars.data() + index * n, n); +} + void ColumnFixedString::insert(const Field & x) { const String & s = DB::get(x); @@ -409,9 +415,9 @@ ColumnPtr ColumnFixedString::compress() const if (!compressed) return ColumnCompressed::wrap(this->getPtr()); - size_t column_size = size(); - - return ColumnCompressed::create(column_size, compressed->size(), + const size_t column_size = size(); + const size_t compressed_size = compressed->size(); + return ColumnCompressed::create(column_size, compressed_size, [compressed = std::move(compressed), column_size, n = n] { size_t chars_size = n * column_size; diff --git a/src/Columns/ColumnFixedString.h 
b/src/Columns/ColumnFixedString.h index 4a66a429d96..f813ef47f21 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -88,6 +88,8 @@ public: return StringRef(&chars[n * index], n); } + bool isDefaultAt(size_t index) const override; + void insert(const Field & x) override; void insertFrom(const IColumn & src_, size_t index) override; @@ -173,6 +175,11 @@ public: chars.reserve(n * size); } + void resize(size_t size) + { + chars.resize(n * size); + } + void getExtremes(Field & min, Field & max) const override; bool structureEquals(const IColumn & rhs) const override @@ -182,6 +189,16 @@ public: return false; } + double getRatioOfDefaultRows(double sample_ratio) const override + { + return getRatioOfDefaultRowsImpl(sample_ratio); + } + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override + { + return getIndicesOfNonDefaultRowsImpl(indices, from, limit); + } + bool canBeInsideNullable() const override { return true; } bool isFixedAndContiguous() const override { return true; } diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 2354a4f0cb3..2592dc01f98 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -24,7 +24,12 @@ class ColumnFunction final : public COWHelper private: friend class COWHelper; - ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_ = false, bool is_function_compiled_ = false); + ColumnFunction( + size_t size, + FunctionBasePtr function_, + const ColumnsWithTypeAndName & columns_to_capture, + bool is_short_circuit_argument_ = false, + bool is_function_compiled_ = false); public: const char * getFamilyName() const override { return "Function"; } @@ -68,6 +73,11 @@ public: throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + bool isDefaultAt(size_t) const override + { + throw Exception("isDefaultAt is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + void insert(const Field &) override { throw Exception("Cannot insert into " + getName(), ErrorCodes::NOT_IMPLEMENTED); @@ -153,6 +163,16 @@ public: throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + double getRatioOfDefaultRows(double) const override + { + throw Exception("Method getRatioOfDefaultRows is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override + { + throw Exception("Method getIndicesOfNonDefaultRows is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + bool isShortCircuitArgument() const { return is_short_circuit_argument; } DataTypePtr getResultType() const; diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 13957357637..0baed4cfb2d 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -64,6 +64,7 @@ public: return getDictionary().getDataAtWithTerminatingZero(getIndexes().getUInt(n)); } + bool isDefaultAt(size_t n) const override { return getDictionary().isDefaultAt(getIndexes().getUInt(n)); } UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); } UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); } Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); } @@ -180,6 +181,16 @@ public: return 
false; } + double getRatioOfDefaultRows(double sample_ratio) const override + { + return getIndexes().getRatioOfDefaultRows(sample_ratio); + } + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override + { + return getIndexes().getIndicesOfNonDefaultRows(indices, from, limit); + } + bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); } bool isFixedAndContiguous() const override { return false; } size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 99d965a4ec1..e595525d9e8 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -81,6 +81,11 @@ void ColumnMap::get(size_t n, Field & res) const getNestedData().get(offset + i, map[i]); } +bool ColumnMap::isDefaultAt(size_t n) const +{ + return nested->isDefaultAt(n); +} + StringRef ColumnMap::getDataAt(size_t) const { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); @@ -273,6 +278,16 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const return false; } +double ColumnMap::getRatioOfDefaultRows(double sample_ratio) const +{ + return getRatioOfDefaultRowsImpl(sample_ratio); +} + +void ColumnMap::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const +{ + return getIndicesOfNonDefaultRowsImpl(indices, from, limit); +} + ColumnPtr ColumnMap::compress() const { auto compressed = nested->compress(); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index aa13bfd3d68..fb69541c363 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -51,6 +51,7 @@ public: Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + bool isDefaultAt(size_t n) const override; StringRef getDataAt(size_t n) const override; void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; @@ -85,6 +86,8 @@ public: void protect() override; void forEachSubcolumn(ColumnCallback callback) override; bool structureEquals(const IColumn & rhs) const override; + double getRatioOfDefaultRows(double sample_ratio) const override; + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override; const ColumnArray & getNestedColumn() const { return assert_cast(*nested); } ColumnArray & getNestedColumn() { return assert_cast(*nested); } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 4b3340a6b50..7a4ce8288a3 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -648,6 +648,29 @@ void ColumnNullable::checkConsistency() const ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT); } +ColumnPtr ColumnNullable::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const +{ + ColumnPtr new_values; + ColumnPtr new_null_map; + + if (default_field.getType() == Field::Types::Null) + { + auto default_column = nested_column->cloneEmpty(); + default_column->insertDefault(); + + /// Value in main column, when null map is 1 is implementation defined. So, take any value. 
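Most of these columns route getRatioOfDefaultRows through the shared getRatioOfDefaultRowsImpl helper, which is not part of this diff; presumably it estimates the ratio by probing a subset of rows controlled by sample_ratio rather than scanning the whole column. A hedged sketch of such an estimator (estimateDefaultRatio, its stride logic and the std::function callback are assumptions for illustration only):

#include <cstddef>
#include <functional>

// Estimate the fraction of default rows by checking an evenly spaced sample.
double estimateDefaultRatio(size_t rows, double sample_ratio, const std::function<bool(size_t)> & is_default_at)
{
    if (rows == 0 || sample_ratio <= 0.0)
        return 0.0;

    size_t step = sample_ratio >= 1.0 ? 1 : static_cast<size_t>(1.0 / sample_ratio);
    size_t checked = 0;
    size_t defaults = 0;
    for (size_t i = 0; i < rows; i += step, ++checked)
        defaults += is_default_at(i) ? 1 : 0;

    return static_cast<double>(defaults) / checked;
}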
+ new_values = nested_column->createWithOffsets(offsets, (*default_column)[0], total_rows, shift); + new_null_map = null_map->createWithOffsets(offsets, Field(1u), total_rows, shift); + } + else + { + new_values = nested_column->createWithOffsets(offsets, default_field, total_rows, shift); + new_null_map = null_map->createWithOffsets(offsets, Field(0u), total_rows, shift); + } + + return ColumnNullable::create(new_values, new_null_map); +} + ColumnPtr makeNullable(const ColumnPtr & column) { if (isColumnNullable(*column)) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 9da7b0dac1c..3e99a25a445 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -54,6 +54,7 @@ public: void get(size_t n, Field & res) const override; bool getBool(size_t n) const override { return isNullAt(n) ? false : nested_column->getBool(n); } UInt64 get64(size_t n) const override { return nested_column->get64(n); } + bool isDefaultAt(size_t n) const override { return isNullAt(n); } /** * If isNullAt(n) returns false, returns the nested column's getDataAt(n), otherwise returns a special value @@ -137,6 +138,18 @@ public: return false; } + double getRatioOfDefaultRows(double sample_ratio) const override + { + return null_map->getRatioOfDefaultRows(sample_ratio); + } + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override + { + null_map->getIndicesOfNonDefaultRows(indices, from, limit); + } + + ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override; + bool isNullable() const override { return true; } bool isFixedAndContiguous() const override { return false; } bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); } diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp new file mode 100644 index 00000000000..e9bdc3971c0 --- /dev/null +++ b/src/Columns/ColumnSparse.cpp @@ -0,0 +1,780 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; +} + +ColumnSparse::ColumnSparse(MutableColumnPtr && values_) + : values(std::move(values_)), _size(0) +{ + if (!values->empty()) + throw Exception("Not empty values passed to ColumnSparse, but no offsets passed", ErrorCodes::LOGICAL_ERROR); + + values->insertDefault(); + offsets = ColumnUInt64::create(); +} + +ColumnSparse::ColumnSparse(MutableColumnPtr && values_, MutableColumnPtr && offsets_, size_t size_) + : values(std::move(values_)), offsets(std::move(offsets_)), _size(size_) +{ + const ColumnUInt64 * offsets_concrete = typeid_cast(offsets.get()); + + if (!offsets_concrete) + throw Exception(ErrorCodes::LOGICAL_ERROR, "'offsets' column must be a ColumnUInt64, got: {}", offsets->getName()); + + /// 'values' should contain one extra element: default value at 0 position. 
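A ColumnSparse keeps two parallel parts: values, whose element 0 is the implicit default, and the strictly increasing offsets of the non-default rows; convertToFullColumnIfSparse later rebuilds the dense column from them via createWithOffsets. A standalone sketch of that expansion, with plain vectors standing in for IColumn (expandSparse and the int64_t element type are illustrative, not the real interface):

#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

// values[0] is the default; offsets[i] is the row holding values[i + 1].
std::vector<int64_t> expandSparse(const std::vector<int64_t> & values,
                                  const std::vector<size_t> & offsets,
                                  size_t rows)
{
    if (values.size() != offsets.size() + 1)
        throw std::invalid_argument("values must hold one extra element (the default)");

    std::vector<int64_t> full(rows, values[0]);   // start with the default everywhere
    for (size_t i = 0; i < offsets.size(); ++i)
        full.at(offsets[i]) = values[i + 1];      // drop each non-default value into place
    return full;
}

// Example: values = {0, 7, 9}, offsets = {2, 5}, rows = 7  ->  {0, 0, 7, 0, 0, 9, 0}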
+ if (offsets->size() + 1 != values->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Values size ({}) is inconsistent with offsets size ({})", values->size(), offsets->size()); + + if (_size < offsets->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Size of sparse column ({}) cannot be lower than number of non-default values ({})", _size, offsets->size()); + + if (!offsets_concrete->empty() && _size <= offsets_concrete->getData().back()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Size of sparse column ({}) should be greater than last position of non-default value ({})", + _size, offsets_concrete->getData().back()); + +#ifndef NDEBUG + const auto & offsets_data = getOffsetsData(); + const auto * it = std::adjacent_find(offsets_data.begin(), offsets_data.end(), std::greater_equal()); + if (it != offsets_data.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Offsets of ColumnSparse must be strictly sorted"); +#endif +} + +MutableColumnPtr ColumnSparse::cloneResized(size_t new_size) const +{ + if (new_size == 0) + return ColumnSparse::create(values->cloneEmpty()); + + if (new_size >= _size) + return ColumnSparse::create(IColumn::mutate(values), IColumn::mutate(offsets), new_size); + + auto res = ColumnSparse::create(values->cloneEmpty()); + res->insertRangeFrom(*this, 0, new_size); + return res; +} + +bool ColumnSparse::isDefaultAt(size_t n) const +{ + return getValueIndex(n) == 0; +} + +bool ColumnSparse::isNullAt(size_t n) const +{ + return values->isNullAt(getValueIndex(n)); +} + +Field ColumnSparse::operator[](size_t n) const +{ + return (*values)[getValueIndex(n)]; +} + +void ColumnSparse::get(size_t n, Field & res) const +{ + values->get(getValueIndex(n), res); +} + +bool ColumnSparse::getBool(size_t n) const +{ + return values->getBool(getValueIndex(n)); +} + +Float64 ColumnSparse::getFloat64(size_t n) const +{ + return values->getFloat64(getValueIndex(n)); +} + +Float32 ColumnSparse::getFloat32(size_t n) const +{ + return values->getFloat32(getValueIndex(n)); +} + +UInt64 ColumnSparse::getUInt(size_t n) const +{ + return values->getUInt(getValueIndex(n)); +} + +Int64 ColumnSparse::getInt(size_t n) const +{ + return values->getInt(getValueIndex(n)); +} + +UInt64 ColumnSparse::get64(size_t n) const +{ + return values->get64(getValueIndex(n)); +} + +StringRef ColumnSparse::getDataAt(size_t n) const +{ + return values->getDataAt(getValueIndex(n)); +} + +ColumnPtr ColumnSparse::convertToFullColumnIfSparse() const +{ + return values->createWithOffsets(getOffsetsData(), (*values)[0], _size, /*shift=*/ 1); +} + +void ColumnSparse::insertSingleValue(const Inserter & inserter) +{ + inserter(*values); + + size_t last_idx = values->size() - 1; + if (values->isDefaultAt(last_idx)) + values->popBack(1); + else + getOffsetsData().push_back(_size); + + ++_size; +} + +void ColumnSparse::insertData(const char * pos, size_t length) +{ + insertSingleValue([&](IColumn & column) { column.insertData(pos, length); }); +} + +StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +{ + return values->serializeValueIntoArena(getValueIndex(n), arena, begin); +} + +const char * ColumnSparse::deserializeAndInsertFromArena(const char * pos) +{ + const char * res = nullptr; + insertSingleValue([&](IColumn & column) { res = column.deserializeAndInsertFromArena(pos); }); + return res; +} + +const char * ColumnSparse::skipSerializedInArena(const char * pos) const +{ + return values->skipSerializedInArena(pos); +} + +void 
ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length) +{ + if (length == 0) + return; + + if (start + length > src.size()) + throw Exception("Parameter out of bound in IColumnString::insertRangeFrom method.", + ErrorCodes::LOGICAL_ERROR); + + auto & offsets_data = getOffsetsData(); + + size_t end = start + length; + if (const auto * src_sparse = typeid_cast(&src)) + { + const auto & src_offsets = src_sparse->getOffsetsData(); + const auto & src_values = src_sparse->getValuesColumn(); + + size_t offset_start = std::lower_bound(src_offsets.begin(), src_offsets.end(), start) - src_offsets.begin(); + size_t offset_end = std::lower_bound(src_offsets.begin(), src_offsets.end(), end) - src_offsets.begin(); + assert(offset_start <= offset_end); + + if (offset_start != offset_end) + { + offsets_data.reserve(offsets_data.size() + offset_end - offset_start); + insertManyDefaults(src_offsets[offset_start] - start); + offsets_data.push_back(_size); + ++_size; + + for (size_t i = offset_start + 1; i < offset_end; ++i) + { + size_t current_diff = src_offsets[i] - src_offsets[i - 1]; + insertManyDefaults(current_diff - 1); + offsets_data.push_back(_size); + ++_size; + } + + /// 'end' <= 'src_offsets[offsets_end]', but end is excluded, so index is 'offsets_end' - 1. + /// Since 'end' is excluded, need to subtract one more row from result. + insertManyDefaults(end - src_offsets[offset_end - 1] - 1); + values->insertRangeFrom(src_values, offset_start + 1, offset_end - offset_start); + } + else + { + insertManyDefaults(length); + } + } + else + { + for (size_t i = start; i < end; ++i) + { + if (!src.isDefaultAt(i)) + { + values->insertFrom(src, i); + offsets_data.push_back(_size); + } + + ++_size; + } + } +} + +void ColumnSparse::insert(const Field & x) +{ + insertSingleValue([&](IColumn & column) { column.insert(x); }); +} + +void ColumnSparse::insertFrom(const IColumn & src, size_t n) +{ + if (const auto * src_sparse = typeid_cast(&src)) + { + if (size_t value_index = src_sparse->getValueIndex(n)) + { + getOffsetsData().push_back(_size); + values->insertFrom(src_sparse->getValuesColumn(), value_index); + } + } + else + { + if (!src.isDefaultAt(n)) + { + values->insertFrom(src, n); + getOffsetsData().push_back(_size); + } + } + + ++_size; +} + +void ColumnSparse::insertDefault() +{ + ++_size; +} + +void ColumnSparse::insertManyDefaults(size_t length) +{ + _size += length; +} + +void ColumnSparse::popBack(size_t n) +{ + assert(n < _size); + + auto & offsets_data = getOffsetsData(); + size_t new_size = _size - n; + + size_t removed_values = 0; + while (!offsets_data.empty() && offsets_data.back() >= new_size) + { + offsets_data.pop_back(); + ++removed_values; + } + + if (removed_values) + values->popBack(removed_values); + + _size = new_size; +} + +ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const +{ + if (_size != filt.size()) + throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + if (offsets->empty()) + { + auto res = cloneEmpty(); + res->insertManyDefaults(countBytesInFilter(filt)); + return res; + } + + auto res_offsets = offsets->cloneEmpty(); + auto & res_offsets_data = assert_cast(*res_offsets).getData(); + + Filter values_filter; + values_filter.reserve(values->size()); + values_filter.push_back(1); + size_t values_result_size_hint = 1; + + size_t res_offset = 0; + auto offset_it = begin(); + for (size_t i = 0; i < _size; ++i, ++offset_it) + { + if (!offset_it.isDefault()) + { + if (filt[i]) + { + 
res_offsets_data.push_back(res_offset); + values_filter.push_back(1); + ++res_offset; + ++values_result_size_hint; + } + else + { + values_filter.push_back(0); + } + } + else + { + res_offset += filt[i] != 0; + } + } + + auto res_values = values->filter(values_filter, values_result_size_hint); + return this->create(std::move(res_values), std::move(res_offsets), res_offset); +} + +void ColumnSparse::expand(const Filter & mask, bool inverted) +{ + if (mask.size() < _size) + throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR); + + auto res_offsets = offsets->cloneEmpty(); + auto & res_offsets_data = assert_cast(*res_offsets).getData(); + + auto it = begin(); + for (size_t i = 0; i < mask.size(); ++i) + { + if (!!mask[i] ^ inverted) + { + if (it.getCurrentRow() == _size) + throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR); + + if (!it.isDefault()) + res_offsets_data[it.getCurrentOffset()] = i; + + ++it; + } + } + + _size = mask.size(); +} + +ColumnPtr ColumnSparse::permute(const Permutation & perm, size_t limit) const +{ + return permuteImpl(*this, perm, limit); +} + +ColumnPtr ColumnSparse::index(const IColumn & indexes, size_t limit) const +{ + return selectIndexImpl(*this, indexes, limit); +} + +template +ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray & indexes, size_t limit) const +{ + assert(limit <= indexes.size()); + if (limit == 0) + return ColumnSparse::create(values->cloneEmpty()); + + if (offsets->empty()) + { + auto res = cloneEmpty(); + res->insertManyDefaults(limit); + return res; + } + + auto res_offsets = offsets->cloneEmpty(); + auto & res_offsets_data = assert_cast(*res_offsets).getData(); + auto res_values = values->cloneEmpty(); + res_values->insertDefault(); + + /// If we need to permute full column, or if limit is large enough, + /// it's better to save indexes of values in O(size) + /// and avoid binary search for obtaining every index. + /// 3 is just a guess for overhead on copying indexes. + bool execute_linear = + limit == _size || limit * std::bit_width(offsets->size()) > _size * 3; + + if (execute_linear) + { + PaddedPODArray values_index(_size); + auto offset_it = begin(); + for (size_t i = 0; i < _size; ++i, ++offset_it) + values_index[i] = offset_it.getValueIndex(); + + for (size_t i = 0; i < limit; ++i) + { + size_t index = values_index[indexes[i]]; + if (index != 0) + { + res_values->insertFrom(*values, index); + res_offsets_data.push_back(i); + } + } + } + else + { + for (size_t i = 0; i < limit; ++i) + { + size_t index = getValueIndex(indexes[i]); + if (index != 0) + { + res_values->insertFrom(*values, index); + res_offsets_data.push_back(i); + } + } + } + + return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit); +} + +int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +{ + if (const auto * rhs_sparse = typeid_cast(&rhs_)) + return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint); + + return values->compareAt(getValueIndex(n), m, rhs_, null_direction_hint); +} + +void ColumnSparse::compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray * row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const +{ + if (row_indexes) + { + /// TODO: implement without conversion to full column. 
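filter() above never materializes a full column: it walks the rows once, builds a small 0/1 mask over the stored non-default values, and records fresh offsets for the non-default rows that survive the filter. A simplified standalone version over plain vectors (Sparse, filterSparse and the std::vector<bool> mask are illustrative stand-ins for the IColumn types):

#include <cstddef>
#include <cstdint>
#include <vector>

struct Sparse
{
    std::vector<int64_t> values;   // values[0] is the default
    std::vector<size_t> offsets;   // rows of values[1..], strictly increasing
    size_t rows = 0;
};

Sparse filterSparse(const Sparse & col, const std::vector<bool> & keep)
{
    Sparse res;
    res.values.push_back(col.values[0]);   // the default always stays at position 0

    size_t next_offset = 0;   // cursor into col.offsets
    size_t result_row = 0;    // number of rows kept so far
    for (size_t row = 0; row < col.rows; ++row)
    {
        bool is_non_default = next_offset < col.offsets.size() && col.offsets[next_offset] == row;
        if (keep[row])
        {
            if (is_non_default)
            {
                res.offsets.push_back(result_row);                 // new position of this value
                res.values.push_back(col.values[next_offset + 1]);
            }
            ++result_row;
        }
        if (is_non_default)
            ++next_offset;
    }

    res.rows = result_row;
    return res;
}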
+ auto this_full = convertToFullColumnIfSparse(); + auto rhs_full = rhs.convertToFullColumnIfSparse(); + this_full->compareColumn(*rhs_full, rhs_row_num, row_indexes, compare_results, direction, nan_direction_hint); + } + else + { + const auto & rhs_sparse = assert_cast(rhs); + PaddedPODArray nested_result; + values->compareColumn(rhs_sparse.getValuesColumn(), rhs_sparse.getValueIndex(rhs_row_num), + nullptr, nested_result, direction, nan_direction_hint); + + const auto & offsets_data = getOffsetsData(); + compare_results.resize_fill(_size, nested_result[0]); + for (size_t i = 0; i < offsets_data.size(); ++i) + compare_results[offsets_data[i]] = nested_result[i + 1]; + } +} + +int ColumnSparse::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator & collator) const +{ + if (const auto * rhs_sparse = typeid_cast(&rhs)) + return values->compareAtWithCollation(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint, collator); + + return values->compareAtWithCollation(getValueIndex(n), m, rhs, null_direction_hint, collator); +} + +bool ColumnSparse::hasEqualValues() const +{ + size_t num_defaults = getNumberOfDefaults(); + if (num_defaults == _size) + return true; + + /// There is at least one default and one non-default value. + if (num_defaults != 0) + return false; + + /// Check whether all non-default values are equal. + /// It's suboptimal, but it's a rare case. + for (size_t i = 2; i < values->size(); ++i) + if (values->compareAt(1, i, *values, 1) != 0) + return false; + + return true; +} + +void ColumnSparse::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const +{ + if (_size == 0) + return; + + res.resize(_size); + if (offsets->empty()) + { + for (size_t i = 0; i < _size; ++i) + res[i] = i; + return; + } + + if (limit == 0 || limit > _size) + limit = _size; + + Permutation perm; + /// First, we sort all values. + /// limit + 1 for the case when there are no default values. + if (collator) + values->getPermutationWithCollation(*collator, reverse, limit + 1, null_direction_hint, perm); + else + values->getPermutation(reverse, limit + 1, null_direction_hint, perm); + + size_t num_of_defaults = getNumberOfDefaults(); + size_t row = 0; + + const auto & offsets_data = getOffsetsData(); + + /// Fill the permutation. + for (size_t i = 0; i < perm.size() && row < limit; ++i) + { + if (perm[i] == 0) + { + if (!num_of_defaults) + continue; + + /// Fill the positions of default values in the required quantity.
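hasEqualValues above gets away with looking only at the counts and the stored values: if every row is default the column is trivially constant, if defaults and non-defaults are mixed it is not, and only in the no-defaults case do the stored values have to be compared. The same decision tree over plain vectors (sparseHasEqualValues is an illustrative name, and equality of int64_t stands in for IColumn::compareAt):

#include <cstddef>
#include <cstdint>
#include <vector>

// values[0] is the default; offsets.size() rows out of 'rows' are non-default.
bool sparseHasEqualValues(const std::vector<int64_t> & values,
                          const std::vector<size_t> & offsets,
                          size_t rows)
{
    size_t num_defaults = rows - offsets.size();
    if (num_defaults == rows)
        return true;        // only default rows

    if (num_defaults != 0)
        return false;       // at least one default and one non-default value

    // No defaults at all: all stored non-default values must match the first one.
    for (size_t i = 2; i < values.size(); ++i)
        if (values[i] != values[1])
            return false;
    return true;
}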
+ auto offset_it = begin(); + while (row < limit) + { + while (offset_it.getCurrentRow() < _size && !offset_it.isDefault()) + ++offset_it; + + if (offset_it.getCurrentRow() == _size) + break; + + res[row++] = offset_it.getCurrentRow(); + ++offset_it; + } + } + else + { + res[row++] = offsets_data[perm[i] - 1]; + } + } + + assert(row == limit); +} + +void ColumnSparse::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +{ + return getPermutationImpl(reverse, limit, null_direction_hint, res, nullptr); +} + +void ColumnSparse::updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const +{ + auto this_full = convertToFullColumnIfSparse(); + this_full->updatePermutation(reverse, limit, null_direction_hint, res, equal_range); +} + +void ColumnSparse::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +{ + return getPermutationImpl(reverse, limit, null_direction_hint, res, &collator); +} + +void ColumnSparse::updatePermutationWithCollation( + const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const +{ + auto this_full = convertToFullColumnIfSparse(); + this_full->updatePermutationWithCollation(collator, reverse, limit, null_direction_hint, res, equal_range); +} + +size_t ColumnSparse::byteSize() const +{ + return values->byteSize() + offsets->byteSize() + sizeof(_size); +} + +size_t ColumnSparse::byteSizeAt(size_t n) const +{ + size_t index = getValueIndex(n); + size_t res = values->byteSizeAt(index); + if (index) + res += sizeof(UInt64); + + return res; +} + +size_t ColumnSparse::allocatedBytes() const +{ + return values->allocatedBytes() + offsets->allocatedBytes() + sizeof(_size); +} + +void ColumnSparse::protect() +{ + values->protect(); + offsets->protect(); +} + +ColumnPtr ColumnSparse::replicate(const Offsets & replicate_offsets) const +{ + /// TODO: implement specializations. 
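getPermutationImpl sorts only the k + 1 stored values and then expands that tiny permutation to row order: when the slot of the default value comes up, every default row is emitted (they all compare equal), and any other slot maps directly to offsets[perm[i] - 1]. A compact standalone sketch of the same idea, ascending order only and without the limit, reverse and collation handling of the real method:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <numeric>
#include <vector>

// values[0] is the default; offsets[i] is the row of values[i + 1].
std::vector<size_t> sparseAscendingPermutation(const std::vector<int64_t> & values,
                                               const std::vector<size_t> & offsets,
                                               size_t rows)
{
    // 1. Sort indexes of the stored values only (k + 1 of them, not n rows).
    std::vector<size_t> perm(values.size());
    std::iota(perm.begin(), perm.end(), 0);
    std::sort(perm.begin(), perm.end(), [&](size_t a, size_t b) { return values[a] < values[b]; });

    // 2. Expand the small permutation to row positions.
    std::vector<size_t> res;
    res.reserve(rows);
    for (size_t idx : perm)
    {
        if (idx == 0)
        {
            // Emit every default row, i.e. every row not listed in offsets, in order.
            size_t next_offset = 0;
            for (size_t row = 0; row < rows; ++row)
            {
                if (next_offset < offsets.size() && offsets[next_offset] == row)
                    ++next_offset;
                else
                    res.push_back(row);
            }
        }
        else
        {
            res.push_back(offsets[idx - 1]);
        }
    }
    return res;
}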
+ if (_size != replicate_offsets.size()) + throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + if (_size == 0) + return ColumnSparse::create(values->cloneEmpty()); + + auto res_offsets = offsets->cloneEmpty(); + auto & res_offsets_data = assert_cast(*res_offsets).getData(); + auto res_values = values->cloneEmpty(); + res_values->insertDefault(); + + auto offset_it = begin(); + for (size_t i = 0; i < _size; ++i, ++offset_it) + { + if (!offset_it.isDefault()) + { + size_t replicate_size = replicate_offsets[i] - replicate_offsets[i - 1]; + res_offsets_data.reserve(res_offsets_data.size() + replicate_size); + for (size_t row = replicate_offsets[i - 1]; row < replicate_offsets[i]; ++row) + { + res_offsets_data.push_back(row); + res_values->insertFrom(*values, offset_it.getValueIndex()); + } + } + } + + return ColumnSparse::create(std::move(res_values), std::move(res_offsets), replicate_offsets.back()); +} + +void ColumnSparse::updateHashWithValue(size_t n, SipHash & hash) const +{ + values->updateHashWithValue(getValueIndex(n), hash); +} + +void ColumnSparse::updateWeakHash32(WeakHash32 & hash) const +{ + if (hash.getData().size() != _size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of WeakHash32 does not match size of column: " + "column size is {}, hash size is {}", _size, hash.getData().size()); + + auto offset_it = begin(); + auto & hash_data = hash.getData(); + for (size_t i = 0; i < _size; ++i, ++offset_it) + { + size_t value_index = offset_it.getValueIndex(); + auto data_ref = values->getDataAt(value_index); + hash_data[i] = ::updateWeakHash32(reinterpret_cast(data_ref.data), data_ref.size, hash_data[i]); + } +} + +void ColumnSparse::updateHashFast(SipHash & hash) const +{ + values->updateHashFast(hash); + offsets->updateHashFast(hash); + hash.update(_size); +} + +void ColumnSparse::getExtremes(Field & min, Field & max) const +{ + if (_size == 0) + { + values->get(0, min); + values->get(0, max); + return; + } + + if (getNumberOfDefaults() == 0) + { + size_t min_idx = 1; + size_t max_idx = 1; + + for (size_t i = 2; i < values->size(); ++i) + { + if (values->compareAt(i, min_idx, *values, 1) < 0) + min_idx = i; + else if (values->compareAt(i, max_idx, *values, 1) > 0) + max_idx = i; + } + + values->get(min_idx, min); + values->get(max_idx, max); + return; + } + + values->getExtremes(min, max); +} + +void ColumnSparse::getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const +{ + const auto & offsets_data = getOffsetsData(); + const auto * start = from ? std::lower_bound(offsets_data.begin(), offsets_data.end(), from) : offsets_data.begin(); + const auto * end = limit ? 
std::lower_bound(offsets_data.begin(), offsets_data.end(), from + limit) : offsets_data.end(); + + indices.insert(start, end); +} + +double ColumnSparse::getRatioOfDefaultRows(double) const +{ + return static_cast(getNumberOfDefaults()) / _size; +} + +MutableColumns ColumnSparse::scatter(ColumnIndex num_columns, const Selector & selector) const +{ + return scatterImpl(num_columns, selector); +} + +void ColumnSparse::gather(ColumnGathererStream & gatherer_stream) +{ + gatherer_stream.gather(*this); +} + +ColumnPtr ColumnSparse::compress() const +{ + auto values_compressed = values->compress(); + auto offsets_compressed = offsets->compress(); + + size_t byte_size = values_compressed->byteSize() + offsets_compressed->byteSize(); + + return ColumnCompressed::create(size(), byte_size, + [values_compressed = std::move(values_compressed), offsets_compressed = std::move(offsets_compressed), size = size()] + { + return ColumnSparse::create(values_compressed->decompress(), offsets_compressed->decompress(), size); + }); +} + +bool ColumnSparse::structureEquals(const IColumn & rhs) const +{ + if (const auto * rhs_sparse = typeid_cast(&rhs)) + return values->structureEquals(*rhs_sparse->values); + return false; +} + +void ColumnSparse::forEachSubcolumn(ColumnCallback callback) +{ + callback(values); + callback(offsets); +} + +const IColumn::Offsets & ColumnSparse::getOffsetsData() const +{ + return assert_cast(*offsets).getData(); +} + +IColumn::Offsets & ColumnSparse::getOffsetsData() +{ + return assert_cast(*offsets).getData(); +} + +size_t ColumnSparse::getValueIndex(size_t n) const +{ + assert(n < _size); + + const auto & offsets_data = getOffsetsData(); + const auto * it = std::lower_bound(offsets_data.begin(), offsets_data.end(), n); + if (it == offsets_data.end() || *it != n) + return 0; + + return it - offsets_data.begin() + 1; +} + +ColumnPtr recursiveRemoveSparse(const ColumnPtr & column) +{ + if (!column) + return column; + + if (const auto * column_tuple = typeid_cast(column.get())) + { + auto columns = column_tuple->getColumns(); + for (auto & element : columns) + element = recursiveRemoveSparse(element); + + return ColumnTuple::create(columns); + } + + return column->convertToFullColumnIfSparse(); +} + +} diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h new file mode 100644 index 00000000000..6eb09642510 --- /dev/null +++ b/src/Columns/ColumnSparse.h @@ -0,0 +1,231 @@ +#pragma once + +#include +#include +#include +#include +#include + +class Collator; + +namespace DB +{ + + +/** Column for sparse representation. + * It stores a column with non-default values and a column + * with their sorted positions in the original column. The column with + * values also contains one default value at position 0 to make + * the implementation of functions and sorting more convenient.
+ */ +class ColumnSparse final : public COWHelper +{ +private: + friend class COWHelper; + + explicit ColumnSparse(MutableColumnPtr && values_); + ColumnSparse(MutableColumnPtr && values_, MutableColumnPtr && offsets_, size_t size_); + ColumnSparse(const ColumnSparse &) = default; + +public: + static constexpr auto DEFAULT_ROWS_SEARCH_SAMPLE_RATIO = 0.1; + static constexpr auto DEFAULT_RATIO_FOR_SPARSE_SERIALIZATION = 0.95; + + using Base = COWHelper; + static Ptr create(const ColumnPtr & values_, const ColumnPtr & offsets_, size_t size_) + { + return Base::create(values_->assumeMutable(), offsets_->assumeMutable(), size_); + } + + template ::value>::type> + static MutablePtr create(TColumnPtr && values_, TColumnPtr && offsets_, size_t size_) + { + return Base::create(std::move(values_), std::move(offsets_), size_); + } + + static Ptr create(const ColumnPtr & values_) + { + return Base::create(values_->assumeMutable()); + } + + template ::value>::type> + static MutablePtr create(TColumnPtr && values_) + { + return Base::create(std::forward(values_)); + } + + bool isSparse() const override { return true; } + const char * getFamilyName() const override { return "Sparse"; } + std::string getName() const override { return "Sparse(" + values->getName() + ")"; } + TypeIndex getDataType() const override { return values->getDataType(); } + MutableColumnPtr cloneResized(size_t new_size) const override; + size_t size() const override { return _size; } + bool isDefaultAt(size_t n) const override; + bool isNullAt(size_t n) const override; + Field operator[](size_t n) const override; + void get(size_t n, Field & res) const override; + bool getBool(size_t n) const override; + Float64 getFloat64(size_t n) const override; + Float32 getFloat32(size_t n) const override; + UInt64 getUInt(size_t n) const override; + Int64 getInt(size_t n) const override; + UInt64 get64(size_t n) const override; + StringRef getDataAt(size_t n) const override; + + ColumnPtr convertToFullColumnIfSparse() const override; + + /// Will insert null value if pos=nullptr + void insertData(const char * pos, size_t length) override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + const char * deserializeAndInsertFromArena(const char * pos) override; + const char * skipSerializedInArena(const char *) const override; + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insert(const Field & x) override; + void insertFrom(const IColumn & src, size_t n) override; + void insertDefault() override; + void insertManyDefaults(size_t length) override; + + void popBack(size_t n) override; + ColumnPtr filter(const Filter & filt, ssize_t) const override; + void expand(const Filter & mask, bool inverted) override; + ColumnPtr permute(const Permutation & perm, size_t limit) const override; + + ColumnPtr index(const IColumn & indexes, size_t limit) const override; + + template + ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; + + int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray * row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const override; + + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator & collator) const override; + bool hasEqualValues() const override; + + void getPermutationImpl(bool reverse, size_t limit, int 
null_direction_hint, Permutation & res, const Collator * collator) const; + + void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; + void updatePermutationWithCollation( + const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override; + + size_t byteSize() const override; + size_t byteSizeAt(size_t n) const override; + size_t allocatedBytes() const override; + void protect() override; + ColumnPtr replicate(const Offsets & replicate_offsets) const override; + void updateHashWithValue(size_t n, SipHash & hash) const override; + void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override; + void getExtremes(Field & min, Field & max) const override; + + void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override; + double getRatioOfDefaultRows(double sample_ratio) const override; + + MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; + + void gather(ColumnGathererStream & gatherer_stream) override; + + ColumnPtr compress() const override; + + void forEachSubcolumn(ColumnCallback callback) override; + + bool structureEquals(const IColumn & rhs) const override; + + bool isNullable() const override { return values->isNullable(); } + bool isFixedAndContiguous() const override { return false; } + bool valuesHaveFixedSize() const override { return values->valuesHaveFixedSize(); } + size_t sizeOfValueIfFixed() const override { return values->sizeOfValueIfFixed() + values->sizeOfValueIfFixed(); } + bool isCollationSupported() const override { return values->isCollationSupported(); } + + size_t getNumberOfDefaults() const { return _size - offsets->size(); } + size_t getNumberOfTrailingDefaults() const + { + return offsets->empty() ? _size : _size - getOffsetsData().back() - 1; + } + + /// Return position of element in 'values' columns, + /// that corresponds to n-th element of full column. + /// O(log(offsets.size())) complexity, + size_t getValueIndex(size_t n) const; + + const IColumn & getValuesColumn() const { return *values; } + IColumn & getValuesColumn() { return *values; } + + const ColumnPtr & getValuesPtr() const { return values; } + ColumnPtr & getValuesPtr() { return values; } + + const IColumn::Offsets & getOffsetsData() const; + IColumn::Offsets & getOffsetsData(); + + const ColumnPtr & getOffsetsPtr() const { return offsets; } + ColumnPtr & getOffsetsPtr() { return offsets; } + + const IColumn & getOffsetsColumn() const { return *offsets; } + IColumn & getOffsetsColumn() { return *offsets; } + + /// This class helps to iterate over all values in ColumnSparse. + class Iterator + { + public: + Iterator(const PaddedPODArray & offsets_, size_t size_, size_t current_offset_, size_t current_row_) + : offsets(offsets_), size(size_), current_offset(current_offset_), current_row(current_row_) + { + } + + bool ALWAYS_INLINE isDefault() const { return current_offset == offsets.size() || current_row != offsets[current_offset]; } + size_t ALWAYS_INLINE getValueIndex() const { return isDefault() ? 
0 : current_offset + 1; } + size_t ALWAYS_INLINE getCurrentRow() const { return current_row; } + size_t ALWAYS_INLINE getCurrentOffset() const { return current_offset; } + + bool operator==(const Iterator & other) const + { + return size == other.size + && current_offset == other.current_offset + && current_row == other.current_row; + } + + bool operator!=(const Iterator & other) const { return !(*this == other); } + + Iterator operator++() + { + if (!isDefault()) + ++current_offset; + ++current_row; + return *this; + } + + private: + const PaddedPODArray & offsets; + const size_t size; + size_t current_offset; + size_t current_row; + }; + + Iterator begin() const { return Iterator(getOffsetsData(), _size, 0, 0); } + Iterator end() const { return Iterator(getOffsetsData(), _size, getOffsetsData().size(), _size); } + +private: + using Inserter = std::function; + + /// Inserts value to 'values' column via callback. + /// Properly handles cases, when inserted value is default. + /// Used, when it's unknown in advance if inserted value is default. + void insertSingleValue(const Inserter & inserter); + + /// Contains default value at 0 position. + /// It's convenient, because it allows to execute, e.g functions or sorting, + /// for this column without handling different cases. + WrappedPtr values; + + /// Sorted offsets of non-default values in the full column. + /// 'offsets[i]' corresponds to 'values[i + 1]'. + WrappedPtr offsets; + size_t _size; +}; + +ColumnPtr recursiveRemoveSparse(const ColumnPtr & column); + +} diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 2beb9add318..cd8a3e698d8 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -474,8 +474,9 @@ void ColumnString::getExtremes(Field & min, Field & max) const ColumnPtr ColumnString::compress() const { - size_t source_chars_size = chars.size(); - size_t source_offsets_size = offsets.size() * sizeof(Offset); + const size_t source_chars_size = chars.size(); + const size_t source_offsets_elements = offsets.size(); + const size_t source_offsets_size = source_offsets_elements * sizeof(Offset); /// Don't compress small blocks. if (source_chars_size < 4096) /// A wild guess. 
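The ColumnSparse header above encodes a column as its non-default values plus the sorted row numbers at which they occur: the default sits at values[0], offsets[i] corresponds to values[i + 1], and looking up a row is a binary search over offsets. A minimal standalone sketch of that mapping, with illustrative types and names rather than the real implementation:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative model of the ColumnSparse layout: values[0] holds the default,
// offsets holds the rows that are non-default, offsets[i] maps to values[i + 1].
struct SparseSketch
{
    std::vector<uint64_t> values;   // values[0] is the default value
    std::vector<uint64_t> offsets;  // sorted row numbers of non-default values
    size_t size = 0;                // number of rows in the full column

    // Mirrors getValueIndex(): O(log(offsets.size())) binary search.
    size_t getValueIndex(size_t n) const
    {
        const auto it = std::lower_bound(offsets.begin(), offsets.end(), n);
        if (it == offsets.end() || *it != n)
            return 0;                          // row n holds the default
        return (it - offsets.begin()) + 1;     // shifted by one because of values[0]
    }

    uint64_t operator[](size_t n) const { return values[getValueIndex(n)]; }
};

// For example, the full column {0, 0, 7, 0, 9} would be stored as
// values = {0, 7, 9}, offsets = {2, 4}, size = 5.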
@@ -489,12 +490,14 @@ ColumnPtr ColumnString::compress() const auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size, true); - return ColumnCompressed::create(offsets.size(), chars_compressed->size() + offsets_compressed->size(), + const size_t chars_compressed_size = chars_compressed->size(); + const size_t offsets_compressed_size = offsets_compressed->size(); + return ColumnCompressed::create(source_offsets_elements, chars_compressed_size + offsets_compressed_size, [ chars_compressed = std::move(chars_compressed), offsets_compressed = std::move(offsets_compressed), source_chars_size, - source_offsets_elements = offsets.size() + source_offsets_elements ] { auto res = ColumnString::create(); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 0ab4ed8e49d..a7b2c60d9a1 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -107,6 +107,12 @@ public: return StringRef(&chars[offsetAt(n)], sizeAt(n)); } + bool isDefaultAt(size_t n) const override + { + assert(n < size()); + return sizeAt(n) == 1; + } + /// Suppress gcc 7.3.1 warning: '*((void*)& +8)' may be used uninitialized in this function #if !defined(__clang__) #pragma GCC diagnostic push @@ -278,6 +284,16 @@ public: return typeid(rhs) == typeid(ColumnString); } + double getRatioOfDefaultRows(double sample_ratio) const override + { + return getRatioOfDefaultRowsImpl(sample_ratio); + } + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override + { + return getIndicesOfNonDefaultRowsImpl(indices, from, limit); + } + Chars & getChars() { return chars; } const Chars & getChars() const { return chars; } diff --git a/src/Columns/ColumnStringHelpers.h b/src/Columns/ColumnStringHelpers.h new file mode 100644 index 00000000000..851486e490a --- /dev/null +++ b/src/Columns/ColumnStringHelpers.h @@ -0,0 +1,91 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int TOO_LARGE_STRING_SIZE; +} + +namespace ColumnStringHelpers +{ + +/** Simplifies writing data to the ColumnString or ColumnFixedString via WriteBuffer. + * + * Take care of little subtle details, like padding or proper offsets. + */ +template +class WriteHelper +{ + ColumnType & col; + WriteBufferFromVector buffer; + size_t prev_row_buffer_size = 0; + + static ColumnType & resizeColumn(ColumnType & column, size_t rows) + { + if constexpr (std::is_same_v) + column.resize(rows); + else + { + column.getOffsets().reserve(rows); + /// Using coefficient 2 for initial size is arbitrary. + column.getChars().resize(rows * 2); + } + return column; + } + +public: + WriteHelper(ColumnType & col_, size_t expected_rows) + : col(resizeColumn(col_, expected_rows)) + , buffer(col.getChars()) + {} + + ~WriteHelper() = default; + + void finalize() + { + buffer.finalize(); + } + + auto & getWriteBuffer() + { + return buffer; + } + + inline void rowWritten() + { + if constexpr (std::is_same_v) + { + if (buffer.count() > prev_row_buffer_size + col.getN()) + throw Exception( + ErrorCodes::TOO_LARGE_STRING_SIZE, + "Too large string for FixedString column"); + + // Pad with zeroes on the right to maintain FixedString invariant. 
+ const auto excess_bytes = buffer.count() % col.getN(); + const auto fill_bytes = col.getN() - excess_bytes; + writeChar(0, fill_bytes, buffer); + } + else + { + writeChar(0, buffer); + col.getOffsets().push_back(buffer.count()); + } + + prev_row_buffer_size = buffer.count(); + } +}; + +} + +} diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index d157f18bf32..d667b264d55 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB @@ -113,6 +114,15 @@ void ColumnTuple::get(size_t n, Field & res) const res = tuple; } +bool ColumnTuple::isDefaultAt(size_t n) const +{ + const size_t tuple_size = columns.size(); + for (size_t i = 0; i < tuple_size; ++i) + if (!columns[i]->isDefaultAt(n)) + return false; + return true; +} + StringRef ColumnTuple::getDataAt(size_t) const { throw Exception("Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); @@ -536,4 +546,25 @@ ColumnPtr ColumnTuple::compress() const }); } +double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const +{ + return getRatioOfDefaultRowsImpl(sample_ratio); +} + +void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const +{ + return getIndicesOfNonDefaultRowsImpl(indices, from, limit); +} + +SerializationInfoPtr ColumnTuple::getSerializationInfo() const +{ + MutableSerializationInfos infos; + infos.reserve(columns.size()); + + for (const auto & column : columns) + infos.push_back(const_pointer_cast(column->getSerializationInfo())); + + return std::make_shared(std::move(infos), SerializationInfo::Settings{}); +} + } diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 54723239a06..b4c7f6bdf9a 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -53,6 +53,7 @@ public: Field operator[](size_t n) const override; void get(size_t n, Field & res) const override; + bool isDefaultAt(size_t n) const override; StringRef getDataAt(size_t n) const override; void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; @@ -93,6 +94,9 @@ public: bool structureEquals(const IColumn & rhs) const override; bool isCollationSupported() const override; ColumnPtr compress() const override; + double getRatioOfDefaultRows(double sample_ratio) const override; + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override; + SerializationInfoPtr getSerializationInfo() const override; size_t tupleSize() const { return columns.size(); } diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 66bb56983fc..59eca547852 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -68,6 +68,7 @@ public: Field operator[](size_t n) const override { return (*getNestedColumn())[n]; } void get(size_t n, Field & res) const override { getNestedColumn()->get(n, res); } + bool isDefaultAt(size_t n) const override { return n == 0; } StringRef getDataAt(size_t n) const override { return getNestedColumn()->getDataAt(n); } StringRef getDataAtWithTerminatingZero(size_t n) const override { @@ -122,6 +123,16 @@ public: return false; } + double getRatioOfDefaultRows(double) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'getRatioOfDefaultRows' not implemented for ColumnUnique"); + } + + void getIndicesOfNonDefaultRows(IColumn::Offsets &, size_t, size_t) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 
'getIndicesOfNonDefaultRows' not implemented for ColumnUnique"); + } + const UInt64 * tryGetSavedHash() const override { return reverse_index.tryGetSavedHash(); } UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); } diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 13ba522b2ac..9808acf48c8 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -481,7 +481,8 @@ void ColumnVector::getExtremes(Field & min, Field & max) const template ColumnPtr ColumnVector::compress() const { - size_t source_size = data.size() * sizeof(T); + const size_t data_size = data.size(); + const size_t source_size = data_size * sizeof(T); /// Don't compress small blocks. if (source_size < 4096) /// A wild guess. @@ -492,8 +493,9 @@ ColumnPtr ColumnVector::compress() const if (!compressed) return ColumnCompressed::wrap(this->getPtr()); - return ColumnCompressed::create(data.size(), compressed->size(), - [compressed = std::move(compressed), column_size = data.size()] + const size_t compressed_size = compressed->size(); + return ColumnCompressed::create(data_size, compressed_size, + [compressed = std::move(compressed), column_size = data_size] { auto res = ColumnVector::create(column_size); ColumnCompressed::decompressBuffer( @@ -502,6 +504,24 @@ ColumnPtr ColumnVector::compress() const }); } +template +ColumnPtr ColumnVector::createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const +{ + if (offsets.size() + shift != size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Incompatible sizes of offsets ({}), shift ({}) and size of column {}", offsets.size(), shift, size()); + + auto res = this->create(); + auto & res_data = res->getData(); + + T default_value = safeGet(default_field); + res_data.resize_fill(total_rows, default_value); + for (size_t i = 0; i < offsets.size(); ++i) + res_data[offsets[i]] = data[i + shift]; + + return res; +} + /// Explicit template instantiations - to avoid code bloat in headers. template class ColumnVector; template class ColumnVector; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index d3e5e8d2bdd..bee7bfa738c 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -328,11 +328,25 @@ public: return StringRef(reinterpret_cast(&data[n]), sizeof(data[n])); } + bool isDefaultAt(size_t n) const override { return data[n] == T{}; } + bool structureEquals(const IColumn & rhs) const override { return typeid(rhs) == typeid(ColumnVector); } + double getRatioOfDefaultRows(double sample_ratio) const override + { + return this->template getRatioOfDefaultRowsImpl(sample_ratio); + } + + void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override + { + return this->template getIndicesOfNonDefaultRowsImpl(indices, from, limit); + } + + ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const override; + ColumnPtr compress() const override; /// Replace elements that match the filter with zeroes. If inverted replaces not matched elements. 
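createWithOffsets(), added to ColumnVector above, is the inverse operation used when a sparse column is expanded: fill total_rows slots with the default value, then scatter the stored values to their offsets, skipping the first shift stored values (ColumnSparse keeps the default at position 0 of its values column, which is presumably why a shift of 1 is used there). A rough standalone equivalent for a plain vector, not the real member function:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch of the expansion: data holds the stored values, offsets holds the target
// rows for data[shift], data[shift + 1], ... in a full column of total_rows rows.
std::vector<int64_t> createWithOffsets(
    const std::vector<int64_t> & data,
    const std::vector<uint64_t> & offsets,
    int64_t default_value,
    size_t total_rows,
    size_t shift)
{
    // The real code throws LOGICAL_ERROR on a size mismatch; an assert suffices here.
    assert(offsets.size() + shift == data.size());

    std::vector<int64_t> res(total_rows, default_value);
    for (size_t i = 0; i < offsets.size(); ++i)
        res[offsets[i]] = data[i + shift];
    return res;
}

// createWithOffsets({0, 7, 9}, {2, 4}, 0, 5, 1) yields {0, 0, 7, 0, 9},
// i.e. it rebuilds the full column from the sparse example shown earlier.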
diff --git a/src/Columns/FilterDescription.cpp b/src/Columns/FilterDescription.cpp index c9968d841c2..973d5bc4391 100644 --- a/src/Columns/FilterDescription.cpp +++ b/src/Columns/FilterDescription.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -50,6 +51,9 @@ ConstantFilterDescription::ConstantFilterDescription(const IColumn & column) FilterDescription::FilterDescription(const IColumn & column_) { + if (column_.isSparse()) + data_holder = recursiveRemoveSparse(column_.getPtr()); + if (column_.lowCardinality()) data_holder = column_.convertToFullColumnIfLowCardinality(); diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index a3ed0885651..e7caee3b23d 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -4,11 +4,17 @@ #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + String IColumn::dumpStructure() const { WriteBufferFromOwnString res; @@ -30,6 +36,39 @@ void IColumn::insertFrom(const IColumn & src, size_t n) insert(src[n]); } +ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const +{ + if (offsets.size() + shift != size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Incompatible sizes of offsets ({}), shift ({}) and size of column {}", offsets.size(), shift, size()); + + auto res = cloneEmpty(); + res->reserve(total_rows); + + ssize_t current_offset = -1; + for (size_t i = 0; i < offsets.size(); ++i) + { + ssize_t offsets_diff = static_cast(offsets[i]) - current_offset; + current_offset = offsets[i]; + + if (offsets_diff > 1) + res->insertMany(default_field, offsets_diff - 1); + + res->insertFrom(*this, i + shift); + } + + ssize_t offsets_diff = static_cast(total_rows) - current_offset; + if (offsets_diff > 1) + res->insertMany(default_field, offsets_diff - 1); + + return res; +} + +SerializationInfoPtr IColumn::getSerializationInfo() const +{ + return std::make_shared(ISerialization::getKind(*this), SerializationInfo::Settings{}); +} + bool isColumnNullable(const IColumn & column) { return checkColumn(column); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index e4f99cc0a1d..b1a6e83ee98 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -26,9 +26,8 @@ class ColumnGathererStream; class Field; class WeakHash32; -class ISerialization; -using SerializationPtr = std::shared_ptr; - +class SerializationInfo; +using SerializationInfoPtr = std::shared_ptr; /* * Represents a set of equal ranges in previous column to perform sorting in current column. @@ -64,9 +63,18 @@ public: virtual Ptr convertToFullColumnIfConst() const { return getPtr(); } /// If column isn't ColumnLowCardinality, return itself. - /// If column is ColumnLowCardinality, transforms is to full column. + /// If column is ColumnLowCardinality, transforms it to full column. virtual Ptr convertToFullColumnIfLowCardinality() const { return getPtr(); } + /// If column isn't ColumnSparse, return itself. + /// If column is ColumnSparse, transforms it to full column. + virtual Ptr convertToFullColumnIfSparse() const { return getPtr(); } + + Ptr convertToFullIfNeeded() const + { + return convertToFullColumnIfSparse()->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); + } + /// Creates empty column with the same type. 
virtual MutablePtr cloneEmpty() const { return cloneResized(0); } @@ -133,7 +141,7 @@ public: throw Exception("Method getInt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - virtual bool isDefaultAt(size_t n) const { return get64(n) == 0; } + virtual bool isDefaultAt(size_t n) const = 0; virtual bool isNullAt(size_t /*n*/) const { return false; } /** If column is numeric, return value of n-th element, casted to bool. @@ -173,6 +181,13 @@ public: insertFrom(src, position); } + /// Appends one field multiple times. Can be optimized in inherited classes. + virtual void insertMany(const Field & field, size_t length) + { + for (size_t i = 0; i < length; ++i) + insert(field); + } + /// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented). /// Is used to optimize some computations (in aggregation, for example). /// Parameter length could be ignored if column values have fixed size. @@ -375,6 +390,22 @@ public: throw Exception("Method structureEquals is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + /// Returns ration of values in column, that equal to default value of column. + /// Checks only @sample_ratio ratio of rows. + virtual double getRatioOfDefaultRows(double sample_ratio = 1.0) const = 0; + + /// Returns indices of values in column, that not equal to default value of column. + virtual void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const = 0; + + /// Returns column with @total_size elements. + /// In result column values from current column are at positions from @offsets. + /// Other values are filled by @default_value. + /// @shift means how much rows to skip from the beginning of current column. + /// Used to create full column from sparse. + virtual Ptr createWithOffsets(const Offsets & offsets, const Field & default_field, size_t total_rows, size_t shift) const; + + virtual SerializationInfoPtr getSerializationInfo() const; + /// Compress column in memory to some representation that allows to decompress it back. /// Return itself if compression is not applicable for this column type. virtual Ptr compress() const @@ -457,6 +488,8 @@ public: virtual bool lowCardinality() const { return false; } + virtual bool isSparse() const { return false; } + virtual bool isCollationSupported() const { return false; } virtual ~IColumn() = default; @@ -468,7 +501,6 @@ public: String dumpStructure() const; protected: - /// Template is to devirtualize calls to insertFrom method. /// In derived classes (that use final keyword), implement scatter method as call to scatterImpl. template @@ -489,6 +521,13 @@ protected: template bool hasEqualValuesImpl() const; + /// Template is to devirtualize calls to 'isDefaultAt' method. + template + double getRatioOfDefaultRowsImpl(double sample_ratio) const; + + template + void getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const; + /// Uses std::sort and partial_sort as default algorithms. /// Implements 'less' and 'equals' via comparator. 
/// If 'less' and 'equals' can be implemented more optimal diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index ff45cf28737..89844f4b0b3 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -46,6 +46,7 @@ public: Field operator[](size_t) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void get(size_t, Field &) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void insert(const Field &) override { throw Exception("Cannot insert element into " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + bool isDefaultAt(size_t) const override { throw Exception("isDefaultAt is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } StringRef getDataAt(size_t) const override { @@ -161,6 +162,16 @@ public: return res; } + double getRatioOfDefaultRows(double) const override + { + throw Exception("Method getRatioOfDefaultRows is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override + { + throw Exception("Method getIndicesOfNonDefaultRows is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + void gather(ColumnGathererStream &) override { throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index d45867e289d..1be52087d11 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; } template @@ -141,6 +142,56 @@ bool IColumn::hasEqualValuesImpl() const return true; } +template +double IColumn::getRatioOfDefaultRowsImpl(double sample_ratio) const +{ + if (sample_ratio <= 0.0 || sample_ratio > 1.0) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Value of 'sample_ratio' must be in interval (0.0; 1.0], but got: {}", sample_ratio); + + /// Randomize a little to avoid boundary effects. + std::uniform_int_distribution dist(1, static_cast(1.0 / sample_ratio)); + + size_t num_rows = size(); + size_t num_sampled_rows = static_cast(num_rows * sample_ratio); + size_t num_checked_rows = dist(thread_local_rng); + num_sampled_rows = std::min(num_sampled_rows + dist(thread_local_rng), num_rows); + size_t res = 0; + + if (num_sampled_rows == num_rows) + { + for (size_t i = 0; i < num_rows; ++i) + res += static_cast(*this).isDefaultAt(i); + num_checked_rows = num_rows; + } + else if (num_sampled_rows != 0) + { + for (size_t i = num_checked_rows; i < num_rows; ++i) + { + if (num_checked_rows * num_rows <= i * num_sampled_rows) + { + res += static_cast(*this).isDefaultAt(i); + ++num_checked_rows; + } + } + } + + return static_cast(res) / num_checked_rows; +} + +template +void IColumn::getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const +{ + size_t to = limit && from + limit < size() ? 
from + limit : size(); + indices.reserve(indices.size() + to - from); + + for (size_t i = from; i < to; ++i) + { + if (!static_cast(*this).isDefaultAt(i)) + indices.push_back(i); + } +} + template void IColumn::updatePermutationImpl( size_t limit, diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 9499185da30..1641bdf5a4c 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -293,7 +293,7 @@ void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty) column.column = column_function->getResultType()->createColumn(); } -int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments) +int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments) { int last_short_circuit_argument_index = -1; for (size_t i = 0; i != arguments.size(); ++i) diff --git a/src/Columns/MaskOperations.h b/src/Columns/MaskOperations.h index bd6c5e8fe2c..e43b4588258 100644 --- a/src/Columns/MaskOperations.h +++ b/src/Columns/MaskOperations.h @@ -66,7 +66,7 @@ void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty = false); /// Check if arguments contain lazy executed argument. If contain, return index of the last one, /// otherwise return -1. -int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments); +int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments); void copyMask(const PaddedPODArray & from, PaddedPODArray & to); diff --git a/src/Columns/tests/gtest_column_sparse.cpp b/src/Columns/tests/gtest_column_sparse.cpp new file mode 100644 index 00000000000..56284b75204 --- /dev/null +++ b/src/Columns/tests/gtest_column_sparse.cpp @@ -0,0 +1,327 @@ +#include +#include + +#include +#include +#include + +#include +#include + +#include + +using namespace DB; +pcg64 rng(randomSeed()); + +std::pair createColumns(size_t n, size_t k) +{ + auto values = ColumnVector::create(); + auto offsets = ColumnVector::create(); + auto full = ColumnVector::create(); + + auto & values_data = values->getData(); + auto & offsets_data = offsets->getData(); + auto & full_data = full->getData(); + + values_data.push_back(0); + + for (size_t i = 0; i < n; ++i) + { + bool not_zero = rng() % k == 0; + size_t value = not_zero ? rng() % 1000000 : 0; + full_data.push_back(value); + + if (not_zero) + { + values_data.push_back(value); + offsets_data.push_back(i); + } + } + + auto sparse = ColumnSparse::create(std::move(values), std::move(offsets), n); + return std::make_pair(std::move(sparse), std::move(full)); +} + +bool checkEquals(const IColumn & lhs, const IColumn & rhs) +{ + if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); ++i) + if (lhs.compareAt(i, i, rhs, 0) != 0) + return false; + + return true; +} + +// Can't use ErrorCodes, because of 'using namespace DB'. 
+constexpr int error_code = 12345; + +constexpr size_t T = 5000; +constexpr size_t MAX_ROWS = 10000; +constexpr size_t sparse_ratios[] = {1, 2, 5, 10, 32, 50, 64, 100, 256, 500, 1000, 5000, 10000}; +constexpr size_t K = sizeof(sparse_ratios) / sizeof(sparse_ratios[0]); + +#define DUMP_COLUMN(column) std::cerr << #column << ": " << (column)->dumpStructure() << "\n" + +TEST(ColumnSparse, InsertRangeFrom) +{ + auto test_case = [&](size_t n1, size_t k1, size_t n2, size_t k2, size_t from, size_t len) + { + auto [sparse_dst, full_dst] = createColumns(n1, k1); + auto [sparse_src, full_src] = createColumns(n2, k2); + + sparse_dst->insertRangeFrom(*sparse_src, from, len); + full_dst->insertRangeFrom(*full_src, from, len); + + if (!checkEquals(*sparse_dst->convertToFullColumnIfSparse(), *full_dst)) + { + DUMP_COLUMN(sparse_src); + DUMP_COLUMN(full_src); + DUMP_COLUMN(sparse_dst); + DUMP_COLUMN(full_dst); + throw Exception(error_code, "Columns are unequal"); + } + }; + + try + { + for (size_t i = 0; i < T; ++i) + { + size_t n1 = rng() % MAX_ROWS + 1; + size_t k1 = sparse_ratios[rng() % K]; + + size_t n2 = rng() % MAX_ROWS + 1; + size_t k2 = sparse_ratios[rng() % K]; + + size_t from = rng() % n2; + size_t to = rng() % n2; + + if (from > to) + std::swap(from, to); + + test_case(n1, k1, n2, k2, from, to - from); + } + } + catch (const Exception & e) + { + FAIL() << e.displayText(); + } +} + +TEST(ColumnSparse, PopBack) +{ + auto test_case = [&](size_t n, size_t k, size_t m) + { + auto [sparse_dst, full_dst] = createColumns(n, k); + + sparse_dst->popBack(m); + full_dst->popBack(m); + + if (!checkEquals(*sparse_dst->convertToFullColumnIfSparse(), *full_dst)) + { + DUMP_COLUMN(sparse_dst); + DUMP_COLUMN(full_dst); + throw Exception(error_code, "Columns are unequal"); + } + }; + + try + { + for (size_t i = 0; i < T; ++i) + { + size_t n = rng() % MAX_ROWS + 1; + size_t k = sparse_ratios[rng() % K]; + size_t m = rng() % n; + + test_case(n, k, m); + } + } + catch (const Exception & e) + { + FAIL() << e.displayText(); + } +} + +TEST(ColumnSparse, Filter) +{ + auto test_case = [&](size_t n, size_t k, size_t m) + { + auto [sparse_src, full_src] = createColumns(n, k); + + PaddedPODArray filt(n); + for (size_t i = 0; i < n; ++i) + filt[i] = rng() % m == 0; + + auto sparse_dst = sparse_src->filter(filt, -1); + auto full_dst = full_src->filter(filt, -1); + + if (!checkEquals(*sparse_dst->convertToFullColumnIfSparse(), *full_dst)) + { + DUMP_COLUMN(sparse_src); + DUMP_COLUMN(full_src); + DUMP_COLUMN(sparse_dst); + DUMP_COLUMN(full_dst); + throw Exception(error_code, "Columns are unequal"); + } + }; + + try + { + for (size_t i = 0; i < T; ++i) + { + size_t n = rng() % MAX_ROWS + 1; + size_t k = sparse_ratios[rng() % K]; + size_t m = sparse_ratios[rng() % K]; + + test_case(n, k, m); + } + } + catch (const Exception & e) + { + FAIL() << e.displayText(); + } +} + +TEST(ColumnSparse, Permute) +{ + auto test_case = [&](size_t n, size_t k, size_t limit) + { + auto [sparse_src, full_src] = createColumns(n, k); + + IColumn::Permutation perm(n); + std::iota(perm.begin(), perm.end(), 0); + std::shuffle(perm.begin(), perm.end(), rng); + + auto sparse_dst = sparse_src->permute(perm, limit); + auto full_dst = full_src->permute(perm, limit); + + if (limit) + { + sparse_dst = sparse_dst->cut(0, limit); + full_dst = full_dst->cut(0, limit); + } + + if (!checkEquals(*sparse_dst->convertToFullColumnIfSparse(), *full_dst)) + { + DUMP_COLUMN(sparse_src); + DUMP_COLUMN(full_src); + DUMP_COLUMN(sparse_dst); + DUMP_COLUMN(full_dst); + throw 
Exception(error_code, "Columns are unequal"); + } + }; + + try + { + for (size_t i = 0; i < T; ++i) + { + size_t n = rng() % MAX_ROWS + 1; + size_t k = sparse_ratios[rng() % K]; + size_t limit = rng() % 2 ? 0 : rng() % n; + + test_case(n, k, limit); + } + } + catch (const Exception & e) + { + FAIL() << e.displayText(); + } +} + +TEST(ColumnSparse, CompareColumn) +{ + auto test_case = [&](size_t n1, size_t k1, size_t n2, size_t k2, size_t row_num) + { + auto [sparse_src1, full_src1] = createColumns(n1, k1); + auto [sparse_src2, full_src2] = createColumns(n2, k2); + + PaddedPODArray comp_sparse; + PaddedPODArray comp_full; + + sparse_src1->compareColumn(*sparse_src2, row_num, nullptr, comp_sparse, 1, 1); + full_src1->compareColumn(*full_src2, row_num, nullptr, comp_full, 1, 1); + + if (comp_sparse != comp_full) + { + DUMP_COLUMN(sparse_src1); + DUMP_COLUMN(full_src1); + DUMP_COLUMN(sparse_src2); + DUMP_COLUMN(full_src2); + throw Exception(error_code, "Compare results are unequal"); + } + }; + + try + { + for (size_t i = 0; i < T; ++i) + { + size_t n1 = rng() % MAX_ROWS + 1; + size_t k1 = sparse_ratios[rng() % K]; + + size_t n2 = rng() % MAX_ROWS + 1; + size_t k2 = sparse_ratios[rng() % K]; + + size_t row_num = rng() % n2; + + test_case(n1, k1, n2, k2, row_num); + } + } + catch (const Exception & e) + { + FAIL() << e.displayText(); + } +} + +TEST(ColumnSparse, GetPermutation) +{ + auto test_case = [&](size_t n, size_t k, size_t limit, bool reverse) + { + auto [sparse_src, full_src] = createColumns(n, k); + + IColumn::Permutation perm_sparse; + IColumn::Permutation perm_full; + + sparse_src->getPermutation(reverse, limit, 1, perm_sparse); + full_src->getPermutation(reverse, limit, 1, perm_full); + + auto sparse_sorted = sparse_src->permute(perm_sparse, limit); + auto full_sorted = full_src->permute(perm_full, limit); + + if (limit) + { + sparse_sorted = sparse_sorted->cut(0, limit); + full_sorted = full_sorted->cut(0, limit); + } + + if (!checkEquals(*sparse_sorted->convertToFullColumnIfSparse(), *full_sorted)) + { + DUMP_COLUMN(sparse_src); + DUMP_COLUMN(full_src); + DUMP_COLUMN(sparse_sorted); + DUMP_COLUMN(full_sorted); + throw Exception(error_code, "Sorted columns are unequal"); + } + }; + + try + { + for (size_t i = 0; i < T; ++i) + { + size_t n = rng() % MAX_ROWS + 1; + size_t k = sparse_ratios[rng() % K]; + + size_t limit = rng() % 2 ? 0 : rng() % n; + bool reverse = rng() % 2; + + test_case(n, k, limit, reverse); + } + } + catch (const Exception & e) + { + FAIL() << e.displayText(); + } +} + +#undef DUMP_COLUMN +#undef DUMP_NON_DEFAULTS diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index 1935fe4fed1..1e7d3591a48 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -1,8 +1,9 @@ add_subdirectory(StringUtils) -# after common_io -#add_subdirectory(ZooKeeper) -#add_subdirectory(ConfigProcessor) if (ENABLE_EXAMPLES) add_subdirectory(examples) endif() + +if (USE_MYSQL) + add_subdirectory (mysqlxx) +endif () diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index da7405b993f..41e9a53e50f 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -41,24 +41,6 @@ namespace ErrorCodes /// For cutting preprocessed path to this base static std::string main_config_path; -/// Extracts from a string the first encountered number consisting of at least two digits. 
-static std::string numberFromHost(const std::string & s) -{ - for (size_t i = 0; i < s.size(); ++i) - { - std::string res; - size_t j = i; - while (j < s.size() && isNumericASCII(s[j])) - res += s[j++]; - if (res.size() >= 2) - { - while (res[0] == '0') - res.erase(res.begin()); - return res; - } - } - return ""; -} bool ConfigProcessor::isPreprocessedFile(const std::string & path) { @@ -245,19 +227,6 @@ void ConfigProcessor::merge(XMLDocumentPtr config, XMLDocumentPtr with) mergeRecursive(config, config_root, with_root); } -static std::string layerFromHost() -{ - struct utsname buf; - if (uname(&buf)) - throw Poco::Exception(std::string("uname failed: ") + errnoToString(errno)); - - std::string layer = numberFromHost(buf.nodename); - if (layer.empty()) - throw Poco::Exception(std::string("no layer in host name: ") + buf.nodename); - - return layer; -} - void ConfigProcessor::doIncludesRecursive( XMLDocumentPtr config, XMLDocumentPtr include_from, @@ -288,18 +257,6 @@ void ConfigProcessor::doIncludesRecursive( if (node->nodeType() != Node::ELEMENT_NODE) return; - /// Substitute for the number extracted from the hostname only if there is an - /// empty tag without attributes in the original file. - if (node->nodeName() == "layer" - && !node->hasAttributes() - && !node->hasChildNodes() - && node->nodeValue().empty()) - { - NodePtr new_node = config->createTextNode(layerFromHost()); - node->appendChild(new_node); - return; - } - std::map attr_nodes; NamedNodeMapPtr attributes = node->attributes(); size_t substs_count = 0; diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 2a92a709934..04278d72303 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -59,7 +59,6 @@ public: /// 4) If zk_node_cache is non-NULL, replace elements matching the "" pattern with /// "contents of the /bar ZooKeeper node". /// If has_zk_includes is non-NULL and there are such elements, set has_zk_includes to true. - /// 5) (Yandex.Metrika-specific) Substitute "" with "layer number from the hostname". 
XMLDocumentPtr processConfig( bool * has_zk_includes = nullptr, zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr, diff --git a/base/base/DateLUT.cpp b/src/Common/DateLUT.cpp similarity index 100% rename from base/base/DateLUT.cpp rename to src/Common/DateLUT.cpp diff --git a/base/base/DateLUT.h b/src/Common/DateLUT.h similarity index 98% rename from base/base/DateLUT.h rename to src/Common/DateLUT.h index 31fc6b1e24b..edf09250e6a 100644 --- a/base/base/DateLUT.h +++ b/src/Common/DateLUT.h @@ -2,7 +2,7 @@ #include "DateLUTImpl.h" -#include "defines.h" +#include #include diff --git a/base/base/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp similarity index 99% rename from base/base/DateLUTImpl.cpp rename to src/Common/DateLUTImpl.cpp index bbce3b111d3..ebf32c4dbd9 100644 --- a/base/base/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/base/base/DateLUTImpl.h b/src/Common/DateLUTImpl.h similarity index 99% rename from base/base/DateLUTImpl.h rename to src/Common/DateLUTImpl.h index 012d2cefe84..e52e6547fa2 100644 --- a/base/base/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -1,8 +1,8 @@ #pragma once -#include "DayNum.h" -#include "defines.h" -#include "types.h" +#include +#include +#include #include #include diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp index 18e9315d5c3..a85bbe818b5 100644 --- a/src/Common/Dwarf.cpp +++ b/src/Common/Dwarf.cpp @@ -838,7 +838,7 @@ bool Dwarf::findLocation( // The next inlined subroutine's call file and call line is the current // caller's location. - for (size_t i = 0; i < num_found - 1; i++) + for (size_t i = 0; i < num_found - 1; ++i) { call_locations[i].file = call_locations[i + 1].file; call_locations[i].line = call_locations[i + 1].line; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 9b0e2cdc494..f2e5e018e1b 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -470,7 +470,7 @@ M(497, ACCESS_DENIED) \ M(498, LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED) \ M(499, S3_ERROR) \ - M(500, BLOB_STORAGE_ERROR) \ + M(500, AZURE_BLOB_STORAGE_ERROR) \ M(501, CANNOT_CREATE_DATABASE) \ M(502, CANNOT_SIGQUEUE) \ M(503, AGGREGATE_FUNCTION_THROW) \ @@ -603,6 +603,7 @@ M(632, UNEXPECTED_DATA_AFTER_PARSED_VALUE) \ M(633, QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW) \ M(634, MONGODB_ERROR) \ + M(635, CANNOT_POLL) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 94d4cee197c..f5baa1b92b3 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -4,7 +4,7 @@ #include #include #include - +#include #include #include #include @@ -36,7 +36,7 @@ public: explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_) { - page_size = ::sysconf(_SC_PAGESIZE); + page_size = getPageSize(); } boost::context::stack_context allocate() diff --git a/base/base/LocalDate.h b/src/Common/LocalDate.h similarity index 90% rename from base/base/LocalDate.h rename to src/Common/LocalDate.h index d199b9dbb4b..6c847ceff0e 100644 --- a/base/base/LocalDate.h +++ b/src/Common/LocalDate.h @@ -2,9 +2,8 @@ #include #include -#include #include -#include +#include /** Stores a calendar date in broken-down form (year, month, day-in-month). @@ -154,19 +153,6 @@ public: { return !(*this == other); } - - /// NOTE Inefficient. 
- std::string toString(char separator = '-') const - { - std::stringstream ss; - if (separator) - ss << year() << separator << (month() / 10) << (month() % 10) - << separator << (day() / 10) << (day() % 10); - else - ss << year() << (month() / 10) << (month() % 10) - << (day() / 10) << (day() % 10); - return ss.str(); - } }; static_assert(sizeof(LocalDate) == 4); diff --git a/base/base/LocalDateTime.h b/src/Common/LocalDateTime.h similarity index 98% rename from base/base/LocalDateTime.h rename to src/Common/LocalDateTime.h index 282a56ac640..fa0a680d274 100644 --- a/base/base/LocalDateTime.h +++ b/src/Common/LocalDateTime.h @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include /** Stores calendar date and time in broken-down form. diff --git a/src/Common/NamePrompter.h b/src/Common/NamePrompter.h index 4a44a3adeaa..74725ede08c 100644 --- a/src/Common/NamePrompter.h +++ b/src/Common/NamePrompter.h @@ -102,6 +102,13 @@ public: return prompter.getHints(name, getAllRegisteredNames()); } + IHints() = default; + + IHints(const IHints &) = default; + IHints(IHints &&) = default; + IHints & operator=(const IHints &) = default; + IHints & operator=(IHints &&) = default; + virtual ~IHints() = default; private: diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 8a621b8db1c..b312fbda21c 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -37,6 +37,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_MPROTECT; + extern const int CANNOT_ALLOCATE_MEMORY; } /** A dynamic array for POD types. @@ -104,7 +105,13 @@ protected: char * c_end_of_storage = null; /// Does not include pad_right. /// The amount of memory occupied by the num_elements of the elements. - static size_t byte_size(size_t num_elements) { return num_elements * ELEMENT_SIZE; } + static size_t byte_size(size_t num_elements) + { + size_t amount; + if (__builtin_mul_overflow(num_elements, ELEMENT_SIZE, &amount)) + throw Exception("Amount of memory requested to allocate is more than allowed", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + return amount; + } /// Minimum amount of memory to allocate for num_elements, including padding. 
static size_t minimum_memory_for_elements(size_t num_elements) { return byte_size(num_elements) + pad_right + pad_left; } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 982523a3ef2..878930f58d9 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -259,6 +259,8 @@ M(RemoteFSUnusedPrefetches, "Number of prefetches pending at buffer destruction") \ M(RemoteFSPrefetchedReads, "Number of reads from prefecthed buffer") \ M(RemoteFSUnprefetchedReads, "Number of reads from unprefetched buffer") \ + M(RemoteFSLazySeeks, "Number of lazy seeks") \ + M(RemoteFSSeeksWithReset, "Number of seeks which lead to a new connection") \ M(RemoteFSBuffers, "Number of buffers created for asynchronous reading from remote filesystem") \ \ M(ReadBufferSeekCancelConnection, "Number of seeks which lead to new connection (s3, http)") \ @@ -274,7 +276,8 @@ M(ThreadPoolReaderPageCacheMissElapsedMicroseconds, "Time spent reading data inside the asynchronous job in ThreadPoolReader - when read was not done from page cache.") \ \ M(AsynchronousReadWaitMicroseconds, "Time spent in waiting for asynchronous reads.") \ - + \ + M(MainConfigLoads, "Number of times the main configuration was reloaded.") \ namespace ProfileEvents { diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index f238e976f8a..0093d72e766 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -65,14 +65,14 @@ ShellCommand::~ShellCommand() size_t try_wait_timeout = config.terminate_in_destructor_strategy.wait_for_normal_exit_before_termination_seconds; bool process_terminated_normally = tryWaitProcessWithTimeout(try_wait_timeout); - if (!process_terminated_normally) - { - LOG_TRACE(getLogger(), "Will kill shell command pid {} with SIGTERM", pid); + if (process_terminated_normally) + return; - int retcode = kill(pid, SIGTERM); - if (retcode != 0) - LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString(retcode)); - } + LOG_TRACE(getLogger(), "Will kill shell command pid {} with SIGTERM", pid); + + int retcode = kill(pid, SIGTERM); + if (retcode != 0) + LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString(retcode)); } else { @@ -91,7 +91,7 @@ bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds) { int status = 0; - LOG_TRACE(getLogger(), "Try wait for shell command pid ({}) with timeout ({})", pid, timeout_in_seconds); + LOG_TRACE(getLogger(), "Try wait for shell command pid {} with timeout {}", pid, timeout_in_seconds); wait_called = true; struct timespec interval {.tv_sec = 1, .tv_nsec = 0}; @@ -119,7 +119,9 @@ bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds) bool process_terminated_normally = (waitpid_res == pid); if (process_terminated_normally) + { return true; + } else if (waitpid_res == 0) { --timeout_in_seconds; @@ -128,7 +130,9 @@ bool ShellCommand::tryWaitProcessWithTimeout(size_t timeout_in_seconds) continue; } else if (waitpid_res == -1 && errno != EINTR) + { return false; + } } return false; @@ -155,12 +159,17 @@ std::unique_ptr ShellCommand::executeImpl( { logCommand(filename, argv); +#if !defined(USE_MUSL) /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs, * because of the resolving of symbols in the shared library * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html * Therefore, separate the resolving of the symbol from the call. 
*/ static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); +#else + /// If we use Musl with static linking, there is no dlsym and no issue with vfork. + static void * real_vfork = reinterpret_cast(&vfork); +#endif if (!real_vfork) throwFromErrno("Cannot find symbol vfork in myself", ErrorCodes::CANNOT_DLSYM); diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index 5e4c31b149e..cfeab149d30 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index 2d875b7042d..568f633975b 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -5,6 +5,7 @@ #include #include +#include #include @@ -85,12 +86,43 @@ namespace /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object +void updateResources(std::string_view name, const void * address, SymbolIndex::Resources & resources) +{ + const char * char_address = static_cast(address); + + if (name.starts_with("_binary_") || name.starts_with("binary_")) + { + if (name.ends_with("_start")) + { + name = name.substr((name[0] == '_') + strlen("binary_")); + name = name.substr(0, name.size() - strlen("_start")); + + resources.emplace(name, std::string_view{char_address, 0}); // NOLINT + } + else if (name.ends_with("_end")) + { + name = name.substr((name[0] == '_') + strlen("binary_")); + name = name.substr(0, name.size() - strlen("_end")); + + if (auto it = resources.find(name); it != resources.end() && it->second.empty()) + { + const char * start = it->second.data(); + assert(char_address >= start); + it->second = std::string_view{start, static_cast(char_address - start)}; + } + } + } +} + + /// Based on the code of musl-libc and the answer of Kanalpiroge on /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture /// It does not extract all the symbols (but only public - exported and used for dynamic linking), /// but will work if we cannot find or parse ELF files. -void collectSymbolsFromProgramHeaders(dl_phdr_info * info, - std::vector & symbols) +void collectSymbolsFromProgramHeaders( + dl_phdr_info * info, + std::vector & symbols, + SymbolIndex::Resources & resources) { /* Iterate over all headers of the current shared lib * (first call is for the executable itself) @@ -184,10 +216,6 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, /* Iterate over the symbol table */ for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index) { - /// We are not interested in empty symbols. - if (!elf_sym[sym_index].st_size) - continue; - /* Get the name of the sym_index-th symbol. * This is located at the address of st_name relative to the beginning of the string table. */ @@ -197,10 +225,18 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, continue; SymbolIndex::Symbol symbol; - symbol.address_begin = reinterpret_cast(info->dlpi_addr + elf_sym[sym_index].st_value); - symbol.address_end = reinterpret_cast(info->dlpi_addr + elf_sym[sym_index].st_value + elf_sym[sym_index].st_size); + symbol.address_begin = reinterpret_cast( + info->dlpi_addr + elf_sym[sym_index].st_value); + symbol.address_end = reinterpret_cast( + info->dlpi_addr + elf_sym[sym_index].st_value + elf_sym[sym_index].st_size); symbol.name = sym_name; - symbols.push_back(symbol); + + /// We are not interested in empty symbols. 
+ if (elf_sym[sym_index].st_size) + symbols.push_back(symbol); + + /// But resources can be represented by a pair of empty symbols (indicating their boundaries). + updateResources(symbol.name, symbol.address_begin, resources); } break; @@ -210,6 +246,7 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, } +#if !defined USE_MUSL String getBuildIDFromProgramHeaders(dl_phdr_info * info) { for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) @@ -222,6 +259,7 @@ String getBuildIDFromProgramHeaders(dl_phdr_info * info) } return {}; } +#endif void collectSymbolsFromELFSymbolTable( @@ -229,7 +267,8 @@ void collectSymbolsFromELFSymbolTable( const Elf & elf, const Elf::Section & symbol_table, const Elf::Section & string_table, - std::vector & symbols) + std::vector & symbols, + SymbolIndex::Resources & resources) { /// Iterate symbol table. const ElfSym * symbol_table_entry = reinterpret_cast(symbol_table.begin()); @@ -241,7 +280,6 @@ void collectSymbolsFromELFSymbolTable( { if (!symbol_table_entry->st_name || !symbol_table_entry->st_value - || !symbol_table_entry->st_size || strings + symbol_table_entry->st_name >= elf.end()) continue; @@ -252,10 +290,16 @@ void collectSymbolsFromELFSymbolTable( continue; SymbolIndex::Symbol symbol; - symbol.address_begin = reinterpret_cast(info->dlpi_addr + symbol_table_entry->st_value); - symbol.address_end = reinterpret_cast(info->dlpi_addr + symbol_table_entry->st_value + symbol_table_entry->st_size); + symbol.address_begin = reinterpret_cast( + info->dlpi_addr + symbol_table_entry->st_value); + symbol.address_end = reinterpret_cast( + info->dlpi_addr + symbol_table_entry->st_value + symbol_table_entry->st_size); symbol.name = symbol_name; - symbols.push_back(symbol); + + if (symbol_table_entry->st_size) + symbols.push_back(symbol); + + updateResources(symbol.name, symbol.address_begin, resources); } } @@ -265,7 +309,8 @@ bool searchAndCollectSymbolsFromELFSymbolTable( const Elf & elf, unsigned section_header_type, const char * string_table_name, - std::vector & symbols) + std::vector & symbols, + SymbolIndex::Resources & resources) { std::optional symbol_table; std::optional string_table; @@ -283,31 +328,45 @@ bool searchAndCollectSymbolsFromELFSymbolTable( return false; } - collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols); + collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources); return true; } -void collectSymbolsFromELF(dl_phdr_info * info, +void collectSymbolsFromELF( + dl_phdr_info * info, std::vector & symbols, std::vector & objects, + SymbolIndex::Resources & resources, String & build_id) { + String object_name; + String our_build_id; + +#if defined (USE_MUSL) + object_name = "/proc/self/exe"; + our_build_id = Elf(object_name).getBuildID(); + build_id = our_build_id; +#else /// MSan does not know that the program segments in memory are initialized. __msan_unpoison_string(info->dlpi_name); - std::string object_name = info->dlpi_name; - - String our_build_id = getBuildIDFromProgramHeaders(info); + object_name = info->dlpi_name; + our_build_id = getBuildIDFromProgramHeaders(info); /// If the name is empty and there is a non-empty build-id - it's main executable. /// Find a elf file for the main executable and set the build-id. 
if (object_name.empty()) { object_name = "/proc/self/exe"; + + if (our_build_id.empty()) + our_build_id = Elf(object_name).getBuildID(); + if (build_id.empty()) build_id = our_build_id; } +#endif std::error_code ec; std::filesystem::path canonical_path = std::filesystem::canonical(object_name, ec); @@ -377,10 +436,12 @@ void collectSymbolsFromELF(dl_phdr_info * info, object.name = object_name; objects.push_back(std::move(object)); - searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols); + searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources); - /// Unneeded because they were parsed from "program headers" of loaded objects. - //searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols); + /// Unneeded if they were parsed from "program headers" of loaded objects. +#if defined USE_MUSL + searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources); +#endif } @@ -392,8 +453,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr) { SymbolIndex::Data & data = *reinterpret_cast(data_ptr); - collectSymbolsFromProgramHeaders(info, data.symbols); - collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id); + collectSymbolsFromProgramHeaders(info, data.symbols, data.resources); + collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id); /* Continue iterations */ return 0; @@ -424,7 +485,7 @@ const T * find(const void * address, const std::vector & vec) void SymbolIndex::update() { - dl_iterate_phdr(collectSymbols, &data.symbols); + dl_iterate_phdr(collectSymbols, &data); std::sort(data.objects.begin(), data.objects.end(), [](const Object & a, const Object & b) { return a.address_begin < b.address_begin; }); std::sort(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) { return a.address_begin < b.address_begin; }); diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index 37862987bd2..7c542980099 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -47,15 +48,27 @@ public: const std::vector & symbols() const { return data.symbols; } const std::vector & objects() const { return data.objects; } + std::string_view getResource(String name) const + { + if (auto it = data.resources.find(name); it != data.resources.end()) + return it->second; + return {}; + } + /// The BuildID that is generated by compiler. String getBuildID() const { return data.build_id; } String getBuildIDHex() const; + using Resources = std::unordered_map; + struct Data { std::vector symbols; std::vector objects; String build_id; + + /// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end. + Resources resources; }; private: Data data; diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index c976e4ca16a..411f725f2db 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -72,6 +72,24 @@ static thread_local bool has_alt_stack = false; #endif +std::vector ThreadGroupStatus::getProfileEventsCountersAndMemoryForThreads() +{ + std::lock_guard guard(mutex); + + /// It is OK to move it, since it is enough to report statistics for the thread at least once. 
+ auto stats = std::move(finished_threads_counters_memory); + for (auto * thread : threads) + { + stats.emplace_back(ProfileEventsCountersAndMemory{ + thread->performance_counters.getPartiallyAtomicSnapshot(), + thread->memory_tracker.get(), + thread->thread_id, + }); + } + + return stats; +} + ThreadStatus::ThreadStatus() : thread_id{getThreadId()} { @@ -139,11 +157,17 @@ ThreadStatus::~ThreadStatus() { /// It's a minor tracked memory leak here (not the memory itself but it's counter). /// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent. + tryLogCurrentException(log); } if (thread_group) { std::lock_guard guard(thread_group->mutex); + thread_group->finished_threads_counters_memory.emplace_back(ThreadGroupStatus::ProfileEventsCountersAndMemory{ + performance_counters.getPartiallyAtomicSnapshot(), + memory_tracker.get(), + thread_id, + }); thread_group->threads.erase(this); } diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 97ddda1ea30..f3920474111 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -61,6 +61,13 @@ using ThreadStatusPtr = ThreadStatus *; class ThreadGroupStatus { public: + struct ProfileEventsCountersAndMemory + { + ProfileEvents::Counters::Snapshot counters; + Int64 memory_usage; + UInt64 thread_id; + }; + mutable std::mutex mutex; ProfileEvents::Counters performance_counters{VariableContext::Process}; @@ -83,6 +90,10 @@ public: String query; UInt64 normalized_query_hash = 0; + + std::vector finished_threads_counters_memory; + + std::vector getProfileEventsCountersAndMemoryForThreads(); }; using ThreadGroupStatusPtr = std::shared_ptr; diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp index d84202449d1..523251fa2a2 100644 --- a/src/Common/TraceCollector.cpp +++ b/src/Common/TraceCollector.cpp @@ -153,7 +153,7 @@ void TraceCollector::run() Array trace; trace.reserve(trace_size); - for (size_t i = 0; i < trace_size; i++) + for (size_t i = 0; i < trace_size; ++i) { uintptr_t addr = 0; readPODBinary(addr, in); diff --git a/base/base/getResource.cpp b/src/Common/getResource.cpp similarity index 80% rename from base/base/getResource.cpp rename to src/Common/getResource.cpp index 6682ae0a01f..fe603fcc550 100644 --- a/base/base/getResource.cpp +++ b/src/Common/getResource.cpp @@ -1,8 +1,9 @@ #include "getResource.h" -#include "unaligned.h" #include #include #include +#include + std::string_view getResource(std::string_view name) { @@ -13,6 +14,11 @@ std::string_view getResource(std::string_view name) std::replace(name_replaced.begin(), name_replaced.end(), '.', '_'); boost::replace_all(name_replaced, "+", "_PLUS_"); +#if defined USE_MUSL + /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself. + return DB::SymbolIndex::instance()->getResource(name_replaced); + +#else // In most `dlsym(3)` APIs, one passes the symbol name as it appears via // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be // looked up with the string `"_foo"`. 
@@ -33,8 +39,8 @@ std::string_view getResource(std::string_view name) std::string symbol_name_start = prefix + name_replaced + "_start"; std::string symbol_name_end = prefix + name_replaced + "_end"; - const char* sym_start = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_start.c_str())); - const char* sym_end = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_end.c_str())); + const char * sym_start = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_start.c_str())); + const char * sym_end = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_end.c_str())); if (sym_start && sym_end) { @@ -42,4 +48,5 @@ std::string_view getResource(std::string_view name) return { sym_start, resource_size }; } return {}; +#endif } diff --git a/base/base/getResource.h b/src/Common/getResource.h similarity index 100% rename from base/base/getResource.h rename to src/Common/getResource.h diff --git a/src/Common/mysqlxx/CMakeLists.txt b/src/Common/mysqlxx/CMakeLists.txt new file mode 100644 index 00000000000..76005651e61 --- /dev/null +++ b/src/Common/mysqlxx/CMakeLists.txt @@ -0,0 +1,24 @@ +add_library (mysqlxx + Connection.cpp + Exception.cpp + Query.cpp + ResultBase.cpp + UseQueryResult.cpp + Row.cpp + Value.cpp + Pool.cpp + PoolFactory.cpp + PoolWithFailover.cpp +) + +target_include_directories (mysqlxx PUBLIC .) + +target_link_libraries (mysqlxx + clickhouse_common_io + ${MYSQLCLIENT_LIBRARIES} + ${ZLIB_LIBRARIES} +) + +if (ENABLE_TESTS) + add_subdirectory (tests) +endif () diff --git a/base/mysqlxx/Connection.cpp b/src/Common/mysqlxx/Connection.cpp similarity index 100% rename from base/mysqlxx/Connection.cpp rename to src/Common/mysqlxx/Connection.cpp diff --git a/base/mysqlxx/Exception.cpp b/src/Common/mysqlxx/Exception.cpp similarity index 70% rename from base/mysqlxx/Exception.cpp rename to src/Common/mysqlxx/Exception.cpp index 188e7bd740d..0f5320da754 100644 --- a/base/mysqlxx/Exception.cpp +++ b/src/Common/mysqlxx/Exception.cpp @@ -4,6 +4,7 @@ #include #endif #include +#include namespace mysqlxx @@ -11,11 +12,7 @@ namespace mysqlxx std::string errorMessage(MYSQL * driver) { - std::stringstream res; - res << mysql_error(driver) - << " (" << (driver->host ? driver->host : "(nullptr)") - << ":" << driver->port << ")"; - return res.str(); + return fmt::format("{} ({}:{})", mysql_error(driver), driver->host ? driver->host : "(nullptr)", driver->port); } void checkError(MYSQL * driver) diff --git a/base/mysqlxx/Pool.cpp b/src/Common/mysqlxx/Pool.cpp similarity index 100% rename from base/mysqlxx/Pool.cpp rename to src/Common/mysqlxx/Pool.cpp diff --git a/base/mysqlxx/PoolFactory.cpp b/src/Common/mysqlxx/PoolFactory.cpp similarity index 100% rename from base/mysqlxx/PoolFactory.cpp rename to src/Common/mysqlxx/PoolFactory.cpp diff --git a/base/mysqlxx/PoolWithFailover.cpp b/src/Common/mysqlxx/PoolWithFailover.cpp similarity index 93% rename from base/mysqlxx/PoolWithFailover.cpp rename to src/Common/mysqlxx/PoolWithFailover.cpp index 14c0db9ecd5..e4da07c7a79 100644 --- a/base/mysqlxx/PoolWithFailover.cpp +++ b/src/Common/mysqlxx/PoolWithFailover.cpp @@ -3,13 +3,8 @@ #include #include #include - - -/// Duplicate of code from StringUtils.h. Copied here for less dependencies. 
-static bool startsWith(const std::string & s, const char * prefix) -{ - return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix)); -} +#include +#include using namespace mysqlxx; @@ -31,7 +26,7 @@ PoolWithFailover::PoolWithFailover( for (const auto & replica_config_key : replica_keys) { /// There could be another elements in the same level in configuration file, like "password", "port"... - if (startsWith(replica_config_key, "replica")) + if (replica_config_key.starts_with("replica")) { std::string replica_name = config_name_ + "." + replica_config_key; @@ -82,7 +77,9 @@ PoolWithFailover::PoolWithFailover( unsigned default_connections_, unsigned max_connections_, size_t max_tries_, - uint64_t wait_timeout_) + uint64_t wait_timeout_, + size_t connect_timeout_, + size_t rw_timeout_) : max_tries(max_tries_) , shareable(false) , wait_timeout(wait_timeout_) @@ -93,8 +90,8 @@ PoolWithFailover::PoolWithFailover( replicas_by_priority[0].emplace_back(std::make_shared(database, host, user, password, port, /* socket_ = */ "", - MYSQLXX_DEFAULT_TIMEOUT, - MYSQLXX_DEFAULT_RW_TIMEOUT, + connect_timeout_, + rw_timeout_, default_connections_, max_connections_)); } @@ -179,7 +176,7 @@ PoolWithFailover::Entry PoolWithFailover::get() return (*full_pool)->get(wait_timeout); } - std::stringstream message; + DB::WriteBufferFromOwnString message; message << "Connections to all replicas failed: "; for (auto it = replicas_by_priority.begin(); it != replicas_by_priority.end(); ++it) for (auto jt = it->second.begin(); jt != it->second.end(); ++jt) diff --git a/base/mysqlxx/Query.cpp b/src/Common/mysqlxx/Query.cpp similarity index 82% rename from base/mysqlxx/Query.cpp rename to src/Common/mysqlxx/Query.cpp index d4514c3e988..11e72e14f9d 100644 --- a/base/mysqlxx/Query.cpp +++ b/src/Common/mysqlxx/Query.cpp @@ -21,10 +21,7 @@ Query::Query(Connection * conn_, const std::string & query_string) : conn(conn_) /// Важно в случае, если Query используется не из того же потока, что Connection. mysql_thread_init(); - if (!query_string.empty()) - query_buf << query_string; - - query_buf.imbue(std::locale::classic()); + query = query_string; } Query::Query(const Query & other) : conn(other.conn) @@ -32,9 +29,7 @@ Query::Query(const Query & other) : conn(other.conn) /// Важно в случае, если Query используется не из того же потока, что Connection. 
mysql_thread_init(); - query_buf.imbue(std::locale::classic()); - - *this << other.str(); + query = other.query; } Query & Query::operator= (const Query & other) @@ -43,8 +38,7 @@ Query & Query::operator= (const Query & other) return *this; conn = other.conn; - - query_buf.str(other.str()); + query = other.query; return *this; } @@ -54,20 +48,13 @@ Query::~Query() mysql_thread_end(); } -void Query::reset() -{ - query_buf.str({}); -} - void Query::executeImpl() { - std::string query_string = query_buf.str(); - MYSQL* mysql_driver = conn->getDriver(); auto & logger = Poco::Logger::get("mysqlxx::Query"); logger.trace("Running MySQL query using connection %lu", mysql_thread_id(mysql_driver)); - if (mysql_real_query(mysql_driver, query_string.data(), query_string.size())) + if (mysql_real_query(mysql_driver, query.data(), query.size())) { const auto err_no = mysql_errno(mysql_driver); switch (err_no) diff --git a/base/mysqlxx/ResultBase.cpp b/src/Common/mysqlxx/ResultBase.cpp similarity index 100% rename from base/mysqlxx/ResultBase.cpp rename to src/Common/mysqlxx/ResultBase.cpp diff --git a/base/mysqlxx/Row.cpp b/src/Common/mysqlxx/Row.cpp similarity index 66% rename from base/mysqlxx/Row.cpp rename to src/Common/mysqlxx/Row.cpp index aecec46e519..861a04f8ece 100644 --- a/base/mysqlxx/Row.cpp +++ b/src/Common/mysqlxx/Row.cpp @@ -21,4 +21,12 @@ Value Row::operator[] (const char * name) const throw Exception(std::string("Unknown column ") + name); } +enum enum_field_types Row::getFieldType(size_t i) +{ + if (i >= res->getNumFields()) + throw Exception(std::string("Array Index Overflow")); + MYSQL_FIELDS fields = res->getFields(); + return fields[i].type; +} + } diff --git a/base/mysqlxx/UseQueryResult.cpp b/src/Common/mysqlxx/UseQueryResult.cpp similarity index 100% rename from base/mysqlxx/UseQueryResult.cpp rename to src/Common/mysqlxx/UseQueryResult.cpp diff --git a/base/mysqlxx/Value.cpp b/src/Common/mysqlxx/Value.cpp similarity index 93% rename from base/mysqlxx/Value.cpp rename to src/Common/mysqlxx/Value.cpp index ed66167e8ea..85b63b722a2 100644 --- a/base/mysqlxx/Value.cpp +++ b/src/Common/mysqlxx/Value.cpp @@ -156,19 +156,21 @@ void Value::throwException(const char * text) const { static constexpr size_t preview_length = 1000; - std::stringstream info; - info << text; + std::string info(text); if (!isNull()) { - info << ": "; - info.write(m_data, m_length); + info.append(": "); + info.append(m_data, m_length); } if (res && res->getQuery()) - info << ", query: " << res->getQuery()->str().substr(0, preview_length); + { + info.append(", query: "); + info.append(res->getQuery()->str().substr(0, preview_length)); + } - throw CannotParseValue(info.str()); + throw CannotParseValue(info); } } diff --git a/base/mysqlxx/Connection.h b/src/Common/mysqlxx/mysqlxx/Connection.h similarity index 99% rename from base/mysqlxx/Connection.h rename to src/Common/mysqlxx/mysqlxx/Connection.h index 65955136eb1..548e75a1fef 100644 --- a/base/mysqlxx/Connection.h +++ b/src/Common/mysqlxx/mysqlxx/Connection.h @@ -154,7 +154,7 @@ public: bool ping(); /// Creates query. It can be set with query string or later. - Query query(const std::string & str = ""); + Query query(const std::string & str); /// Get MySQL C API MYSQL object. 
MYSQL * getDriver(); diff --git a/base/mysqlxx/Exception.h b/src/Common/mysqlxx/mysqlxx/Exception.h similarity index 100% rename from base/mysqlxx/Exception.h rename to src/Common/mysqlxx/mysqlxx/Exception.h diff --git a/base/mysqlxx/Null.h b/src/Common/mysqlxx/mysqlxx/Null.h similarity index 100% rename from base/mysqlxx/Null.h rename to src/Common/mysqlxx/mysqlxx/Null.h diff --git a/base/mysqlxx/Pool.h b/src/Common/mysqlxx/mysqlxx/Pool.h similarity index 100% rename from base/mysqlxx/Pool.h rename to src/Common/mysqlxx/mysqlxx/Pool.h diff --git a/base/mysqlxx/PoolFactory.h b/src/Common/mysqlxx/mysqlxx/PoolFactory.h similarity index 100% rename from base/mysqlxx/PoolFactory.h rename to src/Common/mysqlxx/mysqlxx/PoolFactory.h diff --git a/base/mysqlxx/PoolWithFailover.h b/src/Common/mysqlxx/mysqlxx/PoolWithFailover.h similarity index 93% rename from base/mysqlxx/PoolWithFailover.h rename to src/Common/mysqlxx/mysqlxx/PoolWithFailover.h index 2bd5ec9f30a..17870d141e1 100644 --- a/base/mysqlxx/PoolWithFailover.h +++ b/src/Common/mysqlxx/mysqlxx/PoolWithFailover.h @@ -6,6 +6,7 @@ #define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS 1 #define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS 16 #define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 +#define MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_CONNECTION_WAIT_TIMEOUT 5 /// in seconds namespace mysqlxx @@ -121,7 +122,9 @@ namespace mysqlxx unsigned default_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, unsigned max_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS, size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, - uint64_t wait_timeout_ = UINT64_MAX); + uint64_t wait_timeout_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_CONNECTION_WAIT_TIMEOUT, + size_t connect_timeout = MYSQLXX_DEFAULT_TIMEOUT, + size_t rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT); PoolWithFailover(const PoolWithFailover & other); diff --git a/base/mysqlxx/Query.h b/src/Common/mysqlxx/mysqlxx/Query.h similarity index 74% rename from base/mysqlxx/Query.h rename to src/Common/mysqlxx/mysqlxx/Query.h index 036e8952bc3..49aa3f223e7 100644 --- a/base/mysqlxx/Query.h +++ b/src/Common/mysqlxx/mysqlxx/Query.h @@ -13,9 +13,7 @@ namespace mysqlxx * Ссылается на Connection. Если уничтожить Connection, то Query станет некорректным и пользоваться им будет нельзя. * * Пример использования: - * mysqlxx::Query query = connection.query(); - * query << "SELECT 1 AS x, 2 AS y, 3 AS z"; - * query << " LIMIT 1"; + * mysqlxx::Query query = connection.query("SELECT 1 AS x, 2 AS y, 3 AS z LIMIT 1"); * mysqlxx::UseQueryResult result = query.use(); * * while (mysqlxx::Row row = result.fetch()) @@ -29,14 +27,11 @@ namespace mysqlxx class Query { public: - Query(Connection * conn_, const std::string & query_string = ""); + Query(Connection * conn_, const std::string & query_string); Query(const Query & other); Query & operator= (const Query & other); ~Query(); - /** Сбросить текст запроса. Это используется, если нужно написать новый запрос в том же объекте. */ - void reset(); - /** Выполнить запрос, результат которого не имеет значения (почти всё кроме SELECT). */ void execute(); @@ -54,24 +49,12 @@ public: /// Получить текст запроса (например, для вывода его в лог). См. ещё operator<< ниже. 
std::string str() const { - return query_buf.str(); - } - - auto rdbuf() const - { - return query_buf.rdbuf(); - } - - template - inline Query & operator<< (T && x) - { - query_buf << std::forward(x); - return *this; + return query; } private: Connection * conn; - std::ostringstream query_buf; + std::string query; void executeImpl(); }; @@ -80,7 +63,7 @@ private: /// Вывести текст запроса в ostream. inline std::ostream & operator<< (std::ostream & ostr, const Query & query) { - return ostr << query.rdbuf(); + return ostr << query.str(); } diff --git a/base/mysqlxx/ResultBase.h b/src/Common/mysqlxx/mysqlxx/ResultBase.h similarity index 100% rename from base/mysqlxx/ResultBase.h rename to src/Common/mysqlxx/mysqlxx/ResultBase.h diff --git a/base/mysqlxx/Row.h b/src/Common/mysqlxx/mysqlxx/Row.h similarity index 98% rename from base/mysqlxx/Row.h rename to src/Common/mysqlxx/mysqlxx/Row.h index d668fdbd29a..b11d7d628ef 100644 --- a/base/mysqlxx/Row.h +++ b/src/Common/mysqlxx/mysqlxx/Row.h @@ -79,6 +79,8 @@ public: */ operator private_bool_type() const { return row == nullptr ? nullptr : &Row::row; } + enum enum_field_types getFieldType(size_t i); + private: MYSQL_ROW row{}; ResultBase * res{}; diff --git a/base/mysqlxx/Transaction.h b/src/Common/mysqlxx/mysqlxx/Transaction.h similarity index 100% rename from base/mysqlxx/Transaction.h rename to src/Common/mysqlxx/mysqlxx/Transaction.h diff --git a/base/mysqlxx/Types.h b/src/Common/mysqlxx/mysqlxx/Types.h similarity index 94% rename from base/mysqlxx/Types.h rename to src/Common/mysqlxx/mysqlxx/Types.h index b5ed70916fa..5fd9aa8bbc8 100644 --- a/base/mysqlxx/Types.h +++ b/src/Common/mysqlxx/mysqlxx/Types.h @@ -16,6 +16,8 @@ using MYSQL_ROW = char**; struct st_mysql_field; using MYSQL_FIELD = st_mysql_field; +enum struct enum_field_types; + #endif namespace mysqlxx diff --git a/base/mysqlxx/UseQueryResult.h b/src/Common/mysqlxx/mysqlxx/UseQueryResult.h similarity index 100% rename from base/mysqlxx/UseQueryResult.h rename to src/Common/mysqlxx/mysqlxx/UseQueryResult.h diff --git a/base/mysqlxx/Value.h b/src/Common/mysqlxx/mysqlxx/Value.h similarity index 99% rename from base/mysqlxx/Value.h rename to src/Common/mysqlxx/mysqlxx/Value.h index 6d3b2e96ebd..797a65a63f9 100644 --- a/base/mysqlxx/Value.h +++ b/src/Common/mysqlxx/mysqlxx/Value.h @@ -10,10 +10,10 @@ #include #include -#include +#include #include -#include +#include namespace mysqlxx diff --git a/base/mysqlxx/mysqlxx.h b/src/Common/mysqlxx/mysqlxx/mysqlxx.h similarity index 94% rename from base/mysqlxx/mysqlxx.h rename to src/Common/mysqlxx/mysqlxx/mysqlxx.h index 0caadcbb720..785d4361fd7 100644 --- a/base/mysqlxx/mysqlxx.h +++ b/src/Common/mysqlxx/mysqlxx/mysqlxx.h @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include #include @@ -23,7 +23,7 @@ * where values are stored consecutively as (non-zero-terminated) strings. * * 2. Too slow methods for converting values to numbers. - * In mysql++, it is done through std::stringstream. + * In mysql++, it is done through std::s*****stream (it is banned in our codebase). * This is slower than POSIX functions (strtoul, etc). * In turn, this is slower than simple hand-coded functions, * that doesn't respect locales and unused by MySQL number representations. 
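The mysqlxx changes above consistently drop std::stringstream in favour of fmt::format or plain std::string appends, which avoids locale handling and per-character stream overhead. A minimal sketch of the fmt-based style used in Exception.cpp; the free function name here is illustrative only:

#include <string>
#include <fmt/format.h>

std::string formatMySQLError(const char * error, const char * host, unsigned port)
{
    /// One formatting call, no locale machinery and no <sstream> dependency.
    return fmt::format("{} ({}:{})", error, host ? host : "(nullptr)", port);
}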
diff --git a/base/mysqlxx/tests/CMakeLists.txt b/src/Common/mysqlxx/tests/CMakeLists.txt similarity index 100% rename from base/mysqlxx/tests/CMakeLists.txt rename to src/Common/mysqlxx/tests/CMakeLists.txt diff --git a/base/mysqlxx/tests/mysqlxx_pool_test.cpp b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp similarity index 90% rename from base/mysqlxx/tests/mysqlxx_pool_test.cpp rename to src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp index 3dc23e4da85..61d6a117285 100644 --- a/base/mysqlxx/tests/mysqlxx_pool_test.cpp +++ b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp @@ -2,7 +2,6 @@ #include #include -#include #include @@ -41,10 +40,7 @@ mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool) std::this_thread::sleep_for(1s); } - std::stringstream message; - message << "Connections to all replicas failed: " << connections_pool.getDescription(); - - throw Poco::Exception(message.str()); + throw Poco::Exception("Connections to all replicas failed: " + connections_pool.getDescription()); } } @@ -69,8 +65,7 @@ int main(int, char **) std::clog << "Preparing query (5s sleep) ..."; std::this_thread::sleep_for(5s); - mysqlxx::Query query = worker->query(); - query << test_query; + mysqlxx::Query query = worker->query(test_query); std::clog << "ok" << std::endl; std::clog << "Querying result (5s sleep) ..."; diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp index 7c8053037ea..fa5d3a8fbd5 100644 --- a/src/Common/parseRemoteDescription.cpp +++ b/src/Common/parseRemoteDescription.cpp @@ -41,7 +41,7 @@ static void append(std::vector & to, const std::vector & what, s static bool parseNumber(const String & description, size_t l, size_t r, size_t & res) { res = 0; - for (size_t pos = l; pos < r; pos ++) + for (size_t pos = l; pos < r; ++pos) { if (!isNumericASCII(description[pos])) return false; diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index a621f05c517..1220c50b409 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include diff --git a/src/Common/tests/gtest_local_date_time_comparison.cpp b/src/Common/tests/gtest_local_date_time_comparison.cpp index 9f66da51c94..8aea710ea55 100644 --- a/src/Common/tests/gtest_local_date_time_comparison.cpp +++ b/src/Common/tests/gtest_local_date_time_comparison.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include void fillStackWithGarbage() diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index b5f00c60827..d87d0f8b4ee 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -117,7 +117,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( }; ISerialization::SubstreamPath path; - column_type->getDefaultSerialization()->enumerateStreams(path, callback, column_type, nullptr); + column_type->getDefaultSerialization()->enumerateStreams(path, callback, column_type); if (!result_codec) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. 
It's a bug", column_type->getName()); diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 2c730ee16ed..74e093284a8 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -252,7 +252,7 @@ public: catch (const Exception & ex) { if (ex.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION) - throw ex; + throw; result.error = true; LOG_WARNING(log, "Cannot completely read changelog on path {}, error: {}", filepath, ex.message()); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 171fa2986eb..82ea100bccb 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -75,6 +75,17 @@ std::string checkAndGetSuperdigest(const String & user_and_digest) return user_and_digest; } +int32_t getValueOrMaxInt32AndLogWarning(uint64_t value, const std::string & name, Poco::Logger * log) +{ + if (value > std::numeric_limits::max()) + { + LOG_WARNING(log, "Got {} value for setting '{}' which is bigger than int32_t max value, lowering value to {}.", value, name, std::numeric_limits::max()); + return std::numeric_limits::max(); + } + + return static_cast(value); +} + } KeeperServer::KeeperServer( @@ -134,18 +145,18 @@ void KeeperServer::startup() } nuraft::raft_params params; - params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds(); - params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(); - params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(); - - params.reserved_log_items_ = coordination_settings->reserved_log_items; - params.snapshot_distance_ = coordination_settings->snapshot_distance; - params.stale_log_gap_ = coordination_settings->stale_log_gap; - params.fresh_log_gap_ = coordination_settings->fresh_log_gap; - params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds(); + params.heart_beat_interval_ = getValueOrMaxInt32AndLogWarning(coordination_settings->heart_beat_interval_ms.totalMilliseconds(), "heart_beat_interval_ms", log); + params.election_timeout_lower_bound_ = getValueOrMaxInt32AndLogWarning(coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(), "election_timeout_lower_bound_ms", log); + params.election_timeout_upper_bound_ = getValueOrMaxInt32AndLogWarning(coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(), "election_timeout_upper_bound_ms", log); + params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log); + params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log); + params.stale_log_gap_ = getValueOrMaxInt32AndLogWarning(coordination_settings->stale_log_gap, "stale_log_gap", log); + params.fresh_log_gap_ = getValueOrMaxInt32AndLogWarning(coordination_settings->fresh_log_gap, "fresh_log_gap", log); + params.client_req_timeout_ = getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds(), "operation_timeout_ms", log); params.auto_forwarding_ = coordination_settings->auto_forwarding; - params.auto_forwarding_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds() * 2; - params.max_append_size_ = coordination_settings->max_requests_batch_size; + params.auto_forwarding_req_timeout_ = 
std::max(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2, std::numeric_limits::max()); + params.auto_forwarding_req_timeout_ = getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2, "operation_timeout_ms", log); + params.max_append_size_ = getValueOrMaxInt32AndLogWarning(coordination_settings->max_requests_batch_size, "max_requests_batch_size", log); params.return_method_ = nuraft::raft_params::async_handler; diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index be6d4db4219..518d569ca67 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -55,7 +55,7 @@ namespace return "/"; } - void writeNode(const KeeperStorage::Node & node, WriteBuffer & out) + void writeNode(const KeeperStorage::Node & node, SnapshotVersion version, WriteBuffer & out) { writeBinary(node.data, out); @@ -76,6 +76,11 @@ namespace writeBinary(node.stat.pzxid, out); writeBinary(node.seq_num, out); + + if (version >= SnapshotVersion::V4) + { + writeBinary(node.size_bytes, out); + } } void readNode(KeeperStorage::Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map) @@ -124,6 +129,11 @@ namespace readBinary(node.stat.numChildren, in); readBinary(node.stat.pzxid, in); readBinary(node.seq_num, in); + + if (version >= SnapshotVersion::V4) + { + readBinary(node.size_bytes, in); + } } void serializeSnapshotMetadata(const SnapshotMetadataPtr & snapshot_meta, WriteBuffer & out) @@ -176,7 +186,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to serialize node with mzxid {}, but last snapshot index {}", node.stat.mzxid, snapshot.snapshot_meta->get_last_log_idx()); writeBinary(path, out); - writeNode(node, out); + writeNode(node, snapshot.version, out); /// Last iteration: check and exit here without iterator increment. Otherwise /// false positive race condition on list end is possible. 
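getValueOrMaxInt32AndLogWarning above exists because several CoordinationSettings are 64-bit while the corresponding nuraft::raft_params fields are int32_t, so a plain narrowing cast could silently wrap. A standalone sketch of the same clamping idea, stripped of the Poco logging; the function name is illustrative only:

#include <cstdint>
#include <limits>

int32_t clampToInt32(uint64_t value)
{
    /// Values above INT32_MAX are lowered to INT32_MAX instead of wrapping through a narrowing cast.
    if (value > static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
        return std::numeric_limits<int32_t>::max();
    return static_cast<int32_t>(value);
}

/// For example, clampToInt32(5'000'000'000) returns 2147483647.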
diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h index 2889ec493df..174864a0ceb 100644 --- a/src/Coordination/KeeperSnapshotManager.h +++ b/src/Coordination/KeeperSnapshotManager.h @@ -18,9 +18,10 @@ enum SnapshotVersion : uint8_t V1 = 1, /// with ACL map V2 = 2, /// with 64 bit buffer header V3 = 3, /// compress snapshots with ZSTD codec + V4 = 4, /// add Node size to snapshots }; -static constexpr auto CURRENT_SNAPSHOT_VERSION = SnapshotVersion::V3; +static constexpr auto CURRENT_SNAPSHOT_VERSION = SnapshotVersion::V4; /// What is stored in binary shapsnot struct SnapshotDeserializationResult diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 81bb3d0dd7d..a64a7d425f6 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -91,8 +91,7 @@ static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, c static bool fixupACL( const std::vector & request_acls, const std::vector & current_ids, - std::vector & result_acls, - bool hash_acls) + std::vector & result_acls) { if (request_acls.empty()) return true; @@ -125,29 +124,12 @@ static bool fixupACL( return false; valid_found = true; - if (hash_acls) - new_acl.id = generateDigest(new_acl.id); result_acls.push_back(new_acl); } } return valid_found; } -uint64_t KeeperStorage::Node::sizeInBytes() const -{ - uint64_t total_size{0}; - for (const auto & child : children) - total_size += child.size(); - - total_size += data.size(); - - total_size += sizeof(acl_id); - total_size += sizeof(is_sequental); - total_size += sizeof(stat); - total_size += sizeof(seq_num); - return total_size; -} - static KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type) { KeeperStorage::ResponsesForSessions result; @@ -325,7 +307,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr KeeperStorage::Node created_node; Coordination::ACLs node_acls; - if (!fixupACL(request.acls, session_auth_ids, node_acls, !request.restored_from_zookeeper_log)) + if (!fixupACL(request.acls, session_auth_ids, node_acls)) { response.error = Coordination::Error::ZINVALIDACL; return {response_ptr, {}}; @@ -354,6 +336,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr { parent.children.insert(child_path); + parent.size_bytes += child_path.size(); prev_parent_cversion = parent.stat.cversion; prev_parent_zxid = parent.stat.pzxid; @@ -391,6 +374,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr undo_parent.stat.cversion = prev_parent_cversion; undo_parent.stat.pzxid = prev_parent_zxid; undo_parent.children.erase(child_path); + undo_parent.size_bytes -= child_path.size(); }); }; @@ -524,6 +508,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr --parent.stat.numChildren; ++parent.stat.cversion; parent.children.erase(child_basename); + parent.size_bytes -= child_basename.size(); }); response.error = Coordination::Error::ZOK; @@ -543,6 +528,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr ++parent.stat.numChildren; --parent.stat.cversion; parent.children.insert(child_basename); + parent.size_bytes += child_basename.size(); }); }; } @@ -621,11 +607,11 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce auto itr = 
container.updateValue(request.path, [zxid, request] (KeeperStorage::Node & value) { - value.data = request.data; value.stat.version++; value.stat.mzxid = zxid; value.stat.mtime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1); value.stat.dataLength = request.data.length(); + value.size_bytes = value.size_bytes + request.data.size() - value.data.size(); value.data = request.data; }); @@ -789,7 +775,7 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr auto & session_auth_ids = storage.session_and_auth[session_id]; Coordination::ACLs node_acls; - if (!fixupACL(request.acls, session_auth_ids, node_acls, !request.restored_from_zookeeper_log)) + if (!fixupACL(request.acls, session_auth_ids, node_acls)) { response.error = Coordination::Error::ZINVALIDACL; return {response_ptr, {}}; @@ -1110,6 +1096,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina --parent.stat.numChildren; ++parent.stat.cversion; parent.children.erase(getBaseName(ephemeral_path)); + parent.size_bytes -= getBaseName(ephemeral_path).size(); }); auto responses = processWatchesImpl(ephemeral_path, watches, list_watches, Coordination::Event::DELETED); diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index 44dc1b2b43b..f61b17a88a6 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -35,9 +35,22 @@ public: Coordination::Stat stat{}; int32_t seq_num = 0; ChildrenSet children{}; + uint64_t size_bytes; // save size to avoid calculate every time + Node() + { + size_bytes = sizeof(size_bytes); + size_bytes += data.size(); + size_bytes += sizeof(acl_id); + size_bytes += sizeof(is_sequental); + size_bytes += sizeof(stat); + size_bytes += sizeof(seq_num); + } /// Object memory size - uint64_t sizeInBytes() const; + uint64_t sizeInBytes() const + { + return size_bytes; + } }; struct ResponseForSession diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index b324ba119fa..d274ee34a88 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -977,24 +977,24 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize) world.disableSnapshotMode(); world.insert("world", n1); - EXPECT_EQ(world.getApproximateDataSize(), 94); + EXPECT_EQ(world.getApproximateDataSize(), 98); world.updateValue("world", [&](Node & value) { value = n2; }); - EXPECT_EQ(world.getApproximateDataSize(), 96); + EXPECT_EQ(world.getApproximateDataSize(), 98); world.erase("world"); EXPECT_EQ(world.getApproximateDataSize(), 0); world.enableSnapshotMode(); world.insert("world", n1); - EXPECT_EQ(world.getApproximateDataSize(), 94); + EXPECT_EQ(world.getApproximateDataSize(), 98); world.updateValue("world", [&](Node & value) { value = n2; }); - EXPECT_EQ(world.getApproximateDataSize(), 190); + EXPECT_EQ(world.getApproximateDataSize(), 196); world.clearOutdatedNodes(); - EXPECT_EQ(world.getApproximateDataSize(), 96); + EXPECT_EQ(world.getApproximateDataSize(), 98); world.erase("world"); - EXPECT_EQ(world.getApproximateDataSize(), 96); + EXPECT_EQ(world.getApproximateDataSize(), 98); world.clear(); EXPECT_EQ(world.getApproximateDataSize(), 0); diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 168ee346626..85eb6264220 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -9,6 +9,7 @@ #include #include +#include #include @@ -37,7 +38,7 @@ static ReturnType onError(const std::string & message 
[[maybe_unused]], int code template static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, const ColumnWithTypeAndName & expected, - const std::string & context_description, bool allow_remove_constants, int code) + const std::string & context_description, bool allow_materialize, int code) { if (actual.name != expected.name) return onError("Block structure mismatch in " + context_description + " stream: different names of columns:\n" @@ -52,11 +53,16 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con const IColumn * actual_column = actual.column.get(); - /// If we allow to remove constants, and expected column is not const, then unwrap actual constant column. - if (allow_remove_constants && !isColumnConst(*expected.column)) + /// If we allow to materialize, and expected column is not const or sparse, then unwrap actual column. + if (allow_materialize) { - if (const auto * column_const = typeid_cast(actual_column)) - actual_column = &column_const->getDataColumn(); + if (!isColumnConst(*expected.column)) + if (const auto * column_const = typeid_cast(actual_column)) + actual_column = &column_const->getDataColumn(); + + if (!expected.column->isSparse()) + if (const auto * column_sparse = typeid_cast(actual_column)) + actual_column = &column_sparse->getValuesColumn(); } if (actual_column->getName() != expected.column->getName()) @@ -79,7 +85,7 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con template -static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description, bool allow_remove_constants) +static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description, bool allow_materialize) { size_t columns = rhs.columns(); if (lhs.columns() != columns) @@ -93,11 +99,11 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons if constexpr (std::is_same_v) { - if (!checkColumnStructure(actual, expected, context_description, allow_remove_constants, ErrorCodes::LOGICAL_ERROR)) + if (!checkColumnStructure(actual, expected, context_description, allow_materialize, ErrorCodes::LOGICAL_ERROR)) return false; } else - checkColumnStructure(actual, expected, context_description, allow_remove_constants, ErrorCodes::LOGICAL_ERROR); + checkColumnStructure(actual, expected, context_description, allow_materialize, ErrorCodes::LOGICAL_ERROR); } return ReturnType(true); @@ -139,15 +145,17 @@ void Block::insert(size_t position, ColumnWithTypeAndName elem) if (elem.name.empty()) throw Exception("Column name in Block cannot be empty", ErrorCodes::AMBIGUOUS_COLUMN_NAME); - for (auto & name_pos : index_by_name) - if (name_pos.second >= position) - ++name_pos.second; - - auto [it, inserted] = index_by_name.emplace(elem.name, position); + auto [new_it, inserted] = index_by_name.emplace(elem.name, position); if (!inserted) - checkColumnStructure(data[it->second], elem, + checkColumnStructure(data[new_it->second], elem, "(columns with identical name must have identical structure)", true, ErrorCodes::AMBIGUOUS_COLUMN_NAME); + for (auto it = index_by_name.begin(); it != index_by_name.end(); ++it) + { + if (it->second >= position && (!inserted || it != new_it)) + ++it->second; + } + data.emplace(data.begin() + position, std::move(elem)); } @@ -203,7 +211,7 @@ void Block::eraseImpl(size_t position) for (auto it = index_by_name.begin(); it != index_by_name.end();) { if (it->second == position) - 
index_by_name.erase(it++); + it = index_by_name.erase(it); else { if (it->second > position) @@ -706,6 +714,11 @@ void Block::updateHash(SipHash & hash) const col.column->updateHashWithValue(row_no, hash); } +void convertToFullIfSparse(Block & block) +{ + for (auto & column : block) + column.column = recursiveRemoveSparse(column.column); +} ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column) { @@ -729,7 +742,7 @@ Block materializeBlock(const Block & block) for (size_t i = 0; i < columns; ++i) { auto & element = res.getByPosition(i); - element.column = element.column->convertToFullColumnIfConst(); + element.column = recursiveRemoveSparse(element.column->convertToFullColumnIfConst()); } return res; @@ -738,7 +751,7 @@ Block materializeBlock(const Block & block) void materializeBlockInplace(Block & block) { for (size_t i = 0; i < block.columns(); ++i) - block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst(); + block.getByPosition(i).column = recursiveRemoveSparse(block.getByPosition(i).column->convertToFullColumnIfConst()); } } diff --git a/src/Core/Block.h b/src/Core/Block.h index c0c9391e3b2..cad29dea7e6 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -193,6 +193,8 @@ void assertCompatibleHeader(const Block & actual, const Block & desired, const s /// Calculate difference in structure of blocks and write description into output strings. NOTE It doesn't compare values of constant columns. void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out_lhs_diff, std::string & out_rhs_diff); +void convertToFullIfSparse(Block & block); + /// Helps in-memory storages to extract columns from block. /// Properly handles cases, when column is a subcolumn and when it is compressed. 
ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column); diff --git a/src/Core/Field.h b/src/Core/Field.h index a9fb73393cf..19573ed9831 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -808,11 +808,27 @@ auto & Field::safeGet() template T & Field::reinterpret() { + assert(which != Types::String); // See specialization for char using ValueType = std::decay_t; ValueType * MAY_ALIAS ptr = reinterpret_cast(&storage); return *ptr; } +// Specialize reinterpreting to char (used in ColumnUnique) to make sure Strings are reinterpreted correctly +// inline to avoid multiple definitions +template <> +inline char & Field::reinterpret() +{ + if (which == Types::String) + { + // For String we want to return a pointer to the data, not the start of the class + // as the layout of std::string depends on the STD version and options + char * ptr = reinterpret_cast(&storage)->data(); + return *ptr; + } + return *reinterpret_cast(&storage); +} + template T get(const Field & field) { diff --git a/src/Core/MySQL/Authentication.cpp b/src/Core/MySQL/Authentication.cpp index 4dd20ff585e..0492211c51f 100644 --- a/src/Core/MySQL/Authentication.cpp +++ b/src/Core/MySQL/Authentication.cpp @@ -71,7 +71,7 @@ Native41::Native41(const String & password_, const String & scramble_) const Poco::SHA1Engine::Digest & digest = engine3.digest(); scramble.resize(SCRAMBLE_LENGTH); - for (size_t i = 0; i < SCRAMBLE_LENGTH; i++) + for (size_t i = 0; i < SCRAMBLE_LENGTH; ++i) scramble[i] = static_cast(password_sha1[i] ^ digest[i]); } @@ -191,7 +191,7 @@ void Sha256Password::authenticate( } password.resize(plaintext_size); - for (int i = 0; i < plaintext_size; i++) + for (int i = 0; i < plaintext_size; ++i) { password[i] = plaintext[i] ^ static_cast(scramble[i % SCRAMBLE_LENGTH]); } diff --git a/src/Core/MySQL/MySQLGtid.cpp b/src/Core/MySQL/MySQLGtid.cpp index a441bccb076..bfd0bd02b45 100644 --- a/src/Core/MySQL/MySQLGtid.cpp +++ b/src/Core/MySQL/MySQLGtid.cpp @@ -41,7 +41,7 @@ void GTIDSets::parse(const String gtid_format) GTIDSet set; set.uuid = DB::parse(server_ids[0]); - for (size_t k = 1; k < server_ids.size(); k++) + for (size_t k = 1; k < server_ids.size(); ++k) { std::vector inters; boost::split(inters, server_ids[k], [](char c) { return c == '-'; }); @@ -74,7 +74,7 @@ void GTIDSets::update(const GTID & other) { if (set.uuid == other.uuid) { - for (auto i = 0U; i < set.intervals.size(); i++) + for (auto i = 0U; i < set.intervals.size(); ++i) { auto & current = set.intervals[i]; @@ -134,7 +134,7 @@ String GTIDSets::toString() const { WriteBufferFromOwnString buffer; - for (size_t i = 0; i < sets.size(); i++) + for (size_t i = 0; i < sets.size(); ++i) { GTIDSet set = sets[i]; writeUUIDText(set.uuid, buffer); diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index f734154f4ba..fb230f412f0 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -159,7 +159,7 @@ namespace MySQLReplication payload.ignore(1); column_count = readLengthEncodedNumber(payload); - for (auto i = 0U; i < column_count; i++) + for (auto i = 0U; i < column_count; ++i) { UInt8 v = 0x00; payload.readStrict(reinterpret_cast(&v), 1); @@ -188,7 +188,7 @@ namespace MySQLReplication { auto pos = 0; column_meta.reserve(column_count); - for (auto i = 0U; i < column_count; i++) + for (auto i = 0U; i < column_count; ++i) { UInt16 typ = column_type[i]; switch (typ) @@ -230,6 
+230,7 @@ namespace MySQLReplication pos += 2; break; } + case MYSQL_TYPE_BIT: case MYSQL_TYPE_VARCHAR: case MYSQL_TYPE_VAR_STRING: { /// Little-Endian @@ -255,7 +256,7 @@ namespace MySQLReplication out << "Table Len: " << std::to_string(this->table_len) << '\n'; out << "Table: " << this->table << '\n'; out << "Column Count: " << this->column_count << '\n'; - for (auto i = 0U; i < column_count; i++) + for (UInt32 i = 0; i < column_count; ++i) { out << "Column Type [" << i << "]: " << std::to_string(column_type[i]) << ", Meta: " << column_meta[i] << '\n'; } @@ -312,7 +313,7 @@ namespace MySQLReplication UInt32 null_index = 0; UInt32 re_count = 0; - for (auto i = 0U; i < number_columns; i++) + for (UInt32 i = 0; i < number_columns; ++i) { if (bitmap[i]) re_count++; @@ -321,7 +322,7 @@ namespace MySQLReplication boost::dynamic_bitset<> columns_null_set; readBitmap(payload, columns_null_set, re_count); - for (auto i = 0U; i < number_columns; i++) + for (UInt32 i = 0; i < number_columns; ++i) { UInt32 field_len = 0; @@ -523,7 +524,7 @@ namespace MySQLReplication res += (val ^ (mask & compressed_integer_align_numbers[compressed_integers])); } - for (auto k = 0U; k < uncompressed_integers; k++) + for (size_t k = 0; k < uncompressed_integers; ++k) { UInt32 val = 0; readBigEndianStrict(payload, reinterpret_cast(&val), 4); @@ -536,7 +537,7 @@ namespace MySQLReplication size_t uncompressed_decimals = scale / digits_per_integer; size_t compressed_decimals = scale - (uncompressed_decimals * digits_per_integer); - for (auto k = 0U; k < uncompressed_decimals; k++) + for (size_t k = 0; k < uncompressed_decimals; ++k) { UInt32 val = 0; readBigEndianStrict(payload, reinterpret_cast(&val), 4); @@ -584,6 +585,15 @@ namespace MySQLReplication } break; } + case MYSQL_TYPE_BIT: + { + UInt32 bits = ((meta >> 8) * 8) + (meta & 0xff); + UInt32 size = (bits + 7) / 8; + UInt64 val = 0UL; + readBigEndianStrict(payload, reinterpret_cast(&val), size); + row.push_back(val); + break; + } case MYSQL_TYPE_VARCHAR: case MYSQL_TYPE_VAR_STRING: { @@ -669,7 +679,7 @@ namespace MySQLReplication header.dump(out); out << "Schema: " << this->schema << '\n'; out << "Table: " << this->table << '\n'; - for (auto i = 0U; i < rows.size(); i++) + for (size_t i = 0; i < rows.size(); ++i) { out << "Row[" << i << "]: " << applyVisitor(to_string, rows[i]) << '\n'; } diff --git a/src/Core/MySQL/PacketsProtocolText.cpp b/src/Core/MySQL/PacketsProtocolText.cpp index 0494a146c47..728e8061e87 100644 --- a/src/Core/MySQL/PacketsProtocolText.cpp +++ b/src/Core/MySQL/PacketsProtocolText.cpp @@ -15,7 +15,7 @@ namespace ProtocolText ResultSetRow::ResultSetRow(const Serializations & serializations, const Columns & columns_, int row_num_) : columns(columns_), row_num(row_num_) { - for (size_t i = 0; i < columns.size(); i++) + for (size_t i = 0; i < columns.size(); ++i) { if (columns[i]->isNullAt(row_num)) { @@ -39,7 +39,7 @@ size_t ResultSetRow::getPayloadSize() const void ResultSetRow::writePayloadImpl(WriteBuffer & buffer) const { - for (size_t i = 0; i < columns.size(); i++) + for (size_t i = 0; i < columns.size(); ++i) { if (columns[i]->isNullAt(row_num)) buffer.write(serialized[i].data(), 1); diff --git a/src/Core/NamesAndTypes.cpp b/src/Core/NamesAndTypes.cpp index b47f5a6823b..b9098d3308d 100644 --- a/src/Core/NamesAndTypes.cpp +++ b/src/Core/NamesAndTypes.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -43,6 +44,17 @@ String NameAndTypePair::getSubcolumnName() const return name.substr(*subcolumn_delimiter_position + 1, 
name.size() - *subcolumn_delimiter_position); } +String NameAndTypePair::dump() const +{ + WriteBufferFromOwnString out; + out << "name: " << name << "\n" + << "type: " << type->getName() << "\n" + << "name in storage: " << getNameInStorage() << "\n" + << "type in storage: " << getTypeInStorage()->getName(); + + return out.str(); +} + void NamesAndTypesList::readText(ReadBuffer & buf) { const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); diff --git a/src/Core/NamesAndTypes.h b/src/Core/NamesAndTypes.h index 58b5189db63..3ac9ad2fa02 100644 --- a/src/Core/NamesAndTypes.h +++ b/src/Core/NamesAndTypes.h @@ -40,6 +40,8 @@ public: return name == rhs.name && type->equals(*rhs.type); } + String dump() const; + String name; DataTypePtr type; diff --git a/src/Core/PostgreSQL/Connection.cpp b/src/Core/PostgreSQL/Connection.cpp index 75786a51d92..f97a35a9e92 100644 --- a/src/Core/PostgreSQL/Connection.cpp +++ b/src/Core/PostgreSQL/Connection.cpp @@ -12,10 +12,7 @@ Connection::Connection(const ConnectionInfo & connection_info_, bool replication , log(&Poco::Logger::get("PostgreSQLReplicaConnection")) { if (replication) - { - connection_info = std::make_pair( - fmt::format("{} replication=database", connection_info.first), connection_info.second); - } + connection_info = {fmt::format("{} replication=database", connection_info.connection_string), connection_info.host_port}; } void Connection::execWithRetry(const std::function & exec) @@ -61,11 +58,14 @@ void Connection::updateConnection() { if (connection) connection->close(); + /// Always throws if there is no connection. - connection = std::make_unique(connection_info.first); + connection = std::make_unique(connection_info.connection_string); + if (replication) connection->set_variable("default_transaction_isolation", "'repeatable read'"); - LOG_DEBUG(&Poco::Logger::get("PostgreSQLConnection"), "New connection to {}", connection_info.second); + + LOG_DEBUG(&Poco::Logger::get("PostgreSQLConnection"), "New connection to {}", connection_info.host_port); } void Connection::connect() diff --git a/src/Core/PostgreSQL/Connection.h b/src/Core/PostgreSQL/Connection.h index d65c38643c1..8c5609dc66b 100644 --- a/src/Core/PostgreSQL/Connection.h +++ b/src/Core/PostgreSQL/Connection.h @@ -8,19 +8,26 @@ #include #include -/* Methods to work with PostgreSQL connection object. +/** Methods to work with PostgreSQL connection object. * Should only be used in case there has to be a single connection object, which * is long-lived and there are no concurrent connection queries. - * Now only use case - for replication handler for replication from PostgreSQL. - * In all other integration engine use pool with failover. - **/ + */ namespace Poco { class Logger; } +namespace pqxx +{ + using ConnectionPtr = std::unique_ptr; +} + namespace postgres { -using ConnectionInfo = std::pair; -using ConnectionPtr = std::unique_ptr; + +struct ConnectionInfo +{ + String connection_string; + String host_port; /// For logs. 
+}; class Connection : private boost::noncopyable { @@ -33,14 +40,17 @@ public: void connect(); + void updateConnection(); + void tryUpdateConnection(); const ConnectionInfo & getConnectionInfo() { return connection_info; } -private: - void updateConnection(); + String getInfoForLog() const { return connection_info.host_port; } - ConnectionPtr connection; +private: + + pqxx::ConnectionPtr connection; ConnectionInfo connection_info; bool replication; @@ -48,6 +58,9 @@ private: Poco::Logger * log; }; + +using ConnectionPtr = std::unique_ptr; + } #endif diff --git a/src/Core/PostgreSQL/ConnectionHolder.h b/src/Core/PostgreSQL/ConnectionHolder.h index d0d64935e91..38e321e222c 100644 --- a/src/Core/PostgreSQL/ConnectionHolder.h +++ b/src/Core/PostgreSQL/ConnectionHolder.h @@ -7,12 +7,12 @@ #include #include #include +#include "Connection.h" namespace postgres { -using ConnectionPtr = std::unique_ptr; using Pool = BorrowedObjectPool; using PoolPtr = std::shared_ptr; @@ -28,8 +28,12 @@ public: pqxx::connection & get() { - assert(connection != nullptr); - return *connection; + return connection->getRef(); + } + + void update() + { + connection->updateConnection(); } private: diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 3addb511c3b..844c60087e0 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -32,9 +32,9 @@ PoolWithFailover::PoolWithFailover( { for (const auto & replica_configuration : configurations) { - auto connection_string = formatConnectionString(replica_configuration.database, - replica_configuration.host, replica_configuration.port, replica_configuration.username, replica_configuration.password).first; - replicas_with_priority[priority].emplace_back(connection_string, pool_size, getConnectionForLog(replica_configuration.host, replica_configuration.port)); + auto connection_info = formatConnectionString(replica_configuration.database, + replica_configuration.host, replica_configuration.port, replica_configuration.username, replica_configuration.password); + replicas_with_priority[priority].emplace_back(connection_info, pool_size); } } } @@ -52,8 +52,8 @@ PoolWithFailover::PoolWithFailover( for (const auto & [host, port] : configuration.addresses) { LOG_DEBUG(&Poco::Logger::get("PostgreSQLPoolWithFailover"), "Adding address host: {}, port: {} to connection pool", host, port); - auto connection_string = formatConnectionString(configuration.database, host, port, configuration.username, configuration.password).first; - replicas_with_priority[0].emplace_back(connection_string, pool_size, getConnectionForLog(host, port)); + auto connection_string = formatConnectionString(configuration.database, host, port, configuration.username, configuration.password); + replicas_with_priority[0].emplace_back(connection_string, pool_size); } } @@ -83,16 +83,18 @@ ConnectionHolderPtr PoolWithFailover::get() try { /// Create a new connection or reopen an old connection if it became invalid. 
- if (!connection || !connection->is_open()) + if (!connection) { - connection = std::make_unique(replica.connection_string); - LOG_DEBUG(log, "New connection to {}:{}", connection->hostname(), connection->port()); + connection = std::make_unique(replica.connection_info); + LOG_DEBUG(log, "New connection to {}", connection->getInfoForLog()); } + + connection->connect(); } catch (const pqxx::broken_connection & pqxx_error) { LOG_ERROR(log, "Connection error: {}", pqxx_error.what()); - error_message << "Try " << try_idx + 1 << ". Connection to `" << replica.name_for_log << "` failed: " << pqxx_error.what() << "\n"; + error_message << "Try " << try_idx + 1 << ". Connection to `" << replica.connection_info.host_port << "` failed: " << pqxx_error.what() << "\n"; replica.pool->returnObject(std::move(connection)); continue; diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index c59010a5d43..e6f691ed2dd 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -44,12 +44,11 @@ public: private: struct PoolHolder { - String connection_string; + ConnectionInfo connection_info; PoolPtr pool; - String name_for_log; - PoolHolder(const String & connection_string_, size_t pool_size, const String & name_for_log_) - : connection_string(connection_string_), pool(std::make_shared(pool_size)), name_for_log(name_for_log_) {} + PoolHolder(const ConnectionInfo & connection_info_, size_t pool_size) + : connection_info(connection_info_), pool(std::make_shared(pool_size)) {} }; /// Highest priority is 0, the bigger the number in map, the less the priority diff --git a/src/Core/PostgreSQL/Utils.cpp b/src/Core/PostgreSQL/Utils.cpp index 60b13218202..b4ad19c819a 100644 --- a/src/Core/PostgreSQL/Utils.cpp +++ b/src/Core/PostgreSQL/Utils.cpp @@ -17,7 +17,7 @@ ConnectionInfo formatConnectionString(String dbname, String host, UInt16 port, S << " user=" << DB::quote << user << " password=" << DB::quote << password << " connect_timeout=10"; - return std::make_pair(out.str(), host + ':' + DB::toString(port)); + return {out.str(), host + ':' + DB::toString(port)}; } String getConnectionForLog(const String & host, UInt16 port) diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index 1c3230ec826..f4d47049554 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -85,7 +85,7 @@ void insertPostgreSQLValue( assert_cast(column).insertData(value.data(), value.size()); break; case ExternalResultDescription::ValueType::vtUUID: - assert_cast(column).insert(parse(value.data(), value.size())); + assert_cast(column).insertValue(parse(value.data(), value.size())); break; case ExternalResultDescription::ValueType::vtDate: assert_cast(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()}); diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 36820788b91..93f44b02ce3 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -44,6 +44,8 @@ #define DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS 54451 +#define DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION 54454 + /// Version of ClickHouse TCP protocol. /// /// Should be incremented manually on protocol changes. 
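The new DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION constant above follows the usual pattern for these protocol defines: a feature is only exercised once the revision negotiated during the handshake is high enough. A hedged sketch of such a check; the function name is illustrative and not the actual server code:

#include <cstdint>

constexpr uint64_t MIN_REVISION_WITH_CUSTOM_SERIALIZATION = 54454;

bool canUseCustomSerialization(uint64_t negotiated_revision)
{
    /// Peers exchange revisions in the handshake and effectively use the lower one,
    /// so passing this check means the other side understands the extra field.
    return negotiated_revision >= MIN_REVISION_WITH_CUSTOM_SERIALIZATION;
}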
@@ -51,7 +53,6 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). - -#define DBMS_TCP_PROTOCOL_VERSION 54453 +#define DBMS_TCP_PROTOCOL_VERSION 54455 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 11c625007d9..8daf39d9928 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -117,6 +117,16 @@ void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfigura } } +std::vector Settings::getAllRegisteredNames() const +{ + std::vector all_settings; + for (const auto & setting_field : all()) + { + all_settings.push_back(setting_field.getName()); + } + return all_settings; +} + IMPLEMENT_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS) } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index cafa83a7f8d..12c56003b24 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -45,7 +46,6 @@ class IColumn; M(UInt64, max_insert_threads, 0, "The maximum number of threads to execute the INSERT SELECT query. Values 0 or 1 means that INSERT SELECT is not run in parallel. Higher values will lead to higher memory usage. Parallel INSERT SELECT has effect only if the SELECT part is run on parallel, see 'max_threads' setting.", 0) \ M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \ M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \ - M(MaxThreads, max_alter_threads, 0, "The maximum number of threads to execute the ALTER requests. 
By default, it is determined automatically.", 0) \ M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \ M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \ @@ -55,7 +55,7 @@ class IColumn; M(Milliseconds, connect_timeout_with_failover_secure_ms, 100, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \ M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "", 0) \ M(Seconds, send_timeout, DBMS_DEFAULT_SEND_TIMEOUT_SEC, "", 0) \ - M(Seconds, drain_timeout, 3, "", 0) \ + M(Seconds, drain_timeout, 3, "Timeout for draining remote connections, -1 means synchronous drain w/o ignoring errors", 0) \ M(Seconds, tcp_keep_alive_timeout, 290 /* less than DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC */, "The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes", 0) \ M(Milliseconds, hedged_connection_timeout_ms, 100, "Connection timeout for establishing connection with replica for Hedged requests", 0) \ M(Milliseconds, receive_data_timeout_ms, 2000, "Connection timeout for receiving first packet of data or packet with positive progress from replica", 0) \ @@ -500,8 +500,12 @@ class IColumn; M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \ M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \ M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \ + \ M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \ M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \ + M(UInt64, external_storage_connect_timeout_sec, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connect timeout in seconds. Now supported only for MySQL", 0) \ + M(UInt64, external_storage_rw_timeout_sec, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "Read/write timeout in seconds. Now supported only for MySQL", 0) \ + \ M(UnionMode, union_default_mode, UnionMode::Unspecified, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. 
If empty, query without Union Mode will throw exception.", 0) \ M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \ M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ @@ -571,7 +575,7 @@ class IColumn; MAKE_OBSOLETE(M, UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60) \ MAKE_OBSOLETE(M, UInt64, merge_tree_clear_old_parts_interval_seconds, 1) \ MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \ - + MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \ /** The section above is for obsolete settings. Do not add anything there. */ @@ -593,6 +597,7 @@ class IColumn; M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ + M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ \ @@ -674,7 +679,7 @@ DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS) /** Settings of query execution. * These settings go to users.xml. */ -struct Settings : public BaseSettings +struct Settings : public BaseSettings, public IHints<2, Settings> { /// For initialization from empty initializer-list to be "value initialization", not "aggregate initialization" in C++14. /// http://en.cppreference.com/w/cpp/language/aggregate_initialization @@ -698,6 +703,8 @@ struct Settings : public BaseSettings /// Check that there is no user-level settings at the top level in config. /// This is a common source of mistake (user don't know where to write user-level setting). 
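Reviewer note: the IHints<2, Settings> base plus the getAllRegisteredNames() override introduced above is what allows suggesting the closest known setting name when a user misspells one. A minimal standalone sketch of that idea, using a plain Levenshtein distance as a stand-in for the actual hints machinery (illustration only, not ClickHouse code):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Classic Levenshtein distance; only an illustration of the "maybe you meant" idea,
// not the implementation used by IHints.
static size_t editDistance(const std::string & lhs, const std::string & rhs)
{
    std::vector<size_t> prev(rhs.size() + 1), curr(rhs.size() + 1);
    for (size_t j = 0; j <= rhs.size(); ++j)
        prev[j] = j;
    for (size_t i = 1; i <= lhs.size(); ++i)
    {
        curr[0] = i;
        for (size_t j = 1; j <= rhs.size(); ++j)
        {
            size_t cost = lhs[i - 1] == rhs[j - 1] ? 0 : 1;
            curr[j] = std::min({prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost});
        }
        std::swap(prev, curr);
    }
    return prev[rhs.size()];
}

int main()
{
    // Imagine these came from Settings::getAllRegisteredNames().
    std::vector<std::string> registered = {"max_threads", "max_insert_threads", "max_final_threads"};

    std::string typo = "max_treads";
    auto best = std::min_element(registered.begin(), registered.end(),
        [&](const auto & a, const auto & b) { return editDistance(typo, a) < editDistance(typo, b); });

    std::cout << "Unknown setting '" << typo << "', maybe you meant '" << *best << "'\n";
}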
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path); + + std::vector getAllRegisteredNames() const override; }; /* diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index e500bf2858a..39fbfb62917 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -66,6 +66,7 @@ public: bool shouldAlignRightInPrettyFormats() const override { return false; } SerializationPtr doGetDefaultSerialization() const override; + bool supportsSparseSerialization() const override { return false; } bool isVersioned() const { return function->isVersioned(); } diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 9b93e5feb16..f0f78849e06 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index c0585095eeb..dc8c99b06bc 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -59,6 +59,7 @@ class DataTypeDecimalBase : public IDataType public: using FieldType = T; using ColumnType = ColumnDecimal; + static constexpr auto type_id = TypeId; static constexpr bool is_parametric = true; diff --git a/src/DataTypes/DataTypeEnum.h b/src/DataTypes/DataTypeEnum.h index 92c72b87afa..2f607fc2aa6 100644 --- a/src/DataTypes/DataTypeEnum.h +++ b/src/DataTypes/DataTypeEnum.h @@ -38,6 +38,7 @@ class DataTypeEnum final : public IDataTypeEnum, public EnumValues public: using FieldType = Type; using ColumnType = ColumnVector; + static constexpr auto type_id = sizeof(FieldType) == 1 ? TypeIndex::Enum8 : TypeIndex::Enum16; using typename EnumValues::Values; static constexpr bool is_parametric = true; @@ -52,7 +53,7 @@ public: std::string doGetName() const override { return type_name; } const char * getFamilyName() const override; - TypeIndex getTypeId() const override { return sizeof(FieldType) == 1 ? 
TypeIndex::Enum8 : TypeIndex::Enum16; } + TypeIndex getTypeId() const override { return type_id; } FieldType readValue(ReadBuffer & istr) const { diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h index f88d2f5337a..a53fde42b29 100644 --- a/src/DataTypes/DataTypeFixedString.h +++ b/src/DataTypes/DataTypeFixedString.h @@ -10,6 +10,8 @@ namespace DB { +class ColumnFixedString; + namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; @@ -22,7 +24,10 @@ private: size_t n; public: + using ColumnType = ColumnFixedString; + static constexpr bool is_parametric = true; + static constexpr auto type_id = TypeIndex::FixedString; DataTypeFixedString(size_t n_) : n(n_) { @@ -33,7 +38,7 @@ public: } std::string doGetName() const override; - TypeIndex getTypeId() const override { return TypeIndex::FixedString; } + TypeIndex getTypeId() const override { return type_id; } const char * getFamilyName() const override { return "FixedString"; } diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index 7f4286046d9..38b2109eec6 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -51,6 +51,7 @@ public: bool isNullable() const override { return false; } bool onlyNull() const override { return false; } bool lowCardinality() const override { return true; } + bool supportsSparseSerialization() const override { return false; } bool isLowCardinalityNullable() const override { return dictionary_type->isNullable(); } static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type); diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h index 95975051600..59dc26ed13a 100644 --- a/src/DataTypes/DataTypeNumberBase.h +++ b/src/DataTypes/DataTypeNumberBase.h @@ -20,6 +20,7 @@ class DataTypeNumberBase : public IDataType public: static constexpr bool is_parametric = false; static constexpr auto family_name = TypeName; + static constexpr auto type_id = TypeId; using FieldType = T; using ColumnType = ColumnVector; diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h index fd674505bc0..5f3bde43a13 100644 --- a/src/DataTypes/DataTypeString.h +++ b/src/DataTypes/DataTypeString.h @@ -6,10 +6,13 @@ namespace DB { +class ColumnString; + class DataTypeString final : public IDataType { public: using FieldType = String; + using ColumnType = ColumnString; static constexpr bool is_parametric = false; static constexpr auto type_id = TypeIndex::String; diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 0660f371258..ad6d4e2943b 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -6,8 +6,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -152,6 +154,20 @@ MutableColumnPtr DataTypeTuple::createColumn() const return ColumnTuple::create(std::move(tuple_columns)); } +MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const +{ + const auto & element_serializations = + assert_cast(serialization).getElementsSerializations(); + + size_t size = elems.size(); + assert(element_serializations.size() == size); + MutableColumns tuple_columns(size); + for (size_t i = 0; i < size; ++i) + tuple_columns[i] = elems[i]->createColumn(*element_serializations[i]->getNested()); + + return ColumnTuple::create(std::move(tuple_columns)); +} + Field DataTypeTuple::getDefault() const { return Tuple(collections::map(elems, [] 
(const DataTypePtr & elem) { return elem->getDefault(); })); @@ -248,21 +264,33 @@ SerializationPtr DataTypeTuple::doGetDefaultSerialization() const return std::make_shared(std::move(serializations), use_explicit_names); } -SerializationPtr DataTypeTuple::getSerialization(const String & column_name, const StreamExistenceCallback & callback) const +SerializationPtr DataTypeTuple::getSerialization(const SerializationInfo & info) const { SerializationTuple::ElementSerializations serializations(elems.size()); + const auto & info_tuple = assert_cast(info); bool use_explicit_names = have_explicit_names && serialize_names; + for (size_t i = 0; i < elems.size(); ++i) { String elem_name = use_explicit_names ? names[i] : toString(i + 1); - auto subcolumn_name = Nested::concatenateName(column_name, elem_name); - auto serializaion = elems[i]->getSerialization(subcolumn_name, callback); - serializations[i] = std::make_shared(serializaion, elem_name); + auto serialization = elems[i]->getSerialization(*info_tuple.getElementInfo(i)); + serializations[i] = std::make_shared(serialization, elem_name); } return std::make_shared(std::move(serializations), use_explicit_names); } +MutableSerializationInfoPtr DataTypeTuple::createSerializationInfo(const SerializationInfo::Settings & settings) const +{ + MutableSerializationInfos infos; + infos.reserve(elems.size()); + for (const auto & elem : elems) + infos.push_back(elem->createSerializationInfo(settings)); + + return std::make_shared(std::move(infos), settings); +} + + static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index d168d73efbf..c56e87ca22d 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -36,8 +36,10 @@ public: const char * getFamilyName() const override { return "Tuple"; } bool canBeInsideNullable() const override { return false; } + bool supportsSparseSerialization() const override { return true; } MutableColumnPtr createColumn() const override; + MutableColumnPtr createColumn(const ISerialization & serialization) const override; Field getDefault() const override; void insertDefaultInto(IColumn & column) const override; @@ -52,9 +54,9 @@ public: size_t getMaximumSizeOfValueInMemory() const override; size_t getSizeOfValueInMemory() const override; - SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const override; - SerializationPtr doGetDefaultSerialization() const override; + SerializationPtr getSerialization(const SerializationInfo & info) const override; + MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const override; const DataTypePtr & getElement(size_t i) const { return elems[i]; } const DataTypes & getElements() const { return elems; } diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h index 5ed7a912607..af9f1f35ca5 100644 --- a/src/DataTypes/DataTypeUUID.h +++ b/src/DataTypes/DataTypeUUID.h @@ -15,9 +15,10 @@ public: using FieldType = UUID; using ColumnType = ColumnVector; + static constexpr auto type_id = TypeIndex::UUID; const char * getFamilyName() const override { return "UUID"; } - TypeIndex getTypeId() const override { return TypeIndex::UUID; } + TypeIndex getTypeId() const override { return type_id; } Field getDefault() const override; diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index fef4c34d8b0..0c9a410077f 100644 
--- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -86,6 +86,7 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("INT UNSIGNED", "UInt32", DataTypeFactory::CaseInsensitive); factory.registerAlias("INTEGER UNSIGNED", "UInt32", DataTypeFactory::CaseInsensitive); factory.registerAlias("BIGINT UNSIGNED", "UInt64", DataTypeFactory::CaseInsensitive); + factory.registerAlias("BIT", "UInt64", DataTypeFactory::CaseInsensitive); } } diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 669876c792d..edc9e4159f4 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -10,6 +11,8 @@ #include #include #include +#include +#include namespace DB @@ -40,6 +43,15 @@ void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_valu } } +MutableColumnPtr IDataType::createColumn(const ISerialization & serialization) const +{ + auto column = createColumn(); + if (serialization.getKind() == ISerialization::Kind::SPARSE) + return ColumnSparse::create(std::move(column)); + + return column; +} + ColumnPtr IDataType::createColumnConst(size_t size, const Field & field) const { auto column = createColumn(); @@ -65,9 +77,7 @@ size_t IDataType::getSizeOfValueInMemory() const void IDataType::forEachSubcolumn( const SubcolumnCallback & callback, - const SerializationPtr & serialization, - const DataTypePtr & type, - const ColumnPtr & column) + const SubstreamData & data) { ISerialization::StreamCallback callback_with_data = [&](const auto & subpath) { @@ -76,66 +86,59 @@ void IDataType::forEachSubcolumn( if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, i + 1)) { auto name = ISerialization::getSubcolumnNameForStream(subpath, i + 1); - auto data = ISerialization::createFromPath(subpath, i); - callback(subpath, name, data); + auto subdata = ISerialization::createFromPath(subpath, i); + callback(subpath, name, subdata); } subpath[i].visited = true; } }; - ISerialization::SubstreamPath path; - serialization->enumerateStreams(path, callback_with_data, type, column); + SubstreamPath path; + data.serialization->enumerateStreams(path, callback_with_data, data); } -DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const +template +Ptr IDataType::getForSubcolumn( + const String & subcolumn_name, + const SubstreamData & data, + Ptr SubstreamData::*member, + bool throw_if_null) const { - DataTypePtr res; - forEachSubcolumn([&](const auto &, const auto & name, const auto & data) + Ptr res; + forEachSubcolumn([&](const auto &, const auto & name, const auto & subdata) { if (name == subcolumn_name) - res = data.type; - }, getDefaultSerialization(), getPtr(), nullptr); + res = subdata.*member; + }, data); + + if (!res && throw_if_null) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); return res; } +DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const +{ + SubstreamData data = { getDefaultSerialization(), getPtr(), nullptr, nullptr }; + return getForSubcolumn(subcolumn_name, data, &SubstreamData::type, false); +} + DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const { - auto subcolumn_type = tryGetSubcolumnType(subcolumn_name); - if (subcolumn_type) - return subcolumn_type; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); + 
SubstreamData data = { getDefaultSerialization(), getPtr(), nullptr, nullptr }; + return getForSubcolumn(subcolumn_name, data, &SubstreamData::type); } SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const { - SerializationPtr res; - forEachSubcolumn([&](const auto &, const auto & name, const auto & data) - { - if (name == subcolumn_name) - res = data.serialization; - }, serialization, nullptr, nullptr); - - if (res) - return res; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); + SubstreamData data = { serialization, nullptr, nullptr, nullptr }; + return getForSubcolumn(subcolumn_name, data, &SubstreamData::serialization); } ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const { - ColumnPtr res; - forEachSubcolumn([&](const auto &, const auto & name, const auto & data) - { - if (name == subcolumn_name) - res = data.column; - }, getDefaultSerialization(), nullptr, column); - - if (res) - return res; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); + SubstreamData data = { getDefaultSerialization(), nullptr, column, nullptr }; + return getForSubcolumn(subcolumn_name, data, &SubstreamData::column); } Names IDataType::getSubcolumnNames() const @@ -144,7 +147,7 @@ Names IDataType::getSubcolumnNames() const forEachSubcolumn([&](const auto &, const auto & name, const auto &) { res.push_back(name); - }, getDefaultSerialization(), nullptr, nullptr); + }, { getDefaultSerialization(), nullptr, nullptr, nullptr }); return res; } @@ -163,6 +166,12 @@ void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const custom_serialization = std::move(custom_desc_->serialization); } +MutableSerializationInfoPtr IDataType::createSerializationInfo( + const SerializationInfo::Settings & settings) const +{ + return std::make_shared(ISerialization::Kind::DEFAULT, settings); +} + SerializationPtr IDataType::getDefaultSerialization() const { if (custom_serialization) @@ -171,22 +180,48 @@ SerializationPtr IDataType::getDefaultSerialization() const return doGetDefaultSerialization(); } +SerializationPtr IDataType::getSparseSerialization() const +{ + return std::make_shared(getDefaultSerialization()); +} + +SerializationPtr IDataType::getSerialization(ISerialization::Kind kind) const +{ + if (supportsSparseSerialization() && kind == ISerialization::Kind::SPARSE) + return getSparseSerialization(); + + return getDefaultSerialization(); +} + +SerializationPtr IDataType::getSerialization(const SerializationInfo & info) const +{ + return getSerialization(info.getKind()); +} + // static -SerializationPtr IDataType::getSerialization(const NameAndTypePair & column, const IDataType::StreamExistenceCallback & callback) +SerializationPtr IDataType::getSerialization(const NameAndTypePair & column, const SerializationInfo & info) { if (column.isSubcolumn()) { const auto & type_in_storage = column.getTypeInStorage(); - auto default_serialization = type_in_storage->getDefaultSerialization(); - return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), default_serialization); + auto serialization = type_in_storage->getSerialization(info); + return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), serialization); } - return column.type->getSerialization(column.name, callback); + return column.type->getSerialization(info); } 
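For context: getSparseSerialization(), getSerialization(Kind) and getSerialization(const SerializationInfo &) above, together with SerializationInfo::chooseKind later in this patch, boil down to a ratio check: once the observed share of default values in a column exceeds ratio_of_defaults_for_sparse and the type supports it, the sparse serialization is used, otherwise the default one. A self-contained sketch of that decision with simplified stand-in types (illustration, not the real classes):

#include <cstddef>
#include <iostream>

enum class Kind { DEFAULT, SPARSE };

struct Stats
{
    size_t num_rows = 0;
    size_t num_defaults = 0;
};

// Mirrors the decision in SerializationInfo::chooseKind: pick SPARSE once the share
// of default values crosses the configured threshold (ratio_of_defaults_for_sparse).
Kind chooseKind(const Stats & stats, double ratio_of_defaults_for_sparse)
{
    double ratio = stats.num_rows
        ? static_cast<double>(stats.num_defaults) / stats.num_rows
        : 0.0;
    return ratio > ratio_of_defaults_for_sparse ? Kind::SPARSE : Kind::DEFAULT;
}

// Mirrors IDataType::getSerialization(Kind): fall back to the default serialization
// for types that do not support the sparse one (e.g. LowCardinality, AggregateFunction,
// which override supportsSparseSerialization() to return false in this patch).
Kind effectiveKind(Kind requested, bool type_supports_sparse)
{
    return (type_supports_sparse && requested == Kind::SPARSE) ? Kind::SPARSE : Kind::DEFAULT;
}

int main()
{
    Stats stats{1000, 980};                 // 98% of rows hold the default value
    Kind kind = chooseKind(stats, 0.95);    // threshold comes from SerializationInfo::Settings
    std::cout << (effectiveKind(kind, /*type_supports_sparse=*/true) == Kind::SPARSE
        ? "sparse\n" : "default\n");        // prints "sparse"
}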
-SerializationPtr IDataType::getSerialization(const String &, const StreamExistenceCallback &) const +// static +SerializationPtr IDataType::getSerialization(const NameAndTypePair & column) { - return getDefaultSerialization(); + if (column.isSubcolumn()) + { + const auto & type_in_storage = column.getTypeInStorage(); + auto serialization = type_in_storage->getDefaultSerialization(); + return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), serialization); + } + + return column.type->getDefaultSerialization(); } } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index fc42d678d57..e74df5c327a 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -6,7 +6,8 @@ #include #include #include - +#include +#include namespace DB { @@ -27,7 +28,6 @@ using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; struct NameAndTypePair; -class SerializationInfo; struct DataTypeWithConstInfo { @@ -84,45 +84,54 @@ public: SerializationPtr getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const; ColumnPtr getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const; + using SubstreamData = ISerialization::SubstreamData; + using SubstreamPath = ISerialization::SubstreamPath; + using SubcolumnCallback = std::function; + const SubstreamData &)>; static void forEachSubcolumn( const SubcolumnCallback & callback, - const SerializationPtr & serialization, - const DataTypePtr & type, - const ColumnPtr & column); + const SubstreamData & data); Names getSubcolumnNames() const; - /// Returns default serialization of data type. + virtual MutableSerializationInfoPtr createSerializationInfo( + const SerializationInfo::Settings & settings) const; + + /// TODO: support more types. + virtual bool supportsSparseSerialization() const { return !haveSubtypes(); } + SerializationPtr getDefaultSerialization() const; + SerializationPtr getSparseSerialization() const; - /// Asks whether the stream with given name exists in table. - /// If callback returned true for all streams, which are required for - /// one of serialization types, that serialization will be chosen for reading. - /// If callback always returned false, the default serialization will be chosen. - using StreamExistenceCallback = std::function; + /// Chooses serialization according to serialization kind. + SerializationPtr getSerialization(ISerialization::Kind kind) const; - /// Chooses serialization for reading of one column or subcolumns by - /// checking existence of substreams using callback. - static SerializationPtr getSerialization( - const NameAndTypePair & column, - const StreamExistenceCallback & callback = [](const String &) { return false; }); + /// Chooses serialization according to collected information about content of column. + virtual SerializationPtr getSerialization(const SerializationInfo & info) const; - virtual SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const; + /// Chooses between subcolumn serialization and regular serialization according to @column. + /// This method typically should be used to get serialization for reading column or subcolumn. 
+ static SerializationPtr getSerialization(const NameAndTypePair & column, const SerializationInfo & info); + + static SerializationPtr getSerialization(const NameAndTypePair & column); protected: virtual String doGetName() const { return getFamilyName(); } virtual SerializationPtr doGetDefaultSerialization() const = 0; public: - /** Create empty column for corresponding type. + /** Create empty column for corresponding type and default serialization. */ virtual MutableColumnPtr createColumn() const = 0; + /** Create empty column for corresponding type and serialization. + */ + virtual MutableColumnPtr createColumn(const ISerialization & serialization) const; + /** Create ColumnConst for corresponding type, with specified size and value. */ ColumnPtr createColumnConst(size_t size, const Field & field) const; @@ -292,6 +301,14 @@ protected: public: const IDataTypeCustomName * getCustomName() const { return custom_name.get(); } const ISerialization * getCustomSerialization() const { return custom_serialization.get(); } + +private: + template + Ptr getForSubcolumn( + const String & subcolumn_name, + const SubstreamData & data, + Ptr SubstreamData::*member, + bool throw_if_null = true) const; }; @@ -495,6 +512,11 @@ inline bool isCompilableType(const DataTypePtr & data_type) return data_type->isValueRepresentedByNumber() && !isDecimal(data_type); } +inline bool isBool(const DataTypePtr & data_type) +{ + return data_type->getName() == "Bool"; +} + template constexpr bool IsDataTypeDecimal = false; template constexpr bool IsDataTypeNumber = false; template constexpr bool IsDataTypeDateOrDateTime = false; diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 10ef35b7e7c..b35a0713519 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -36,18 +36,18 @@ std::string concatenateName(const std::string & nested_table_name, const std::st /** Name can be treated as compound if it contains dot (.) in the middle. */ -std::pair splitName(const std::string & name) +std::pair splitName(const std::string & name, bool reverse) { - auto idx = name.find_first_of('.'); + auto idx = (reverse ? name.find_last_of('.') : name.find_first_of('.')); if (idx == std::string::npos || idx == 0 || idx + 1 == name.size()) return {name, {}}; return {name.substr(0, idx), name.substr(idx + 1)}; } -std::pair splitName(const std::string_view & name) +std::pair splitName(const std::string_view & name, bool reverse) { - auto idx = name.find_first_of('.'); + auto idx = (reverse ? name.find_last_of('.') : name.find_first_of('.')); if (idx == std::string::npos || idx == 0 || idx + 1 == name.size()) return {name, {}}; diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index 9ed48920ce2..2ca5c17dc74 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -11,8 +11,9 @@ namespace Nested { std::string concatenateName(const std::string & nested_table_name, const std::string & nested_field_name); - std::pair splitName(const std::string & name); - std::pair splitName(const std::string_view & name); + /// Splits name of compound identifier by first/last dot (depending on 'reverse' parameter). + std::pair splitName(const std::string & name, bool reverse = false); + std::pair splitName(const std::string_view & name, bool reverse = false); /// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot. 
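Illustration: the new 'reverse' flag of Nested::splitName switches between splitting a compound identifier on its first and on its last dot; a dot at the very beginning or end (or no dot at all) still leaves the name unsplit. A standalone sketch of both modes, using the same find_first_of / find_last_of logic as the hunk above:

#include <iostream>
#include <string>
#include <utility>

// Same splitting rule as Nested::splitName in the diff above (sketch, not the original file).
std::pair<std::string, std::string> splitName(const std::string & name, bool reverse = false)
{
    auto idx = reverse ? name.find_last_of('.') : name.find_first_of('.');
    if (idx == std::string::npos || idx == 0 || idx + 1 == name.size())
        return {name, {}};
    return {name.substr(0, idx), name.substr(idx + 1)};
}

int main()
{
    auto [a, b] = splitName("nested.col.sub");          // {"nested", "col.sub"}
    auto [c, d] = splitName("nested.col.sub", true);    // {"nested.col", "sub"}
    std::cout << a << " | " << b << '\n' << c << " | " << d << '\n';
}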
std::string extractTableName(const std::string & nested_name); diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 6fa18eee061..5cdc037d5cb 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -16,12 +16,43 @@ namespace ErrorCodes { extern const int MULTIPLE_STREAMS_REQUIRED; extern const int UNEXPECTED_DATA_AFTER_PARSED_VALUE; + extern const int LOGICAL_ERROR; +} + +ISerialization::Kind ISerialization::getKind(const IColumn & column) +{ + if (column.isSparse()) + return Kind::SPARSE; + + return Kind::DEFAULT; +} + +String ISerialization::kindToString(Kind kind) +{ + switch (kind) + { + case Kind::DEFAULT: + return "Default"; + case Kind::SPARSE: + return "Sparse"; + } + __builtin_unreachable(); +} + +ISerialization::Kind ISerialization::stringToKind(const String & str) +{ + if (str == "Default") + return Kind::DEFAULT; + else if (str == "Sparse") + return Kind::SPARSE; + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown serialization kind '{}'", str); } String ISerialization::Substream::toString() const { if (type == TupleElement) - return fmt::format("TupleElement({}, escape_tuple_delimiter={})", + return fmt::format("TupleElement({}, escape_tuple_delimiter = {})", tuple_element_name, escape_tuple_delimiter ? "true" : "false"); return String(magic_enum::enum_name(type)); @@ -44,18 +75,22 @@ String ISerialization::SubstreamPath::toString() const void ISerialization::enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const + const SubstreamData & data) const { path.push_back(Substream::Regular); - path.back().data = {type, column, getPtr(), nullptr}; + path.back().data = data; callback(path); path.pop_back(); } void ISerialization::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const { - enumerateStreams(path, callback, nullptr, nullptr); + enumerateStreams(path, callback, {getPtr(), nullptr, nullptr, nullptr}); +} + +void ISerialization::enumerateStreams(SubstreamPath & path, const StreamCallback & callback, const DataTypePtr & type) const +{ + enumerateStreams(path, callback, {getPtr(), type, nullptr, nullptr}); } void ISerialization::serializeBinaryBulk(const IColumn & column, WriteBuffer &, size_t, size_t) const @@ -147,11 +182,23 @@ String ISerialization::getFileNameForStream(const NameAndTypePair & column, cons return getFileNameForStream(column.getNameInStorage(), path); } +static size_t isOffsetsOfNested(const ISerialization::SubstreamPath & path) +{ + if (path.empty()) + return false; + + for (const auto & elem : path) + if (elem.type == ISerialization::Substream::ArrayElements) + return false; + + return path.back().type == ISerialization::Substream::ArraySizes; +} + String ISerialization::getFileNameForStream(const String & name_in_storage, const SubstreamPath & path) { String stream_name; auto nested_storage_name = Nested::extractTableName(name_in_storage); - if (name_in_storage != nested_storage_name && (path.size() == 1 && path[0].type == ISerialization::Substream::ArraySizes)) + if (name_in_storage != nested_storage_name && isOffsetsOfNested(path)) stream_name = escapeForFileName(nested_storage_name); else stream_name = escapeForFileName(name_in_storage); @@ -242,10 +289,9 @@ ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath assert(prefix_len < path.size()); SubstreamData res = path[prefix_len].data; - res.creator.reset(); 
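For context: getFileNameForStream previously special-cased only a single-element path ending in ArraySizes; with the isOffsetsOfNested helper above, the offsets stream of a Nested column still maps onto the shared nested-table file name, but only when no ArrayElements substream occurs anywhere on the path. A simplified mirror of that predicate (reduced enum, illustration only):

#include <iostream>
#include <vector>

// Reduced stand-in for ISerialization::Substream::Type - only the members the check inspects.
enum class SubstreamType { ArrayElements, ArraySizes, Regular };

// Mirrors isOffsetsOfNested: the path must end with ArraySizes
// and must not contain ArrayElements before it.
bool isOffsetsOfNested(const std::vector<SubstreamType> & path)
{
    if (path.empty())
        return false;

    for (auto type : path)
        if (type == SubstreamType::ArrayElements)
            return false;

    return path.back() == SubstreamType::ArraySizes;
}

int main()
{
    // Offsets of a Nested column: shares the nested table's sizes file.
    std::cout << isOffsetsOfNested({SubstreamType::ArraySizes}) << '\n';                               // 1
    // Offsets of an array nested inside another array's element stream: gets its own file.
    std::cout << isOffsetsOfNested({SubstreamType::ArrayElements, SubstreamType::ArraySizes}) << '\n'; // 0
}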
for (ssize_t i = static_cast(prefix_len) - 1; i >= 0; --i) { - const auto & creator = path[i].data.creator; + const auto & creator = path[i].creator; if (creator) { res.type = res.type ? creator->create(res.type) : res.type; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 6338bb8a437..b1fd4d0a9da 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -2,15 +2,25 @@ #include #include +#include +#include #include #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +class IDataType; + class ReadBuffer; class WriteBuffer; class ProtobufReader; @@ -22,19 +32,40 @@ using DataTypePtr = std::shared_ptr; class ISerialization; using SerializationPtr = std::shared_ptr; +class SerializationInfo; +using SerializationInfoPtr = std::shared_ptr; + class Field; struct FormatSettings; struct NameAndTypePair; +/** Represents serialization of data type. + * Has methods to serialize/deserialize column in binary and several text formats. + * Every data type has default serialization, but can be serialized in different representations. + * Default serialization can be wrapped to one of the special kind of serializations. + * Currently there is only one special serialization: Sparse. + * Each serialization has its own implementation of IColumn as its in-memory representation. + */ class ISerialization : private boost::noncopyable, public std::enable_shared_from_this { public: ISerialization() = default; virtual ~ISerialization() = default; + enum class Kind : UInt8 + { + DEFAULT = 0, + SPARSE = 1, + }; + + virtual Kind getKind() const { return Kind::DEFAULT; } SerializationPtr getPtr() const { return shared_from_this(); } + static Kind getKind(const IColumn & column); + static String kindToString(Kind kind); + static Kind stringToKind(const String & str); + /** Binary serialization for range of values in column - for writing to disk/network, etc. * * Some data types are represented in multiple streams while being serialized. @@ -70,10 +101,10 @@ public: struct SubstreamData { + SerializationPtr serialization; DataTypePtr type; ColumnPtr column; - SerializationPtr serialization; - SubcolumnCreatorPtr creator; + SerializationInfoPtr serialization_info; }; struct Substream @@ -108,6 +139,9 @@ public: /// Data for current substream. SubstreamData data; + /// Creator of subcolumn for current substream. + SubcolumnCreatorPtr creator = nullptr; + /// Flag, that may help to traverse substream paths. 
mutable bool visited = false; @@ -130,13 +164,14 @@ public: virtual void enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const; + const SubstreamData & data) const; void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const; void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const { enumerateStreams(callback, path); } void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); } + void enumerateStreams(SubstreamPath & path, const StreamCallback & callback, const DataTypePtr & type) const; + using OutputStreamGetter = std::function; using InputStreamGetter = std::function; @@ -300,16 +335,41 @@ public: static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path); static bool isSpecialCompressionAllowed(const SubstreamPath & path); - static size_t getArrayLevel(const SubstreamPath & path); + static size_t getArrayLevel(const SubstreamPath & path); static bool hasSubcolumnForPath(const SubstreamPath & path, size_t prefix_len); static SubstreamData createFromPath(const SubstreamPath & path, size_t prefix_len); protected: + template + State * checkAndGetState(const StatePtr & state) const; + [[noreturn]] void throwUnexpectedDataAfterParsedValue(IColumn & column, ReadBuffer & istr, const FormatSettings &, const String & type_name) const; }; using SerializationPtr = std::shared_ptr; using Serializations = std::vector; +using SerializationByName = std::unordered_map; + +template +State * ISerialization::checkAndGetState(const StatePtr & state) const +{ + if (!state) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Got empty state for {}", demangle(typeid(*this).name())); + + auto * state_concrete = typeid_cast(state.get()); + if (!state_concrete) + { + auto & state_ref = *state; + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid State for {}. Expected: {}, got {}", + demangle(typeid(*this).name()), + demangle(typeid(State).name()), + demangle(typeid(state_ref).name())); + } + + return state_concrete; +} } diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 956ada2436f..e3b535a2a11 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -198,33 +198,38 @@ ColumnPtr SerializationArray::SubcolumnCreator::create(const ColumnPtr & prev) c void SerializationArray::enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const + const SubstreamData & data) const { - const auto * type_array = type ? &assert_cast(*type) : nullptr; - const auto * column_array = column ? &assert_cast(*column) : nullptr; + const auto * type_array = data.type ? &assert_cast(*data.type) : nullptr; + const auto * column_array = data.column ? &assert_cast(*data.column) : nullptr; auto offsets_column = column_array ? column_array->getOffsetsPtr() : nullptr; path.push_back(Substream::ArraySizes); path.back().data = { - type ? std::make_shared() : nullptr, - offsets_column ? arrayOffsetsToSizes(*offsets_column) : nullptr, std::make_shared( std::make_shared>(), "size" + std::to_string(getArrayLevel(path)), false), - nullptr, + data.type ? std::make_shared() : nullptr, + offsets_column ? 
arrayOffsetsToSizes(*offsets_column) : nullptr, + data.serialization_info, }; callback(path); path.back() = Substream::ArrayElements; - path.back().data = {type, column, getPtr(), std::make_shared(offsets_column)}; + path.back().data = data; + path.back().creator = std::make_shared(offsets_column); - auto next_type = type_array ? type_array->getNestedType() : nullptr; - auto next_column = column_array ? column_array->getDataPtr() : nullptr; + SubstreamData next_data = + { + nested, + type_array ? type_array->getNestedType() : nullptr, + column_array ? column_array->getDataPtr() : nullptr, + data.serialization_info, + }; - nested->enumerateStreams(path, callback, next_type, next_column); + nested->enumerateStreams(path, callback, next_data); path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index f766083623d..cd8cac54881 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -38,8 +38,7 @@ public: void enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const override; + const SubstreamData & data) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index def2b565afc..1efacaaecc5 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -6,69 +6,47 @@ #include #include #include +#include + +#include namespace DB { namespace ErrorCodes { - extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING; extern const int ILLEGAL_COLUMN; + extern const int CANNOT_PARSE_BOOL; } -SerializationBool::SerializationBool(const SerializationPtr &nested_) - : SerializationCustomSimpleText(nested_) +namespace { -} -void SerializationBool::serializeText(const IColumn &column, size_t row_num, WriteBuffer &ostr, const FormatSettings &) const +constexpr char str_true[5] = "true"; +constexpr char str_false[6] = "false"; + +const ColumnUInt8 * checkAndGetSerializeColumnType(const IColumn & column) { - const auto *col = checkAndGetColumn(&column); - if (!col) + const auto * col = checkAndGetColumn(&column); + if (!checkAndGetColumn(&column)) throw Exception("Bool type can only serialize columns of type UInt8." + column.getName(), ErrorCodes::ILLEGAL_COLUMN); - - if (col->getData()[row_num]) - ostr.write(str_true, sizeof(str_true) - 1); - else - ostr.write(str_false, sizeof(str_false) - 1); + return col; } -void SerializationBool::deserializeText(IColumn &column, ReadBuffer &istr, const FormatSettings & settings, bool whole) const +ColumnUInt8 * checkAndGetDeserializeColumnType(IColumn & column) { - ColumnUInt8 *col = typeid_cast(&column); - if (!col) - { + auto * col = typeid_cast(&column); + if (!checkAndGetColumn(&column)) throw Exception("Bool type can only deserialize columns of type UInt8." 
+ column.getName(), ErrorCodes::ILLEGAL_COLUMN); - } - - if (!istr.eof()) - { - bool value = false; - - if (*istr.position() == 't' || *istr.position() == 'f' || *istr.position() == 'T' || *istr.position() == 'F') - readBoolTextWord(value, istr, true); - else if (*istr.position() == '1' || *istr.position() == '0') - readBoolText(value, istr); - else - throw Exception("Invalid boolean value, should be true/false, TRUE/FALSE, 1/0.", - ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); - col->insert(value); - } - else - throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); - - if (whole && !istr.eof()) - throwUnexpectedDataAfterParsedValue(column, istr, settings, "Bool"); + return col; } -void SerializationBool::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +void serializeCustom(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) { - const auto *col = checkAndGetColumn(&column); - if (!col) - throw Exception("Bool type can only serialize columns of type UInt8." + column.getName(), - ErrorCodes::ILLEGAL_COLUMN); + const auto * col = checkAndGetSerializeColumnType(column); + if (col->getData()[row_num]) { writeString(settings.bool_true_representation, ostr); @@ -79,91 +57,278 @@ void SerializationBool::serializeTextEscaped(const IColumn & column, size_t row_ } } +void serializeSimple(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) +{ + const auto * col = checkAndGetSerializeColumnType(column); + + if (col->getData()[row_num]) + ostr.write(str_true, sizeof(str_true) - 1); + else + ostr.write(str_false, sizeof(str_false) - 1); +} + +bool tryDeserializeAllVariants(ColumnUInt8 * column, ReadBuffer & istr) +{ + if (checkCharCaseInsensitive('1', istr)) + { + column->insert(true); + } + else if (checkCharCaseInsensitive('0', istr)) + { + column->insert(false); + } + /// 'True' and 'T' + else if (checkCharCaseInsensitive('t', istr)) + { + /// Check if it's just short form `T` or full form `True` + if (checkCharCaseInsensitive('r', istr)) + { + if (!checkStringCaseInsensitive("ue", istr)) + return false; + } + column->insert(true); + } + /// 'False' and 'F' + else if (checkCharCaseInsensitive('f', istr)) + { + /// Check if it's just short form `F` or full form `False` + if (checkCharCaseInsensitive('a', istr)) + { + if (!checkStringCaseInsensitive("lse", istr)) + return false; + } + column->insert(false); + } + /// 'Yes' and 'Y' + else if (checkCharCaseInsensitive('y', istr)) + { + /// Check if it's just short form `Y` or full form `Yes` + if (checkCharCaseInsensitive('e', istr)) + { + if (!checkCharCaseInsensitive('s', istr)) + return false; + } + column->insert(true); + } + /// 'No' and 'N' + else if (checkCharCaseInsensitive('n', istr)) + { + /// Check if it's just short form `N` or full form `No` + checkCharCaseInsensitive('o', istr); + column->insert(false); + } + /// 'On' and 'Off' + else if (checkCharCaseInsensitive('o', istr)) + { + if (checkCharCaseInsensitive('n', istr)) + column->insert(true); + else if (checkStringCaseInsensitive("ff", istr)) + { + column->insert(false); + } + else + return false; + } + /// 'Enable' and 'Enabled' + else if (checkStringCaseInsensitive("enable", istr)) + { + /// Check if it's 'enable' or 'enabled' + checkCharCaseInsensitive('d', istr); + column->insert(true); + } + /// 'Disable' and 'Disabled' + else if (checkStringCaseInsensitive("disable", istr)) + { + /// 
Check if it's 'disable' or 'disabled' + checkCharCaseInsensitive('d', istr); + column->insert(false); + } + else + { + return false; + } + + return true; +} + +void deserializeImpl( + IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function check_end_of_value) +{ + ColumnUInt8 * col = checkAndGetDeserializeColumnType(column); + + PeekableReadBuffer buf(istr); + buf.setCheckpoint(); + if (checkString(settings.bool_true_representation, buf) && check_end_of_value(buf)) + { + col->insert(true); + return; + } + + buf.rollbackToCheckpoint(); + if (checkString(settings.bool_false_representation, buf) && check_end_of_value(buf)) + { + col->insert(false); + buf.dropCheckpoint(); + if (buf.hasUnreadData()) + throw Exception( + ErrorCodes::CANNOT_PARSE_BOOL, + "Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if " + "bool_true_representation or bool_false_representation contains some delimiters of input format"); + return; + } + + buf.rollbackToCheckpoint(); + if (tryDeserializeAllVariants(col, buf) && check_end_of_value(buf)) + { + buf.dropCheckpoint(); + if (buf.hasUnreadData()) + throw Exception( + ErrorCodes::CANNOT_PARSE_BOOL, + "Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if " + "bool_true_representation or bool_false_representation contains some delimiters of input format"); + return; + } + + buf.makeContinuousMemoryFromCheckpointToPos(); + buf.rollbackToCheckpoint(); + throw Exception( + ErrorCodes::CANNOT_PARSE_BOOL, + "Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and " + "bool_false_representation or one of " + "True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0", + String(buf.position(), std::min(10lu, buf.available())), + settings.bool_true_representation, settings.bool_false_representation); +} + +} + + +SerializationBool::SerializationBool(const SerializationPtr &nested_) + : SerializationWrapper(nested_) +{ +} + +void SerializationBool::serializeText(const IColumn & column, size_t row_num, WriteBuffer &ostr, const FormatSettings & settings) const +{ + serializeCustom(column, row_num, ostr, settings); +} + +void SerializationBool::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeCustom(column, row_num, ostr, settings); +} + void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (istr.eof()) - throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); - String input; - readEscapedString(input, istr); - deserializeFromString(column, input, settings); + throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); + + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); } void SerializationBool::serializeTextJSON(const IColumn &column, size_t row_num, WriteBuffer &ostr, const FormatSettings &settings) const { - serializeText(column, row_num, ostr, settings); + serializeSimple(column, row_num, ostr, settings); } void SerializationBool::deserializeTextJSON(IColumn &column, ReadBuffer &istr, const FormatSettings &) const { - ColumnUInt8 *col = typeid_cast(&column); - if (!col) - { - throw Exception("Bool type can only deserialize columns of type 
UInt8." + column.getName(), - ErrorCodes::ILLEGAL_COLUMN); - } + if (istr.eof()) + throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); - if (!istr.eof()) - { - bool value = false; + ColumnUInt8 * col = checkAndGetDeserializeColumnType(column); + bool value = false; - if (*istr.position() == 't' || *istr.position() == 'f') - readBoolTextWord(value, istr); - else if (*istr.position() == '1' || *istr.position() == '0') - readBoolText(value, istr); - else - throw Exception("Invalid boolean value, should be true/false, 1/0.", - ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); - col->insert(value); - } + if (*istr.position() == 't' || *istr.position() == 'f') + readBoolTextWord(value, istr); + else if (*istr.position() == '1' || *istr.position() == '0') + readBoolText(value, istr); else - throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); + throw Exception("Invalid boolean value, should be true/false, 1/0.", + ErrorCodes::CANNOT_PARSE_BOOL); + col->insert(value); } void SerializationBool::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - serializeTextEscaped(column, row_num, ostr, settings); + serializeCustom(column, row_num, ostr, settings); } void SerializationBool::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (istr.eof()) - throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); - String input; - readCSVString(input, istr, settings.csv); - deserializeFromString(column, input, settings); + throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); + + deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == settings.csv.delimiter || *buf.position() == '\n'; }); } void SerializationBool::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - serializeTextEscaped(column, row_num, ostr, settings); + serializeCustom(column, row_num, ostr, settings); } void SerializationBool::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { if (istr.eof()) - throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING); - String input; - readString(input, istr); - deserializeFromString(column, input, settings); + throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); + + deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); } -void SerializationBool::deserializeFromString(IColumn & column, String & input, const FormatSettings & settings) +void SerializationBool::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - ColumnUInt8 * col = typeid_cast(&column); - if (!col) - { - throw Exception("Bool type can only deserialize columns of type UInt8." 
+ column.getName(), ErrorCodes::ILLEGAL_COLUMN); - } + serializeSimple(column, row_num, ostr, settings); +} - if (settings.bool_true_representation == input) +void SerializationBool::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + if (istr.eof()) + throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); + + auto * col = checkAndGetDeserializeColumnType(column); + + char symbol = toLowerIfAlphaASCII(*istr.position()); + switch (symbol) { - col->insert(true); + case 't': + assertStringCaseInsensitive("true", istr); + col->insert(true); + break; + case 'f': + assertStringCaseInsensitive("false", istr); + col->insert(false); + break; + case '1': + col->insert(true); + break; + case '0': + col->insert(false); + break; + case '\'': + ++istr.position(); + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return !buf.eof() && *buf.position() == '\''; }); + assertChar('\'', istr); + break; + default: + throw Exception( + ErrorCodes::CANNOT_PARSE_BOOL, + "Cannot parse boolean value here: '{}', should be true/false, 1/0 or on of " + "True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0 in quotes", + String(istr.position(), std::min(10ul, istr.available()))); } - else if (settings.bool_false_representation == input) - { - col->insert(false); - } - else - throw Exception("Invalid boolean value, should be " + settings.bool_true_representation + " or " + settings.bool_false_representation + " controlled by setting bool_true_representation and bool_false_representation.", ErrorCodes::ILLEGAL_COLUMN); } + +void SerializationBool::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + if (istr.eof()) + throw Exception("Expected boolean value but get EOF.", ErrorCodes::CANNOT_PARSE_BOOL); + + deserializeImpl(column, istr, settings, [&](ReadBuffer & buf){ return buf.eof(); }); +} + +void SerializationBool::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeSimple(column, row_num, ostr, settings); +} + } diff --git a/src/DataTypes/Serializations/SerializationBool.h b/src/DataTypes/Serializations/SerializationBool.h index eda37864db5..a9f4c6404b3 100644 --- a/src/DataTypes/Serializations/SerializationBool.h +++ b/src/DataTypes/Serializations/SerializationBool.h @@ -1,26 +1,23 @@ #pragma once -#include +#include +#include +#include namespace DB { -class SerializationBool final : public SerializationCustomSimpleText +class SerializationBool final : public SerializationWrapper { -private: - static constexpr char str_true[5] = "true"; - static constexpr char str_false[6] = "false"; - public: SerializationBool(const SerializationPtr & nested_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,bool whole) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; 
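For context: besides the configurable bool_true_representation / bool_false_representation, the rewritten SerializationBool accepts a fixed set of case-insensitive spellings: True/False, T/F, Yes/No, Y/N, On/Off, Enable(d)/Disable(d) and 1/0. A small token-level parser sketch of those variants (the real code works on a ReadBuffer and also validates the delimiter that follows the value):

#include <algorithm>
#include <cctype>
#include <iostream>
#include <optional>
#include <string>

// Case-insensitive parser over a complete token; an illustration of the accepted
// spellings, not the stream-based implementation from SerializationBool.cpp.
std::optional<bool> parseBool(std::string s)
{
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    for (const auto * t : {"1", "true", "t", "yes", "y", "on", "enable", "enabled"})
        if (s == t)
            return true;

    for (const auto * f : {"0", "false", "f", "no", "n", "off", "disable", "disabled"})
        if (s == f)
            return false;

    return std::nullopt;   // the caller would raise CANNOT_PARSE_BOOL here
}

int main()
{
    std::cout << *parseBool("Yes") << ' ' << *parseBool("disable") << ' '
              << parseBool("maybe").has_value() << '\n';   // prints: 1 0 0
}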
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; @@ -29,8 +26,12 @@ public: void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; -protected: - static void deserializeFromString(IColumn & column, String & input, const FormatSettings & settings); + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; }; } diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.h b/src/DataTypes/Serializations/SerializationCustomSimpleText.h index f1b24f65b22..ba7c712f86c 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.h +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.h @@ -10,7 +10,7 @@ class WriteBuffer; struct FormatSettings; class IColumn; -/** Simple IDataTypeCustomTextSerialization that uses serializeText/deserializeText +/** Simple ISerialization that uses serializeText/deserializeText * for all serialization and deserialization. */ class SerializationCustomSimpleText : public SerializationWrapper { diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index ce64bfd785a..b4269fb0f8c 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index aeba7e7e341..b9ed5bd4a02 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/DataTypes/Serializations/SerializationInfo.cpp b/src/DataTypes/Serializations/SerializationInfo.cpp new file mode 100644 index 00000000000..42d3d14b672 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationInfo.cpp @@ -0,0 +1,222 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CORRUPTED_DATA; +} + +namespace +{ + +constexpr auto KEY_VERSION = "version"; +constexpr auto KEY_NUM_ROWS = "num_rows"; +constexpr auto KEY_COLUMNS = "columns"; +constexpr auto KEY_NUM_DEFAULTS = "num_defaults"; +constexpr auto KEY_KIND = "kind"; +constexpr auto KEY_NAME = "name"; + +} + +void SerializationInfo::Data::add(const IColumn & column) +{ + size_t rows = column.size(); + double ratio = column.getRatioOfDefaultRows(ColumnSparse::DEFAULT_ROWS_SEARCH_SAMPLE_RATIO); + + num_rows += rows; + num_defaults += static_cast(ratio * rows); +} + +void SerializationInfo::Data::add(const Data & other) +{ + num_rows 
+= other.num_rows; + num_defaults += other.num_defaults; +} + +SerializationInfo::SerializationInfo(ISerialization::Kind kind_, const Settings & settings_) + : settings(settings_) + , kind(kind_) +{ +} + +void SerializationInfo::add(const IColumn & column) +{ + data.add(column); + if (settings.choose_kind) + kind = chooseKind(data, settings); +} + +void SerializationInfo::add(const SerializationInfo & other) +{ + data.add(other.data); + if (settings.choose_kind) + kind = chooseKind(data, settings); +} + +void SerializationInfo::replaceData(const SerializationInfo & other) +{ + data = other.data; +} + +MutableSerializationInfoPtr SerializationInfo::clone() const +{ + auto res = std::make_shared(kind, settings); + res->data = data; + return res; +} + +void SerializationInfo::serialializeKindBinary(WriteBuffer & out) const +{ + writeBinary(static_cast(kind), out); +} + +void SerializationInfo::deserializeFromKindsBinary(ReadBuffer & in) +{ + UInt8 kind_num; + readBinary(kind_num, in); + auto maybe_kind = magic_enum::enum_cast(kind_num); + if (!maybe_kind) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Unknown serialization kind " + std::to_string(kind_num)); + + kind = *maybe_kind; +} + +Poco::JSON::Object SerializationInfo::toJSON() const +{ + Poco::JSON::Object object; + object.set(KEY_KIND, ISerialization::kindToString(kind)); + object.set(KEY_NUM_DEFAULTS, data.num_defaults); + object.set(KEY_NUM_ROWS, data.num_rows); + return object; +} + +void SerializationInfo::fromJSON(const Poco::JSON::Object & object) +{ + if (!object.has(KEY_KIND) || !object.has(KEY_NUM_DEFAULTS) || !object.has(KEY_NUM_ROWS)) + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Missed field '{}' or '{}' or '{}' in SerializationInfo of columns", + KEY_KIND, KEY_NUM_DEFAULTS, KEY_NUM_ROWS); + + data.num_rows = object.getValue(KEY_NUM_ROWS); + data.num_defaults = object.getValue(KEY_NUM_DEFAULTS); + kind = ISerialization::stringToKind(object.getValue(KEY_KIND)); +} + +ISerialization::Kind SerializationInfo::chooseKind(const Data & data, const Settings & settings) +{ + double ratio = data.num_rows ? std::min(static_cast(data.num_defaults) / data.num_rows, 1.0) : 0.0; + return ratio > settings.ratio_of_defaults_for_sparse ? 
ISerialization::Kind::SPARSE : ISerialization::Kind::DEFAULT; +} + +SerializationInfoByName::SerializationInfoByName( + const NamesAndTypesList & columns, + const SerializationInfo::Settings & settings) +{ + if (settings.isAlwaysDefault()) + return; + + for (const auto & column : columns) + if (column.type->supportsSparseSerialization()) + emplace(column.name, column.type->createSerializationInfo(settings)); +} + +void SerializationInfoByName::add(const Block & block) +{ + for (const auto & column : block) + { + auto it = find(column.name); + if (it == end()) + continue; + + it->second->add(*column.column); + } +} + +void SerializationInfoByName::add(const SerializationInfoByName & other) +{ + for (const auto & [name, info] : other) + { + auto it = find(name); + if (it == end()) + continue; + + it->second->add(*info); + } +} + +void SerializationInfoByName::writeJSON(WriteBuffer & out) const +{ + Poco::JSON::Object object; + object.set(KEY_VERSION, SERIALIZATION_INFO_VERSION); + + Poco::JSON::Array column_infos; + for (const auto & [name, info] : *this) + { + auto info_json = info->toJSON(); + info_json.set(KEY_NAME, name); + column_infos.add(std::move(info_json)); + } + + object.set(KEY_COLUMNS, std::move(column_infos)); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(object, oss); + + return writeString(oss.str(), out); +} + +void SerializationInfoByName::readJSON(ReadBuffer & in) +{ + String json_str; + readString(json_str, in); + + Poco::JSON::Parser parser; + auto object = parser.parse(json_str).extract(); + + if (!object->has(KEY_VERSION)) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Missed version of serialization infos"); + + if (object->getValue(KEY_VERSION) > SERIALIZATION_INFO_VERSION) + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Unknown version of serialization infos ({}). Should be less or equal than {}", + object->getValue(KEY_VERSION), SERIALIZATION_INFO_VERSION); + + if (object->has(KEY_COLUMNS)) + { + auto array = object->getArray(KEY_COLUMNS); + for (const auto & elem : *array) + { + auto elem_object = elem.extract(); + + if (!elem_object->has(KEY_NAME)) + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Missed field '{}' in SerializationInfo of columns", KEY_NAME); + + auto name = elem_object->getValue(KEY_NAME); + auto it = find(name); + + if (it == end()) + throw Exception(ErrorCodes::CORRUPTED_DATA, + "There is no column {} in serialization infos", name); + + it->second->fromJSON(*elem_object); + } + } +} + +} diff --git a/src/DataTypes/Serializations/SerializationInfo.h b/src/DataTypes/Serializations/SerializationInfo.h new file mode 100644 index 00000000000..f7af5d77217 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationInfo.h @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class ReadBuffer; +class WriteBuffer; +class NamesAndTypesList; +class Block; + +constexpr auto SERIALIZATION_INFO_VERSION = 0; + +/** Contains information about kind of serialization of column and its subcolumns. + * Also contains information about content of columns, + * that helps to choose kind of serialization of column. + * + * Currently has only information about number of default rows, + * that helps to choose sparse serialization. + * + * Should be extended, when new kinds of serialization will be implemented. 
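 * For example, with ratio_of_defaults_for_sparse = 0.9, a column where 95 of 100
 * rows are default (ratio 0.95 > 0.9) is chosen as SPARSE by chooseKind(), while a
 * column with only 80 default rows out of 100 keeps the DEFAULT kind.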
+ */ +class SerializationInfo +{ +public: + struct Data + { + size_t num_rows = 0; + size_t num_defaults = 0; + + void add(const IColumn & column); + void add(const Data & other); + }; + + struct Settings + { + const double ratio_of_defaults_for_sparse = 1.0; + const bool choose_kind = false; + + bool isAlwaysDefault() const { return ratio_of_defaults_for_sparse >= 1.0; } + }; + + SerializationInfo(ISerialization::Kind kind_, const Settings & settings_); + + virtual ~SerializationInfo() = default; + + virtual bool hasCustomSerialization() const { return kind != ISerialization::Kind::DEFAULT; } + + virtual void add(const IColumn & column); + virtual void add(const SerializationInfo & other); + virtual void replaceData(const SerializationInfo & other); + virtual std::shared_ptr clone() const; + + virtual void serialializeKindBinary(WriteBuffer & out) const; + virtual void deserializeFromKindsBinary(ReadBuffer & in); + + virtual Poco::JSON::Object toJSON() const; + virtual void fromJSON(const Poco::JSON::Object & object); + + const Settings & getSettings() const { return settings; } + const Data & getData() const { return data; } + ISerialization::Kind getKind() const { return kind; } + + static ISerialization::Kind chooseKind(const Data & data, const Settings & settings); + +protected: + const Settings settings; + + ISerialization::Kind kind; + Data data; +}; + +using SerializationInfoPtr = std::shared_ptr; +using MutableSerializationInfoPtr = std::shared_ptr; + +using SerializationInfos = std::vector; +using MutableSerializationInfos = std::vector; + +class SerializationInfoByName : public std::unordered_map +{ +public: + SerializationInfoByName() = default; + SerializationInfoByName(const NamesAndTypesList & columns, const SerializationInfo::Settings & settings); + + void add(const Block & block); + void add(const SerializationInfoByName & other); + + void writeJSON(WriteBuffer & out) const; + void readJSON(ReadBuffer & in); +}; + +} diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.cpp b/src/DataTypes/Serializations/SerializationInfoTuple.cpp new file mode 100644 index 00000000000..378bed2af53 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationInfoTuple.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CORRUPTED_DATA; + extern const int THERE_IS_NO_COLUMN; +} + +SerializationInfoTuple::SerializationInfoTuple( + MutableSerializationInfos elems_, const Settings & settings_) + : SerializationInfo(ISerialization::Kind::DEFAULT, settings_) + , elems(std::move(elems_)) +{ +} + +bool SerializationInfoTuple::hasCustomSerialization() const +{ + return std::any_of(elems.begin(), elems.end(), [](const auto & elem) { return elem->hasCustomSerialization(); }); +} + +void SerializationInfoTuple::add(const IColumn & column) +{ + SerializationInfo::add(column); + + const auto & column_tuple = assert_cast(column); + const auto & right_elems = column_tuple.getColumns(); + assert(elems.size() == right_elems.size()); + + for (size_t i = 0; i < elems.size(); ++i) + elems[i]->add(*right_elems[i]); +} + +void SerializationInfoTuple::add(const SerializationInfo & other) +{ + SerializationInfo::add(other); + + const auto & info_tuple = assert_cast(other); + assert(elems.size() == info_tuple.elems.size()); + + for (size_t i = 0; i < elems.size(); ++i) + elems[i]->add(*info_tuple.elems[i]); +} + +void SerializationInfoTuple::replaceData(const SerializationInfo & other) +{ + SerializationInfo::add(other); + + const 
auto & info_tuple = assert_cast(other); + assert(elems.size() == info_tuple.elems.size()); + + for (size_t i = 0; i < elems.size(); ++i) + elems[i]->replaceData(*info_tuple.elems[i]); +} + +MutableSerializationInfoPtr SerializationInfoTuple::clone() const +{ + MutableSerializationInfos elems_cloned; + elems_cloned.reserve(elems.size()); + for (const auto & elem : elems) + elems_cloned.push_back(elem->clone()); + + return std::make_shared(std::move(elems_cloned), settings); +} + +void SerializationInfoTuple::serialializeKindBinary(WriteBuffer & out) const +{ + SerializationInfo::serialializeKindBinary(out); + for (const auto & elem : elems) + elem->serialializeKindBinary(out); +} + +void SerializationInfoTuple::deserializeFromKindsBinary(ReadBuffer & in) +{ + SerializationInfo::deserializeFromKindsBinary(in); + for (const auto & elem : elems) + elem->deserializeFromKindsBinary(in); +} + +Poco::JSON::Object SerializationInfoTuple::toJSON() const +{ + auto object = SerializationInfo::toJSON(); + Poco::JSON::Array subcolumns; + for (const auto & elem : elems) + subcolumns.add(elem->toJSON()); + + object.set("subcolumns", std::move(subcolumns)); + return object; +} + +void SerializationInfoTuple::fromJSON(const Poco::JSON::Object & object) +{ + SerializationInfo::fromJSON(object); + + if (!object.has("subcolumns")) + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Missed field '{}' in SerializationInfo of columns SerializationInfoTuple"); + + auto subcolumns = object.getArray("subcolumns"); + if (elems.size() != subcolumns->size()) + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, + "Mismatched number of subcolumns between JSON and SerializationInfoTuple." + "Expected: {}, got: {}", elems.size(), subcolumns->size()); + + for (size_t i = 0; i < elems.size(); ++i) + elems[i]->fromJSON(*subcolumns->getObject(i)); +} + +} diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.h b/src/DataTypes/Serializations/SerializationInfoTuple.h new file mode 100644 index 00000000000..d196f80393e --- /dev/null +++ b/src/DataTypes/Serializations/SerializationInfoTuple.h @@ -0,0 +1,31 @@ +#pragma once +#include + +namespace DB +{ + +class SerializationInfoTuple : public SerializationInfo +{ +public: + SerializationInfoTuple(MutableSerializationInfos elems_, const Settings & settings_); + + bool hasCustomSerialization() const override; + void add(const IColumn & column) override; + void add(const SerializationInfo & other) override; + void replaceData(const SerializationInfo & other) override; + + MutableSerializationInfoPtr clone() const override; + void serialializeKindBinary(WriteBuffer & out) const override; + void deserializeFromKindsBinary(ReadBuffer & in) override; + + Poco::JSON::Object toJSON() const override; + void fromJSON(const Poco::JSON::Object & object) override; + + MutableSerializationInfoPtr getElementInfo(size_t i) const { return elems[i]; } + ISerialization::Kind getElementKind(size_t i) const { return elems[i]->getKind(); } + +private: + MutableSerializationInfos elems; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index d83a6c0ee83..c79f588e46c 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -43,23 +43,25 @@ SerializationLowCardinality::SerializationLowCardinality(const DataTypePtr & dic void SerializationLowCardinality::enumerateStreams( SubstreamPath & path, const StreamCallback & 
callback, - DataTypePtr type, - ColumnPtr column) const + const SubstreamData & data) const { - const auto * column_lc = column ? &getColumnLowCardinality(*column) : nullptr; + const auto * column_lc = data.column ? &getColumnLowCardinality(*data.column) : nullptr; - SubstreamData data; - data.type = type ? dictionary_type : nullptr; - data.column = column_lc ? column_lc->getDictionary().getNestedColumn() : nullptr; - data.serialization = dict_inner_serialization; + SubstreamData dict_data = + { + dict_inner_serialization, + data.type ? dictionary_type : nullptr, + column_lc ? column_lc->getDictionary().getNestedColumn() : nullptr, + data.serialization_info, + }; path.push_back(Substream::DictionaryKeys); - path.back().data = data; + path.back().data = dict_data; - dict_inner_serialization->enumerateStreams(path, callback, data.type, data.column); + dict_inner_serialization->enumerateStreams(path, callback, dict_data); path.back() = Substream::DictionaryIndexes; - path.back().data = {type, column, getPtr(), nullptr}; + path.back().data = data; callback(path); path.pop_back(); @@ -222,42 +224,6 @@ struct DeserializeStateLowCardinality : public ISerialization::DeserializeBinary explicit DeserializeStateLowCardinality(UInt64 key_version_) : key_version(key_version_) {} }; -static SerializeStateLowCardinality * checkAndGetLowCardinalitySerializeState( - ISerialization::SerializeBinaryBulkStatePtr & state) -{ - if (!state) - throw Exception("Got empty state for SerializationLowCardinality.", ErrorCodes::LOGICAL_ERROR); - - auto * low_cardinality_state = typeid_cast(state.get()); - if (!low_cardinality_state) - { - auto & state_ref = *state; - throw Exception("Invalid SerializeBinaryBulkState for SerializationLowCardinality. Expected: " - + demangle(typeid(SerializeStateLowCardinality).name()) + ", got " - + demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR); - } - - return low_cardinality_state; -} - -static DeserializeStateLowCardinality * checkAndGetLowCardinalityDeserializeState( - ISerialization::DeserializeBinaryBulkStatePtr & state) -{ - if (!state) - throw Exception("Got empty state for SerializationLowCardinality.", ErrorCodes::LOGICAL_ERROR); - - auto * low_cardinality_state = typeid_cast(state.get()); - if (!low_cardinality_state) - { - auto & state_ref = *state; - throw Exception("Invalid DeserializeBinaryBulkState for SerializationLowCardinality. 
Expected: " - + demangle(typeid(DeserializeStateLowCardinality).name()) + ", got " - + demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR); - } - - return low_cardinality_state; -} - void SerializationLowCardinality::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const @@ -282,7 +248,7 @@ void SerializationLowCardinality::serializeBinaryBulkStateSuffix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - auto * low_cardinality_state = checkAndGetLowCardinalitySerializeState(state); + auto * low_cardinality_state = checkAndGetState(state); KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value); if (low_cardinality_state->shared_dictionary && settings.low_cardinality_max_dictionary_size) @@ -521,7 +487,7 @@ void SerializationLowCardinality::serializeBinaryBulkWithMultipleStreams( const ColumnLowCardinality & low_cardinality_column = typeid_cast(column); - auto * low_cardinality_state = checkAndGetLowCardinalitySerializeState(state); + auto * low_cardinality_state = checkAndGetState(state); auto & global_dictionary = low_cardinality_state->shared_dictionary; KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value); @@ -620,7 +586,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( if (!indexes_stream) throw Exception("Got empty stream for SerializationLowCardinality indexes.", ErrorCodes::LOGICAL_ERROR); - auto * low_cardinality_state = checkAndGetLowCardinalityDeserializeState(state); + auto * low_cardinality_state = checkAndGetState(state); KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value); auto read_dictionary = [this, low_cardinality_state, keys_stream]() @@ -670,6 +636,9 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( if (!low_cardinality_state->index_type.need_global_dictionary) { + if (additional_keys == nullptr) + throw Exception("No additional keys found.", ErrorCodes::INCORRECT_DATA); + ColumnPtr keys_column = additional_keys; if (low_cardinality_state->null_map) keys_column = ColumnNullable::create(additional_keys, low_cardinality_state->null_map); @@ -696,6 +665,9 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( if (!maps.additional_keys_map->empty()) { + if (additional_keys == nullptr) + throw Exception("No additional keys found.", ErrorCodes::INCORRECT_DATA); + auto used_add_keys = additional_keys->index(*maps.additional_keys_map, 0); if (dictionary_type->isNullable()) diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index af26405fcfa..5f8a2a95a25 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -20,8 +20,7 @@ public: void enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const override; + const SubstreamData & data) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index d909b455441..3f17061a744 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -250,13 +250,17 @@ void SerializationMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c 
void SerializationMap::enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const + const SubstreamData & data) const { - auto next_type = type ? assert_cast(*type).getNestedType() : nullptr; - auto next_column = column ? assert_cast(*column).getNestedColumnPtr() : nullptr; + SubstreamData next_data = + { + nested, + data.type ? assert_cast(*data.type).getNestedType() : nullptr, + data.column ? assert_cast(*data.column).getNestedColumnPtr() : nullptr, + data.serialization_info, + }; - nested->enumerateStreams(path, callback, next_type, next_column); + nested->enumerateStreams(path, callback, next_data); } void SerializationMap::serializeBinaryBulkStatePrefix( diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index b679a8cf4c6..93b3e179499 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -34,8 +34,7 @@ public: void enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const override; + const SubstreamData & data) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationNamed.cpp b/src/DataTypes/Serializations/SerializationNamed.cpp index 4ef4d4527f8..097e9cedfbe 100644 --- a/src/DataTypes/Serializations/SerializationNamed.cpp +++ b/src/DataTypes/Serializations/SerializationNamed.cpp @@ -6,12 +6,13 @@ namespace DB void SerializationNamed::enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const + const SubstreamData & data) const { addToPath(path); - path.back().data = {type, column, getPtr(), std::make_shared(name, escape_delimiter)}; - nested_serialization->enumerateStreams(path, callback, type, column); + path.back().data = data; + path.back().creator = std::make_shared(name, escape_delimiter); + + nested_serialization->enumerateStreams(path, callback, data); path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNamed.h b/src/DataTypes/Serializations/SerializationNamed.h index 20dd15a20ba..91db0cf67f4 100644 --- a/src/DataTypes/Serializations/SerializationNamed.h +++ b/src/DataTypes/Serializations/SerializationNamed.h @@ -23,8 +23,7 @@ public: void enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const override; + const SubstreamData & data) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 261d0ff3c5d..a6273deaa30 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -40,30 +40,35 @@ ColumnPtr SerializationNullable::SubcolumnCreator::create(const ColumnPtr & prev void SerializationNullable::enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const + const SubstreamData & data) const { - const auto * type_nullable = type ? &assert_cast(*type) : nullptr; - const auto * column_nullable = column ? &assert_cast(*column) : nullptr; + const auto * type_nullable = data.type ? &assert_cast(*data.type) : nullptr; + const auto * column_nullable = data.column ? 
&assert_cast(*data.column) : nullptr; path.push_back(Substream::NullMap); path.back().data = { + std::make_shared(std::make_shared>(), "null", false), type_nullable ? std::make_shared() : nullptr, column_nullable ? column_nullable->getNullMapColumnPtr() : nullptr, - std::make_shared(std::make_shared>(), "null", false), - nullptr, + data.serialization_info, }; callback(path); path.back() = Substream::NullableElements; - path.back().data = {type, column, getPtr(), std::make_shared(path.back().data.column)}; + path.back().creator = std::make_shared(path.back().data.column); + path.back().data = data; - auto next_type = type_nullable ? type_nullable->getNestedType() : nullptr; - auto next_column = column_nullable ? column_nullable->getNestedColumnPtr() : nullptr; + SubstreamData next_data = + { + nested, + type_nullable ? type_nullable->getNestedType() : nullptr, + column_nullable ? column_nullable->getNestedColumnPtr() : nullptr, + data.serialization_info, + }; - nested->enumerateStreams(path, callback, next_type, next_column); + nested->enumerateStreams(path, callback, next_data); path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index c514234127c..eb3e9bfb430 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -16,8 +16,7 @@ public: void enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const override; + const SubstreamData & data) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp new file mode 100644 index 00000000000..64db248c5fc --- /dev/null +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -0,0 +1,380 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; +} + +namespace +{ + +/// 2^62, because VarInt supports only values < 2^63. +constexpr auto END_OF_GRANULE_FLAG = 1ULL << 62; + +struct DeserializeStateSparse : public ISerialization::DeserializeBinaryBulkState +{ + /// Number of default values, that remain from previous read. + size_t num_trailing_defaults = 0; + /// Do we have non-default value after @num_trailing_defaults? + bool has_value_after_defaults = false; + ISerialization::DeserializeBinaryBulkStatePtr nested; + + void reset() + { + num_trailing_defaults = 0; + has_value_after_defaults = false; + } +}; + +void serializeOffsets(const IColumn::Offsets & offsets, WriteBuffer & ostr, size_t start, size_t end) +{ + size_t size = offsets.size(); + for (size_t i = 0; i < size; ++i) + { + size_t group_size = offsets[i] - start; + writeVarUInt(group_size, ostr); + start += group_size + 1; + } + + size_t group_size = start < end ? end - start : 0; + group_size |= END_OF_GRANULE_FLAG; + writeVarUInt(group_size, ostr); +} + + +/// Returns number of read rows. +/// @start is the size of column before reading offsets. +size_t deserializeOffsets(IColumn::Offsets & offsets, + ReadBuffer & istr, size_t start, size_t limit, DeserializeStateSparse & state) +{ + if (limit && state.num_trailing_defaults >= limit) + { + state.num_trailing_defaults -= limit; + return limit; + } + + /// Just try to guess number of offsets. 
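    /// The reserve below is only a guess: it assumes that roughly
    /// (1 - DEFAULT_RATIO_FOR_SPARSE_SERIALIZATION) of the requested rows are
    /// non-default and will therefore produce an offset.
    ///
    /// Stream layout decoded by the loop below (it mirrors serializeOffsets above):
    /// every VarUInt is the number of default rows preceding the next non-default
    /// value, and the last VarUInt of a granule has END_OF_GRANULE_FLAG set and
    /// counts the trailing defaults. For example, the rows [0, 0, 7, 0, 0, 0, 9, 0]
    /// are written as 2, 3, (1 | END_OF_GRANULE_FLAG).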
+ offsets.reserve(offsets.size() + + static_cast(limit * (1.0 - ColumnSparse::DEFAULT_RATIO_FOR_SPARSE_SERIALIZATION))); + + bool first = true; + size_t total_rows = state.num_trailing_defaults; + if (state.has_value_after_defaults) + { + offsets.push_back(start + state.num_trailing_defaults); + first = false; + + state.has_value_after_defaults = false; + state.num_trailing_defaults = 0; + ++total_rows; + } + + size_t group_size; + while (!istr.eof()) + { + readVarUInt(group_size, istr); + + bool end_of_granule = group_size & END_OF_GRANULE_FLAG; + group_size &= ~END_OF_GRANULE_FLAG; + + size_t next_total_rows = total_rows + group_size; + group_size += state.num_trailing_defaults; + + if (limit && next_total_rows >= limit) + { + /// If it was not last group in granule, + /// we have to add current non-default value at further reads. + state.num_trailing_defaults = next_total_rows - limit; + state.has_value_after_defaults = !end_of_granule; + return limit; + } + + if (end_of_granule) + { + state.has_value_after_defaults = false; + state.num_trailing_defaults = group_size; + } + else + { + /// If we add value to column for first time in current read, + /// start from column's current size, because it can have some defaults after last offset, + /// otherwise just start from previous offset. + size_t start_of_group = start; + if (!first && !offsets.empty()) + start_of_group = offsets.back() + 1; + if (first) + first = false; + + offsets.push_back(start_of_group + group_size); + + state.num_trailing_defaults = 0; + state.has_value_after_defaults = false; + ++next_total_rows; + } + + total_rows = next_total_rows; + } + + return total_rows; +} + +} + +SerializationSparse::SerializationSparse(const SerializationPtr & nested_) + : nested(nested_) +{ +} + +SerializationPtr SerializationSparse::SubcolumnCreator::create(const SerializationPtr & prev) const +{ + return std::make_shared(prev); +} + +ColumnPtr SerializationSparse::SubcolumnCreator::create(const ColumnPtr & prev) const +{ + return ColumnSparse::create(prev, offsets, size); +} + +void SerializationSparse::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + const SubstreamData & data) const +{ + const auto * column_sparse = data.column ? &assert_cast(*data.column) : nullptr; + + size_t column_size = column_sparse ? column_sparse->size() : 0; + + path.push_back(Substream::SparseOffsets); + path.back().data = + { + std::make_shared>(), + data.type ? std::make_shared() : nullptr, + column_sparse ? column_sparse->getOffsetsPtr() : nullptr, + data.serialization_info, + }; + + callback(path); + + path.back() = Substream::SparseElements; + path.back().creator = std::make_shared(path.back().data.column, column_size); + path.back().data = data; + + SubstreamData next_data = + { + nested, + data.type, + column_sparse ? 
column_sparse->getValuesPtr() : nullptr, + data.serialization_info, + }; + + nested->enumerateStreams(path, callback, next_data); + path.pop_back(); +} + +void SerializationSparse::serializeBinaryBulkStatePrefix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + settings.path.push_back(Substream::SparseElements); + nested->serializeBinaryBulkStatePrefix(settings, state); + settings.path.pop_back(); +} + +void SerializationSparse::serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + size_t size = column.size(); + + auto offsets_column = DataTypeNumber().createColumn(); + auto & offsets_data = assert_cast &>(*offsets_column).getData(); + column.getIndicesOfNonDefaultRows(offsets_data, offset, limit); + + settings.path.push_back(Substream::SparseOffsets); + if (auto * stream = settings.getter(settings.path)) + { + size_t end = limit && offset + limit < size ? offset + limit : size; + serializeOffsets(offsets_data, *stream, offset, end); + } + + if (!offsets_data.empty()) + { + settings.path.back() = Substream::SparseElements; + if (const auto * column_sparse = typeid_cast(&column)) + { + const auto & values = column_sparse->getValuesColumn(); + size_t begin = column_sparse->getValueIndex(offsets_data[0]); + size_t end = column_sparse->getValueIndex(offsets_data.back()); + nested->serializeBinaryBulkWithMultipleStreams(values, begin, end - begin + 1, settings, state); + } + else + { + auto values = column.index(*offsets_column, 0); + nested->serializeBinaryBulkWithMultipleStreams(*values, 0, values->size(), settings, state); + } + } + + settings.path.pop_back(); +} + +void SerializationSparse::serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + settings.path.push_back(Substream::SparseElements); + nested->serializeBinaryBulkStateSuffix(settings, state); + settings.path.pop_back(); +} + +void SerializationSparse::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state) const +{ + auto state_sparse = std::make_shared(); + + settings.path.push_back(Substream::SparseElements); + nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested); + settings.path.pop_back(); + + state = std::move(state_sparse); +} + +void SerializationSparse::deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + auto * state_sparse = checkAndGetState(state); + + if (!settings.continuous_reading) + state_sparse->reset(); + + auto mutable_column = column->assumeMutable(); + auto & column_sparse = assert_cast(*mutable_column); + auto & offsets_data = column_sparse.getOffsetsData(); + + size_t old_size = offsets_data.size(); + + size_t read_rows = 0; + settings.path.push_back(Substream::SparseOffsets); + if (auto * stream = settings.getter(settings.path)) + read_rows = deserializeOffsets(offsets_data, *stream, column_sparse.size(), limit, *state_sparse); + + auto & values_column = column_sparse.getValuesPtr(); + size_t values_limit = offsets_data.size() - old_size; + + settings.path.back() = Substream::SparseElements; + nested->deserializeBinaryBulkWithMultipleStreams(values_column, values_limit, settings, state_sparse->nested, cache); + 
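    /// Exactly one nested value is read per offset appended in this call
    /// (values_limit = offsets_data.size() - old_size). ColumnSparse also stores
    /// the column's default value as an extra element of the values column, which
    /// is why the consistency check below expects
    /// values_column->size() == offsets_data.size() + 1.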
settings.path.pop_back(); + + if (offsets_data.size() + 1 != values_column->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent sizes of values and offsets in SerializationSparse." + " Offsets size: {}, values size: {}", offsets_data.size(), values_column->size()); + + /// 'insertManyDefaults' just increases size of column. + column_sparse.insertManyDefaults(read_rows); + column = std::move(mutable_column); +} + +/// All methods below just wrap nested serialization. + +void SerializationSparse::serializeBinary(const Field & field, WriteBuffer & ostr) const +{ + nested->serializeBinary(field, ostr); +} + +void SerializationSparse::deserializeBinary(Field & field, ReadBuffer & istr) const +{ + nested->deserializeBinary(field, istr); +} + +void SerializationSparse::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +{ + const auto & column_sparse = assert_cast(column); + nested->serializeBinary(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr); +} + +void SerializationSparse::deserializeBinary(IColumn &, ReadBuffer &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeBinary' is not implemented for SerializationSparse"); +} + +void SerializationSparse::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & column_sparse = assert_cast(column); + nested->serializeTextEscaped(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings); +} + +void SerializationSparse::deserializeTextEscaped(IColumn &, ReadBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextEscaped' is not implemented for SerializationSparse"); +} + +void SerializationSparse::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & column_sparse = assert_cast(column); + nested->serializeTextQuoted(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings); +} + +void SerializationSparse::deserializeTextQuoted(IColumn &, ReadBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextQuoted' is not implemented for SerializationSparse"); +} + +void SerializationSparse::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & column_sparse = assert_cast(column); + nested->serializeTextCSV(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings); +} + +void SerializationSparse::deserializeTextCSV(IColumn &, ReadBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextCSV' is not implemented for SerializationSparse"); +} + +void SerializationSparse::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & column_sparse = assert_cast(column); + nested->serializeText(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings); +} + +void SerializationSparse::deserializeWholeText(IColumn &, ReadBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeWholeText' is not implemented for SerializationSparse"); +} + +void SerializationSparse::serializeTextJSON(const IColumn & column, size_t row_num, 
WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & column_sparse = assert_cast(column); + nested->serializeTextJSON(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings); +} + +void SerializationSparse::deserializeTextJSON(IColumn &, ReadBuffer &, const FormatSettings &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'deserializeTextJSON' is not implemented for SerializationSparse"); +} + +void SerializationSparse::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & column_sparse = assert_cast(column); + nested->serializeTextXML(column_sparse.getValuesColumn(), column_sparse.getValueIndex(row_num), ostr, settings); +} + +} diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h new file mode 100644 index 00000000000..51d9df2cb5d --- /dev/null +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -0,0 +1,103 @@ +#pragma once + +#include + +namespace DB +{ + + +/** Serialization for sparse representation. + * Only '{serialize,deserialize}BinaryBulk' makes sense. + * Format: + * Values and offsets are written to separate substreams. + * There are written only non-default values. + * + * Offsets have position independent format: as i-th offset there + * is written number of default values, that precedes the i-th non-default value. + * Offsets are written in VarInt encoding. + * Additionally at the end of every call of 'serializeBinaryBulkWithMultipleStreams' + * there is written number of default values in the suffix of part of column, + * that we currently writing. This value also marked with a flag, that means the end of portion of data. + * This value is used, e.g. to allow independent reading of granules in MergeTree. + */ +class SerializationSparse final : public ISerialization +{ +public: + SerializationSparse(const SerializationPtr & nested_); + + Kind getKind() const override { return Kind::SPARSE; } + + virtual void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state) const override; + + /// Allows to write ColumnSparse and other columns in sparse serialization. + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + /// Allows to read only ColumnSparse. 
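    /// Note on the text methods declared below: serialization works on a sparse
    /// column by mapping row_num through ColumnSparse::getValueIndex and forwarding
    /// to the nested serialization, while the matching text deserialization methods
    /// throw NOT_IMPLEMENTED, because sparse encoding is a binary bulk (storage)
    /// format and text input is expected to produce full columns.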
+ void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field & field, WriteBuffer & ostr) const override; + void deserializeBinary(Field & field, ReadBuffer & istr) const override; + + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + +private: + struct SubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr offsets; + const size_t size; + + SubcolumnCreator(const ColumnPtr & offsets_, size_t size_) + : offsets(offsets_), size(size_) {} + + DataTypePtr create(const DataTypePtr & prev) const override { return prev; } + SerializationPtr create(const SerializationPtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + }; + + SerializationPtr nested; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index cb4e7f9666e..cd5a6b65a3c 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -16,7 +17,6 @@ namespace ErrorCodes { extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; extern const int NOT_FOUND_COLUMN_IN_BLOCK; - extern const int LOGICAL_ERROR; } @@ -260,7 +260,7 @@ void SerializationTuple::serializeTextCSV(const IColumn & column, size_t row_num for (const auto i : collections::range(0, elems.size())) { if (i != 0) - writeChar(',', ostr); + writeChar(settings.csv.tuple_delimiter, ostr); elems[i]->serializeTextCSV(extractElementColumn(column, i), row_num, ostr, settings); } } @@ -275,7 +275,7 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, if (i != 0) { skipWhitespaceIfAny(istr); - assertChar(settings.csv.delimiter, istr); + assertChar(settings.csv.tuple_delimiter, istr); skipWhitespaceIfAny(istr); } elems[i]->deserializeTextCSV(extractElementColumn(column, i), istr, settings); @@ -286,18 +286,23 @@ void 
SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, void SerializationTuple::enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const + const SubstreamData & data) const { - const auto * type_tuple = type ? &assert_cast(*type) : nullptr; - const auto * column_tuple = column ? &assert_cast(*column) : nullptr; + const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; + const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; + const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; for (size_t i = 0; i < elems.size(); ++i) { - auto next_type = type_tuple ? type_tuple->getElement(i) : nullptr; - auto next_column = column_tuple ? column_tuple->getColumnPtr(i) : nullptr; + SubstreamData next_data = + { + elems[i], + type_tuple ? type_tuple->getElement(i) : nullptr, + column_tuple ? column_tuple->getColumnPtr(i) : nullptr, + info_tuple ? info_tuple->getElementInfo(i) : nullptr, + }; - elems[i]->enumerateStreams(path, callback, next_type, next_column); + elems[i]->enumerateStreams(path, callback, next_data); } } @@ -311,39 +316,6 @@ struct DeserializeBinaryBulkStateTuple : public ISerialization::DeserializeBinar std::vector states; }; -static SerializeBinaryBulkStateTuple * checkAndGetTupleSerializeState(ISerialization::SerializeBinaryBulkStatePtr & state) -{ - if (!state) - throw Exception("Got empty state for DataTypeTuple.", ErrorCodes::LOGICAL_ERROR); - - auto * tuple_state = typeid_cast(state.get()); - if (!tuple_state) - { - auto & state_ref = *state; - throw Exception("Invalid SerializeBinaryBulkState for DataTypeTuple. Expected: " - + demangle(typeid(SerializeBinaryBulkStateTuple).name()) + ", got " - + demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR); - } - - return tuple_state; -} - -static DeserializeBinaryBulkStateTuple * checkAndGetTupleDeserializeState(ISerialization::DeserializeBinaryBulkStatePtr & state) -{ - if (!state) - throw Exception("Got empty state for DataTypeTuple.", ErrorCodes::LOGICAL_ERROR); - - auto * tuple_state = typeid_cast(state.get()); - if (!tuple_state) - { - auto & state_ref = *state; - throw Exception("Invalid DeserializeBinaryBulkState for DataTypeTuple. 
Expected: " - + demangle(typeid(DeserializeBinaryBulkStateTuple).name()) + ", got " - + demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR); - } - - return tuple_state; -} void SerializationTuple::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, @@ -362,7 +334,7 @@ void SerializationTuple::serializeBinaryBulkStateSuffix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - auto * tuple_state = checkAndGetTupleSerializeState(state); + auto * tuple_state = checkAndGetState(state); for (size_t i = 0; i < elems.size(); ++i) elems[i]->serializeBinaryBulkStateSuffix(settings, tuple_state->states[i]); @@ -388,7 +360,7 @@ void SerializationTuple::serializeBinaryBulkWithMultipleStreams( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { - auto * tuple_state = checkAndGetTupleSerializeState(state); + auto * tuple_state = checkAndGetState(state); for (const auto i : collections::range(0, elems.size())) { @@ -404,7 +376,7 @@ void SerializationTuple::deserializeBinaryBulkWithMultipleStreams( DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { - auto * tuple_state = checkAndGetTupleDeserializeState(state); + auto * tuple_state = checkAndGetState(state); auto mutable_column = column->assumeMutable(); auto & column_tuple = assert_cast(*mutable_column); diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 0eb178f8301..e82d8473645 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -13,7 +13,9 @@ public: using ElementSerializations = std::vector; SerializationTuple(const ElementSerializations & elems_, bool have_explicit_names_) - : elems(elems_), have_explicit_names(have_explicit_names_) {} + : elems(elems_), have_explicit_names(have_explicit_names_) + { + } void serializeBinary(const Field & field, WriteBuffer & ostr) const override; void deserializeBinary(Field & field, ReadBuffer & istr) const override; @@ -34,8 +36,7 @@ public: void enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const override; + const SubstreamData & data) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, @@ -63,6 +64,8 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; + const ElementSerializations & getElementsSerializations() const { return elems; } + private: ElementSerializations elems; bool have_explicit_names; diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index c0829ab1b26..271c53dfcf1 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -7,10 +7,9 @@ namespace DB void SerializationWrapper::enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const + const SubstreamData & data) const { - nested_serialization->enumerateStreams(path, callback, type, column); + nested_serialization->enumerateStreams(path, callback, data); } void SerializationWrapper::serializeBinaryBulkStatePrefix( diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index c48278d53db..4cdcffc21a8 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ 
b/src/DataTypes/Serializations/SerializationWrapper.h @@ -16,11 +16,14 @@ protected: public: SerializationWrapper(const SerializationPtr & nested_serialization_) : nested_serialization(nested_serialization_) {} + const SerializationPtr & getNested() const { return nested_serialization; } + + Kind getKind() const override { return nested_serialization->getKind(); } + void enumerateStreams( SubstreamPath & path, const StreamCallback & callback, - DataTypePtr type, - ColumnPtr column) const override; + const SubstreamData & data) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/TimezoneMixin.h b/src/DataTypes/TimezoneMixin.h index e6e9f7a7989..03ecde5dd0a 100644 --- a/src/DataTypes/TimezoneMixin.h +++ b/src/DataTypes/TimezoneMixin.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include class DateLUTImpl; diff --git a/src/DataTypes/convertMySQLDataType.cpp b/src/DataTypes/convertMySQLDataType.cpp index 1b5e20bddce..ee897de9597 100644 --- a/src/DataTypes/convertMySQLDataType.cpp +++ b/src/DataTypes/convertMySQLDataType.cpp @@ -91,6 +91,10 @@ DataTypePtr convertMySQLDataType(MultiEnum type_support, res = std::make_shared(scale); } } + else if (type_name == "bit") + { + res = std::make_shared(); + } else if (type_support.isSet(MySQLDataTypesSupport::DECIMAL) && (type_name == "numeric" || type_name == "decimal")) { if (precision <= DecimalUtils::max_precision) diff --git a/src/DataTypes/tests/gtest_split_name.cpp b/src/DataTypes/tests/gtest_split_name.cpp new file mode 100644 index 00000000000..04ce4d5e108 --- /dev/null +++ b/src/DataTypes/tests/gtest_split_name.cpp @@ -0,0 +1,32 @@ +#include + +#include + +using namespace DB; + +TEST(SplitName, forward) +{ + ASSERT_EQ(Nested::splitName(String("abc")), (std::pair{"abc", ""})); + ASSERT_EQ(Nested::splitName(String("a.b")), (std::pair{"a", "b"})); + ASSERT_EQ(Nested::splitName(String("a.b.c")), (std::pair{"a", "b.c"})); + ASSERT_EQ(Nested::splitName(String("a.1")), (std::pair{"a", "1"})); + ASSERT_EQ(Nested::splitName(String("a.1.b")), (std::pair{"a", "1.b"})); + ASSERT_EQ(Nested::splitName(String("1.a")), (std::pair{"1", "a"})); + ASSERT_EQ(Nested::splitName(String("a.b1.b2")), (std::pair{"a", "b1.b2"})); + ASSERT_EQ(Nested::splitName(String("a.b1.2a.3a")), (std::pair{"a", "b1.2a.3a"})); + ASSERT_EQ(Nested::splitName(String("..")), (std::pair{"..", ""})); +} + +TEST(SplitName, reverse) +{ + ASSERT_EQ(Nested::splitName(String("abc"), true), (std::pair{"abc", ""})); + ASSERT_EQ(Nested::splitName(String("a.b"), true), (std::pair{"a", "b"})); + ASSERT_EQ(Nested::splitName(String("a.b.c"), true), (std::pair{"a.b", "c"})); + ASSERT_EQ(Nested::splitName(String("a.1"), true), (std::pair{"a", "1"})); + ASSERT_EQ(Nested::splitName(String("a.1a.b"), true), (std::pair{"a.1a", "b"})); + ASSERT_EQ(Nested::splitName(String("1a.b"), true), (std::pair{"1a", "b"})); + ASSERT_EQ(Nested::splitName(String("a.b1.b2"), true), (std::pair{"a.b1", "b2"})); + ASSERT_EQ(Nested::splitName(String("a.b1.2a.3a"), true), (std::pair{"a.b1.2a", "3a"})); + ASSERT_EQ(Nested::splitName(String("a.b1.b2.b3"), true), (std::pair{"a.b1.b2", "b3"})); + ASSERT_EQ(Nested::splitName(String(".."), true), (std::pair{"..", ""})); +} diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index db7da95fb27..82766c1e384 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -29,10 +29,13 @@ namespace return nullptr; DictionaryStructure 
dictionary_structure = ExternalDictionariesLoader::getDictionaryStructure(*load_result.config); + auto comment = load_result.config->config->getString("dictionary.comment", ""); + return StorageDictionary::create( StorageID(database_name, load_result.name), load_result.name, dictionary_structure, + comment, StorageDictionary::Location::DictionaryDatabase, context); } diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index a4c8f3cdb77..3f6cb49fda7 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -23,6 +23,8 @@ # include # include # include +# include +# include # include # include #endif @@ -117,7 +119,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String static const std::unordered_set engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL", "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"}; - static const std::unordered_set engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL"}; + static const std::unordered_set engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"}; bool engine_may_have_arguments = engines_with_arguments.contains(engine_name); if (engine_define->engine->arguments && !engine_may_have_arguments) @@ -198,13 +200,15 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (engine_name == "MySQL") { auto mysql_database_settings = std::make_unique(); - auto mysql_pool = mysqlxx::PoolWithFailover(configuration.database, configuration.addresses, configuration.username, configuration.password); + MySQLSettings mysql_settings; + auto mysql_pool = createMySQLPoolWithFailover(configuration, mysql_settings); mysql_database_settings->loadFromQueryContext(context); mysql_database_settings->loadFromQuery(*engine_define); /// higher priority return std::make_shared( - context, database_name, metadata_path, engine_define, configuration.database, std::move(mysql_database_settings), std::move(mysql_pool)); + context, database_name, metadata_path, engine_define, configuration.database, + std::move(mysql_database_settings), std::move(mysql_pool), create.attach); } MySQLClient client(configuration.host, configuration.port, configuration.username, configuration.password); diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index ffb39f5b113..1c3f417b431 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -30,27 +30,33 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo auto & ast_create_query = query->as(); bool has_structure = ast_create_query.columns_list && ast_create_query.columns_list->columns; + if (ast_create_query.as_table_function && !has_structure) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function" " and doesn't have structure in metadata", backQuote(ast_create_query.getTable())); - assert(has_structure); - ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns); - ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices); - ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints); - ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections); + if (!has_structure && !ast_create_query.is_dictionary) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot alter table {} metadata doesn't have 
structure", backQuote(ast_create_query.getTable())); - ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns); - ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices); - ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints); - ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections); + if (!ast_create_query.is_dictionary) + { + ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns); + ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices); + ASTPtr new_constraints = InterpreterCreateQuery::formatConstraints(metadata.constraints); + ASTPtr new_projections = InterpreterCreateQuery::formatProjections(metadata.projections); + + ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns); + ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices); + ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints); + ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->projections, new_projections); + } if (metadata.select.select_query) { query->replace(ast_create_query.select, metadata.select.select_query); } - /// MaterializedView is one type of CREATE query without storage. + /// MaterializedView, Dictionary are types of CREATE query without storage. if (ast_create_query.storage) { ASTStorage & storage_ast = *ast_create_query.storage; diff --git a/src/Databases/MySQL/ConnectionMySQLSettings.cpp b/src/Databases/MySQL/ConnectionMySQLSettings.cpp index 1026d14018b..87da701e481 100644 --- a/src/Databases/MySQL/ConnectionMySQLSettings.cpp +++ b/src/Databases/MySQL/ConnectionMySQLSettings.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -IMPLEMENT_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_CONNECTION_MYSQL_SETTINGS) +IMPLEMENT_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_MYSQL_DATABASE_SETTINGS) void ConnectionMySQLSettings::loadFromQuery(ASTStorage & storage_def) { diff --git a/src/Databases/MySQL/ConnectionMySQLSettings.h b/src/Databases/MySQL/ConnectionMySQLSettings.h index f05985a0cda..8b17d4e7898 100644 --- a/src/Databases/MySQL/ConnectionMySQLSettings.h +++ b/src/Databases/MySQL/ConnectionMySQLSettings.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -17,7 +18,11 @@ class ASTStorage; #define APPLY_FOR_IMMUTABLE_CONNECTION_MYSQL_SETTINGS(M) \ M(mysql_datatypes_support_level) -DECLARE_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_CONNECTION_MYSQL_SETTINGS) +#define LIST_OF_MYSQL_DATABASE_SETTINGS(M) \ + LIST_OF_CONNECTION_MYSQL_SETTINGS(M) \ + LIST_OF_MYSQL_SETTINGS(M) + +DECLARE_SETTINGS_TRAITS(ConnectionMySQLSettingsTraits, LIST_OF_MYSQL_DATABASE_SETTINGS) /** Settings for the MySQL database engine. 
diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index f62e06aff8d..cc6d808a564 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -53,7 +53,8 @@ DatabaseMySQL::DatabaseMySQL( const ASTStorage * database_engine_define_, const String & database_name_in_mysql_, std::unique_ptr settings_, - mysqlxx::PoolWithFailover && pool) + mysqlxx::PoolWithFailover && pool, + bool attach) : IDatabase(database_name_) , WithContext(context_->getGlobalContext()) , metadata_path(metadata_path_) @@ -62,7 +63,19 @@ DatabaseMySQL::DatabaseMySQL( , database_settings(std::move(settings_)) , mysql_pool(std::move(pool)) { - empty(); /// test database is works fine. + try + { + /// Test that the database is working fine; it will also fetch tables. + empty(); + } + catch (...) + { + if (attach) + tryLogCurrentException("DatabaseMySQL"); + else + throw; + } + thread = ThreadFromGlobalPool{&DatabaseMySQL::cleanOutdatedTables, this}; } diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index e57ac442db1..1ee090ecd52 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -45,7 +45,8 @@ public: const ASTStorage * database_engine_define, const String & database_name_in_mysql, std::unique_ptr settings_, - mysqlxx::PoolWithFailover && pool); + mysqlxx::PoolWithFailover && pool, + bool attach); String getEngineName() const override { return "MySQL"; } diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 723457fba5b..7da25298cf2 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -110,12 +110,12 @@ std::exception_ptr CacheDictionary::getLastException() cons } template -const IDictionarySource * CacheDictionary::getSource() const +DictionarySourcePtr CacheDictionary::getSource() const { /// Mutex required here because of the getSourceAndUpdateIfNeeded() function /// which is used from another thread. 
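    /// Returning DictionarySourcePtr (now a shared_ptr, judging by the make_shared
    /// calls in the clone() implementations in this change) instead of a raw pointer
    /// presumably lets the caller keep the source alive even if source_ptr is
    /// replaced by another thread after the lock below is released.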
std::lock_guard lock(source_mutex); - return source_ptr.get(); + return source_ptr; } template @@ -602,6 +602,7 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr update_queue; diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 35419d3ea7d..76ad2316366 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -61,7 +61,7 @@ public: DictionarySourcePtr clone() const override { - return std::make_unique(dict_struct, configuration, sample_block); + return std::make_shared(dict_struct, configuration, sample_block); } Pipe loadIds(const std::vector & ids) override; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 1ddcdd96454..6abd5f317e2 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -17,7 +17,6 @@ #include "DictionaryStructure.h" #include "ExternalQueryBuilder.h" #include "readInvalidateQuery.h" -#include "writeParenthesisedString.h" #include "DictionaryFactory.h" #include "DictionarySourceHelpers.h" diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index be09fa415fd..cdcc0ee824f 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -60,7 +60,7 @@ public: bool hasUpdateField() const override; - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 26fbb6f193f..b59e29c327e 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -411,7 +412,7 @@ public: if constexpr (key_type == DictionaryKeyType::Simple) { - key_columns[0] = key_columns[0]->convertToFullColumnIfConst(); + key_columns[0] = recursiveRemoveSparse(key_columns[0]->convertToFullColumnIfConst()); const auto * vector_col = checkAndGetColumn>(key_columns[0].get()); if (!vector_col) @@ -574,6 +575,8 @@ void mergeBlockWithPipe( while (executor.pull(block)) { + convertToFullIfSparse(block); + Columns block_key_columns; block_key_columns.reserve(key_columns_size); @@ -633,7 +636,7 @@ static const PaddedPODArray & getColumnVectorData( PaddedPODArray & backup_storage) { bool is_const_column = isColumnConst(*column); - auto full_column = column->convertToFullColumnIfConst(); + auto full_column = recursiveRemoveSparse(column->convertToFullColumnIfConst()); auto vector_col = checkAndGetColumn>(full_column.get()); if (!vector_col) diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 551f485e5bb..12c624a6859 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -75,6 +75,8 @@ Columns DirectDictionary::getColumns( Block block; while (executor.pull(block)) { + convertToFullIfSparse(block); + /// Split into keys columns and attribute columns for (size_t i = 0; i < dictionary_keys_size; ++i) block_key_columns.emplace_back(block.safeGetByPosition(i).column); diff --git a/src/Dictionaries/DirectDictionary.h b/src/Dictionaries/DirectDictionary.h index edf4c8d1d9a..4bf24e6ae98 100644 --- a/src/Dictionaries/DirectDictionary.h +++ 
b/src/Dictionaries/DirectDictionary.h @@ -58,7 +58,7 @@ public: return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone()); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index c09993c2a84..7a3550e7284 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -1,10 +1,16 @@ #include "ExecutableDictionarySource.h" +#include + +#include + #include -#include +#include +#include #include #include +#include #include #include @@ -27,15 +33,46 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } +namespace +{ + + void updateCommandIfNeeded(String & command, bool execute_direct, ContextPtr context) + { + if (!execute_direct) + return; + + auto global_context = context->getGlobalContext(); + auto user_scripts_path = global_context->getUserScriptsPath(); + auto script_path = user_scripts_path + '/' + command; + + if (!fileOrSymlinkPathStartsWith(script_path, user_scripts_path)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} must be inside user scripts folder {}", + command, + user_scripts_path); + + if (!std::filesystem::exists(std::filesystem::path(script_path))) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} does not exist inside user scripts folder {}", + command, + user_scripts_path); + + command = std::move(script_path); + } + +} + ExecutableDictionarySource::ExecutableDictionarySource( const DictionaryStructure & dict_struct_, const Configuration & configuration_, Block & sample_block_, + std::shared_ptr coordinator_, ContextPtr context_) : log(&Poco::Logger::get("ExecutableDictionarySource")) , dict_struct(dict_struct_) , configuration(configuration_) - , sample_block{sample_block_} + , sample_block(sample_block_) + , coordinator(std::move(coordinator_)) , context(context_) { /// Remove keys from sample_block for implicit_key dictionary because @@ -58,6 +95,7 @@ ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionar , dict_struct(other.dict_struct) , configuration(other.configuration) , sample_block(other.sample_block) + , coordinator(other.coordinator) , context(Context::createCopy(other.context)) { } @@ -69,11 +107,11 @@ Pipe ExecutableDictionarySource::loadAll() LOG_TRACE(log, "loadAll {}", toString()); - ShellCommand::Config config(configuration.command); - auto process = ShellCommand::execute(config); + const auto & coordinator_configuration = coordinator->getConfiguration(); + auto command = configuration.command; + updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context); - Pipe pipe(std::make_unique(context, configuration.format, sample_block, std::move(process))); - return pipe; + return coordinator->createPipe(command, configuration.command_arguments, sample_block, context); } Pipe ExecutableDictionarySource::loadUpdatedAll() @@ -82,17 +120,32 @@ Pipe ExecutableDictionarySource::loadUpdatedAll() throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "ExecutableDictionarySource with implicit_key does not support loadUpdatedAll method"); time_t new_update_time = time(nullptr); - SCOPE_EXIT(update_time = new_update_time); - std::string command_with_update_field = configuration.command; + const auto & 
coordinator_configuration = coordinator->getConfiguration(); + auto command = configuration.command; + updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context); + + auto command_arguments = configuration.command_arguments; + if (update_time) - command_with_update_field += " " + configuration.update_field + " " + DB::toString(LocalDateTime(update_time - configuration.update_lag)); + { + auto update_difference = DB::toString(LocalDateTime(update_time - configuration.update_lag)); - LOG_TRACE(log, "loadUpdatedAll {}", command_with_update_field); - ShellCommand::Config config(command_with_update_field); - auto process = ShellCommand::execute(config); - Pipe pipe(std::make_unique(context, configuration.format, sample_block, std::move(process))); - return pipe; + if (coordinator_configuration.execute_direct) + { + command_arguments.emplace_back(configuration.update_field); + command_arguments.emplace_back(std::move(update_difference)); + } + else + { + command += ' ' + configuration.update_field + ' ' + update_difference; + } + } + + update_time = new_update_time; + + LOG_TRACE(log, "loadUpdatedAll {}", command); + return coordinator->createPipe(command, command_arguments, sample_block, context); } Pipe ExecutableDictionarySource::loadIds(const std::vector & ids) @@ -113,27 +166,17 @@ Pipe ExecutableDictionarySource::loadKeys(const Columns & key_columns, const std Pipe ExecutableDictionarySource::getStreamForBlock(const Block & block) { - ShellCommand::Config config(configuration.command); - auto process = ShellCommand::execute(config); - auto * process_in = &process->in; + const auto & coordinator_configuration = coordinator->getConfiguration(); + String command = configuration.command; + updateCommandIfNeeded(command, coordinator_configuration.execute_direct, context); - ShellCommandSource::SendDataTask task = {[process_in, block, this]() - { - auto & out = *process_in; + auto source = std::make_shared(block); + auto shell_input_pipe = Pipe(std::move(source)); - if (configuration.send_chunk_header) - { - writeText(block.rows(), out); - writeChar('\n', out); - } + Pipes shell_input_pipes; + shell_input_pipes.emplace_back(std::move(shell_input_pipe)); - auto output_format = context->getOutputFormat(configuration.format, out, block.cloneEmpty()); - formatBlock(output_format, block); - out.close(); - }}; - std::vector tasks = {std::move(task)}; - - Pipe pipe(std::make_unique(context, configuration.format, sample_block, std::move(process), std::move(tasks))); + auto pipe = coordinator->createPipe(command, configuration.command_arguments, std::move(shell_input_pipes), sample_block, context); if (configuration.implicit_key) pipe.addTransform(std::make_shared(block, pipe.getHeader())); @@ -158,7 +201,7 @@ bool ExecutableDictionarySource::hasUpdateField() const DictionarySourcePtr ExecutableDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } std::string ExecutableDictionarySource::toString() const @@ -189,17 +232,40 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) std::string settings_config_prefix = config_prefix + ".executable"; + bool execute_direct = config.getBool(settings_config_prefix + ".execute_direct", false); + std::string command_value = config.getString(settings_config_prefix + ".command"); + std::vector command_arguments; + + if (execute_direct) + { + boost::split(command_arguments, command_value, [](char c) { return c == ' '; }); + + command_value = std::move(command_arguments[0]); + 
command_arguments.erase(command_arguments.begin()); + } + ExecutableDictionarySource::Configuration configuration { - .command = config.getString(settings_config_prefix + ".command"), - .format = config.getString(settings_config_prefix + ".format"), + .command = std::move(command_value), + .command_arguments = std::move(command_arguments), .update_field = config.getString(settings_config_prefix + ".update_field", ""), .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), .implicit_key = config.getBool(settings_config_prefix + ".implicit_key", false), - .send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false) }; - return std::make_unique(dict_struct, configuration, sample_block, context); + ShellCommandSourceCoordinator::Configuration shell_command_coordinator_configration + { + .format = config.getString(settings_config_prefix + ".format"), + .command_termination_timeout_seconds = config.getUInt64(settings_config_prefix + ".command_termination_timeout", 10), + .command_read_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_read_timeout", 10000), + .command_write_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_write_timeout", 10000), + .is_executable_pool = false, + .send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false), + .execute_direct = config.getBool(settings_config_prefix + ".execute_direct", false) + }; + + auto coordinator = std::make_shared(shell_command_coordinator_configration); + return std::make_unique(dict_struct, configuration, sample_block, std::move(coordinator), context); }; factory.registerSource("executable", create_table_source); diff --git a/src/Dictionaries/ExecutableDictionarySource.h b/src/Dictionaries/ExecutableDictionarySource.h index a7ffc8bebcb..6c5d2de3714 100644 --- a/src/Dictionaries/ExecutableDictionarySource.h +++ b/src/Dictionaries/ExecutableDictionarySource.h @@ -7,6 +7,7 @@ #include #include +#include namespace DB @@ -20,20 +21,19 @@ public: struct Configuration { std::string command; - std::string format; + std::vector command_arguments; std::string update_field; UInt64 update_lag; /// Implicit key means that the source script will return only values, /// and the correspondence to the requested keys is determined implicitly - by the order of rows in the result. 
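With execute_direct enabled, the registration code above splits the configured command string on spaces into the executable name plus its argument vector before handing both to the coordinator. A self-contained sketch of that split, using std::istringstream in place of boost::split purely to avoid the Boost dependency; the command value itself is a made-up example:

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main()
    {
        // Hypothetical <command> value from the dictionary configuration.
        std::string command_value = "my_script.py --format TSV";

        std::vector<std::string> command_arguments;
        std::istringstream tokens(command_value);
        for (std::string token; tokens >> token;)
            command_arguments.push_back(token);

        // The first token becomes the executable, the rest are passed as arguments.
        command_value = command_arguments.front();
        command_arguments.erase(command_arguments.begin());

        std::cout << "command: " << command_value << '\n';
        for (const auto & argument : command_arguments)
            std::cout << "argument: " << argument << '\n';
    }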
bool implicit_key; - /// Send number_of_rows\n before sending chunk to process - bool send_chunk_header; }; ExecutableDictionarySource( const DictionaryStructure & dict_struct_, const Configuration & configuration_, Block & sample_block_, + std::shared_ptr coordinator_, ContextPtr context_); ExecutableDictionarySource(const ExecutableDictionarySource & other); @@ -69,6 +69,7 @@ private: const DictionaryStructure dict_struct; const Configuration configuration; Block sample_block; + std::shared_ptr coordinator; ContextPtr context; }; diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index dce2ce94b93..48ddeed7fa6 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -1,14 +1,20 @@ #include "ExecutablePoolDictionarySource.h" +#include + +#include + #include -#include +#include +#include #include +#include +#include +#include #include #include -#include -#include #include #include @@ -23,20 +29,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int DICTIONARY_ACCESS_DENIED; extern const int UNSUPPORTED_METHOD; - extern const int TIMEOUT_EXCEEDED; } ExecutablePoolDictionarySource::ExecutablePoolDictionarySource( const DictionaryStructure & dict_struct_, const Configuration & configuration_, Block & sample_block_, + std::shared_ptr coordinator_, ContextPtr context_) : dict_struct(dict_struct_) , configuration(configuration_) , sample_block(sample_block_) + , coordinator(std::move(coordinator_)) , context(context_) - /// If pool size == 0 then there is no size restrictions. Poco max size of semaphore is integer type. - , process_pool(std::make_shared(configuration.pool_size == 0 ? std::numeric_limits::max() : configuration.pool_size)) , log(&Poco::Logger::get("ExecutablePoolDictionarySource")) { /// Remove keys from sample_block for implicit_key dictionary because @@ -59,8 +64,8 @@ ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(const ExecutableP : dict_struct(other.dict_struct) , configuration(other.configuration) , sample_block(other.sample_block) + , coordinator(other.coordinator) , context(Context::createCopy(other.context)) - , process_pool(std::make_shared(configuration.pool_size)) , log(&Poco::Logger::get("ExecutablePoolDictionarySource")) { } @@ -93,41 +98,47 @@ Pipe ExecutablePoolDictionarySource::loadKeys(const Columns & key_columns, const Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block) { - std::unique_ptr process; - bool result = process_pool->tryBorrowObject(process, [this]() + String command = configuration.command; + const auto & coordinator_configuration = coordinator->getConfiguration(); + + if (coordinator_configuration.execute_direct) { - ShellCommand::Config config(configuration.command); - config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, configuration.command_termination_timeout }; - auto shell_command = ShellCommand::execute(config); - return shell_command; - }, configuration.max_command_execution_time * 10000); + auto global_context = context->getGlobalContext(); + auto user_scripts_path = global_context->getUserScriptsPath(); + auto script_path = user_scripts_path + '/' + command; - if (!result) - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, - "Could not get process from pool, max command execution timeout exceeded {} seconds", - configuration.max_command_execution_time); + if 
(!fileOrSymlinkPathStartsWith(script_path, user_scripts_path)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} must be inside user scripts folder {}", + command, + user_scripts_path); - size_t rows_to_read = block.rows(); - auto * process_in = &process->in; - ShellCommandSource::SendDataTask task = [process_in, block, this]() mutable - { - auto & out = *process_in; + if (!std::filesystem::exists(std::filesystem::path(script_path))) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} does not exist inside user scripts folder {}", + command, + user_scripts_path); - if (configuration.send_chunk_header) - { - writeText(block.rows(), out); - writeChar('\n', out); - } + command = std::move(script_path); + } - auto output_format = context->getOutputFormat(configuration.format, out, block.cloneEmpty()); - formatBlock(output_format, block); - }; - std::vector tasks = {std::move(task)}; + auto source = std::make_shared(block); + auto shell_input_pipe = Pipe(std::move(source)); ShellCommandSourceConfiguration command_configuration; command_configuration.read_fixed_number_of_rows = true; - command_configuration.number_of_rows_to_read = rows_to_read; - Pipe pipe(std::make_unique(context, configuration.format, sample_block, std::move(process), std::move(tasks), command_configuration, process_pool)); + command_configuration.number_of_rows_to_read = block.rows(); + + Pipes shell_input_pipes; + shell_input_pipes.emplace_back(std::move(shell_input_pipe)); + + auto pipe = coordinator->createPipe( + command, + configuration.command_arguments, + std::move(shell_input_pipes), + sample_block, + context, + command_configuration); if (configuration.implicit_key) pipe.addTransform(std::make_shared(block, pipe.getHeader())); @@ -152,12 +163,13 @@ bool ExecutablePoolDictionarySource::hasUpdateField() const DictionarySourcePtr ExecutablePoolDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } std::string ExecutablePoolDictionarySource::toString() const { - return "ExecutablePool size: " + std::to_string(configuration.pool_size) + " command: " + configuration.command; + size_t pool_size = coordinator->getConfiguration().pool_size; + return "ExecutablePool size: " + std::to_string(pool_size) + " command: " + configuration.command; } void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory) @@ -189,18 +201,40 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory) if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) max_command_execution_time = max_execution_time_seconds; + bool execute_direct = config.getBool(settings_config_prefix + ".execute_direct", false); + std::string command_value = config.getString(settings_config_prefix + ".command"); + std::vector command_arguments; + + if (execute_direct) + { + boost::split(command_arguments, command_value, [](char c) { return c == ' '; }); + + command_value = std::move(command_arguments[0]); + command_arguments.erase(command_arguments.begin()); + } + ExecutablePoolDictionarySource::Configuration configuration { - .command = config.getString(settings_config_prefix + ".command"), - .format = config.getString(settings_config_prefix + ".format"), - .pool_size = config.getUInt64(settings_config_prefix + ".size"), - .command_termination_timeout = config.getUInt64(settings_config_prefix + ".command_termination_timeout", 10), - .max_command_execution_time = max_command_execution_time, + .command = 
std::move(command_value), + .command_arguments = std::move(command_arguments), .implicit_key = config.getBool(settings_config_prefix + ".implicit_key", false), - .send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false) }; - return std::make_unique(dict_struct, configuration, sample_block, context); + ShellCommandSourceCoordinator::Configuration shell_command_coordinator_configration + { + .format = config.getString(settings_config_prefix + ".format"), + .command_termination_timeout_seconds = config.getUInt64(settings_config_prefix + ".command_termination_timeout", 10), + .command_read_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_read_timeout", 10000), + .command_write_timeout_milliseconds = config.getUInt64(settings_config_prefix + ".command_write_timeout", 10000), + .pool_size = config.getUInt64(settings_config_prefix + ".pool_size", 16), + .max_command_execution_time_seconds = max_command_execution_time, + .is_executable_pool = true, + .send_chunk_header = config.getBool(settings_config_prefix + ".send_chunk_header", false), + .execute_direct = execute_direct + }; + + auto coordinator = std::make_shared(shell_command_coordinator_configration); + return std::make_unique(dict_struct, configuration, sample_block, std::move(coordinator), context); }; factory.registerSource("executable_pool", create_table_source); diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.h b/src/Dictionaries/ExecutablePoolDictionarySource.h index 51215b6311b..b9b3b8efb1b 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.h +++ b/src/Dictionaries/ExecutablePoolDictionarySource.h @@ -28,21 +28,15 @@ public: struct Configuration { String command; - String format; - size_t pool_size; - size_t command_termination_timeout; - size_t max_command_execution_time; - /// Implicit key means that the source script will return only values, - /// and the correspondence to the requested keys is determined implicitly - by the order of rows in the result. 
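Both executable sources now refuse to run a script that resolves outside the configured user_scripts directory. The check below is a simplified stand-in for fileOrSymlinkPathStartsWith (the real helper also accounts for symlinks); isInsideUserScripts and the sample paths are illustrative only:

    #include <algorithm>
    #include <filesystem>
    #include <iostream>
    #include <string>

    namespace fs = std::filesystem;

    // True if user_scripts_path/command, after normalisation, still lies under user_scripts_path.
    bool isInsideUserScripts(const std::string & user_scripts_path, const std::string & command)
    {
        fs::path base = fs::weakly_canonical(fs::path(user_scripts_path));
        fs::path script = fs::weakly_canonical(base / command);

        auto split_point = std::mismatch(base.begin(), base.end(), script.begin(), script.end());
        return split_point.first == base.end();       // every component of base must prefix script
    }

    int main()
    {
        std::cout << std::boolalpha
                  << isInsideUserScripts("/var/lib/clickhouse/user_scripts", "my_script.sh") << '\n'    // true
                  << isInsideUserScripts("/var/lib/clickhouse/user_scripts", "../etc/passwd") << '\n';  // false
    }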
+ std::vector command_arguments; bool implicit_key; - /// Send number_of_rows\n before sending chunk to process - bool send_chunk_header; }; ExecutablePoolDictionarySource( const DictionaryStructure & dict_struct_, const Configuration & configuration_, Block & sample_block_, + std::shared_ptr coordinator_, ContextPtr context_); ExecutablePoolDictionarySource(const ExecutablePoolDictionarySource & other); @@ -77,8 +71,8 @@ private: const Configuration configuration; Block sample_block; + std::shared_ptr coordinator; ContextPtr context; - std::shared_ptr process_pool; Poco::Logger * log; }; diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index f513c7b2f61..1701f08fd67 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -1,14 +1,23 @@ #include "ExternalQueryBuilder.h" + +#include + #include #include #include -#include -#include "DictionaryStructure.h" -#include "writeParenthesisedString.h" +#include namespace DB { + +static inline void writeParenthesisedString(const String & s, WriteBuffer & buf) +{ + writeChar('(', buf); + writeString(s, buf); + writeChar(')', buf); +} + namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; diff --git a/src/Dictionaries/FileDictionarySource.h b/src/Dictionaries/FileDictionarySource.h index c8e37986b2f..8fe2d87d8b9 100644 --- a/src/Dictionaries/FileDictionarySource.h +++ b/src/Dictionaries/FileDictionarySource.h @@ -51,7 +51,7 @@ public: ///Not supported for FileDictionarySource bool hasUpdateField() const override { return false; } - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index b6c5f10564b..de4ae66300a 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -328,6 +328,8 @@ void FlatDictionary::updateData() Block block; while (executor.pull(block)) { + convertToFullIfSparse(block); + /// We are using this to keep saved data if input stream consists of multiple blocks if (!update_field_loaded_block) update_field_loaded_block = std::make_shared(block.cloneEmpty()); diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index 5c3a1d634d8..308cd72d55b 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -61,7 +61,7 @@ public: return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, configuration, update_field_loaded_block); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index aba6b40f206..308570644d1 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -207,7 +207,7 @@ bool HTTPDictionarySource::hasUpdateField() const DictionarySourcePtr HTTPDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } std::string HTTPDictionarySource::toString() const diff --git a/src/Dictionaries/HTTPDictionarySource.h b/src/Dictionaries/HTTPDictionarySource.h index 35fbabecf2a..ce357814982 100644 --- 
a/src/Dictionaries/HTTPDictionarySource.h +++ b/src/Dictionaries/HTTPDictionarySource.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "DictionaryStructure.h" #include "IDictionarySource.h" #include diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h index ca5d7cb1bf6..0d07c43477a 100644 --- a/src/Dictionaries/HashedArrayDictionary.h +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -71,7 +71,7 @@ public: return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return configuration.lifetime; } diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 1df152eec38..c48893bf24f 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -376,6 +376,8 @@ void HashedDictionary::updateData() Block block; while (executor.pull(block)) { + convertToFullIfSparse(block); + /// We are using this to keep saved data if input stream consists of multiple blocks if (!update_field_loaded_block) update_field_loaded_block = std::make_shared(block.cloneEmpty()); @@ -589,7 +591,9 @@ void HashedDictionary::loadData() } } else + { updateData(); + } if (configuration.require_nonempty && 0 == element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 16be4e4c73e..6f63c5ec546 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -78,7 +78,7 @@ public: return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return configuration.lifetime; } diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index 66e35c8fa12..b1923306003 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -1,16 +1,16 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include - #include #include +#include +#include +#include +#include +#include +#include +#include + namespace DB { @@ -19,7 +19,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -struct IDictionary; +class IDictionary; using DictionaryPtr = std::unique_ptr; /** DictionaryKeyType provides IDictionary client information about @@ -47,8 +47,9 @@ enum class DictionarySpecialKeyType /** * Base class for Dictionaries implementation. 
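The change repeated across these dictionary headers is that DictionarySourcePtr becomes a std::shared_ptr, so clone() switches from make_unique to make_shared and getSource() hands out shared ownership instead of a raw pointer. A toy model of that ownership shape, with ISource/FileSource as illustrative names rather than real ClickHouse classes:

    #include <iostream>
    #include <memory>
    #include <string>

    struct ISource;
    using SourcePtr = std::shared_ptr<ISource>;       // analogous to the new DictionarySourcePtr

    struct ISource
    {
        virtual ~ISource() = default;
        virtual SourcePtr clone() const = 0;
        virtual std::string toString() const = 0;
    };

    struct FileSource : ISource
    {
        SourcePtr clone() const override { return std::make_shared<FileSource>(*this); }  // was make_unique
        std::string toString() const override { return "File"; }
    };

    struct Dictionary
    {
        SourcePtr source = std::make_shared<FileSource>();
        SourcePtr getSource() const { return source; }    // was: const ISource * getSource() const
    };

    int main()
    {
        Dictionary dictionary;
        SourcePtr held = dictionary.getSource();      // the caller now shares ownership of the source
        std::cout << held->clone()->toString() << '\n';
    }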
*/ -struct IDictionary : public IExternalLoadable +class IDictionary : public IExternalLoadable { +public: explicit IDictionary(const StorageID & dictionary_id_) : dictionary_id(dictionary_id_) , full_name(dictionary_id.getInternalDictionaryName()) @@ -99,7 +100,7 @@ struct IDictionary : public IExternalLoadable virtual double getLoadFactor() const = 0; - virtual const IDictionarySource * getSource() const = 0; + virtual DictionarySourcePtr getSource() const = 0; virtual const DictionaryStructure & getStructure() const = 0; @@ -200,7 +201,7 @@ struct IDictionary : public IExternalLoadable bool isModified() const override { - const auto * source = getSource(); + const auto source = getSource(); return source && source->isModified(); } diff --git a/src/Dictionaries/IDictionarySource.h b/src/Dictionaries/IDictionarySource.h index 5071b69d2bf..128595b815f 100644 --- a/src/Dictionaries/IDictionarySource.h +++ b/src/Dictionaries/IDictionarySource.h @@ -10,8 +10,7 @@ namespace DB { class IDictionarySource; -using DictionarySourcePtr = std::unique_ptr; -using SharedDictionarySourcePtr = std::shared_ptr; +using DictionarySourcePtr = std::shared_ptr; /** Data-provider interface for external dictionaries, * abstracts out the data source (file, MySQL, ClickHouse, external program, network request et cetera) diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index ed0d8692d21..9f604b5aeb8 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -56,7 +56,7 @@ public: return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index 42683fb884c..b79ee9be59a 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -129,7 +129,7 @@ Pipe LibraryDictionarySource::loadKeys(const Columns & key_columns, const std::v DictionarySourcePtr LibraryDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } diff --git a/src/Dictionaries/LibraryDictionarySource.h b/src/Dictionaries/LibraryDictionarySource.h index e1cb01c0a14..9d7590c20ad 100644 --- a/src/Dictionaries/LibraryDictionarySource.h +++ b/src/Dictionaries/LibraryDictionarySource.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include "DictionaryStructure.h" #include diff --git a/src/Dictionaries/MongoDBDictionarySource.h b/src/Dictionaries/MongoDBDictionarySource.h index 3625deca9c6..85531f89902 100644 --- a/src/Dictionaries/MongoDBDictionarySource.h +++ b/src/Dictionaries/MongoDBDictionarySource.h @@ -65,7 +65,7 @@ public: ///Not yet supported bool hasUpdateField() const override { return false; } - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index f6de6ca0cc1..0bf5cc3cae0 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -14,6 +14,8 @@ #include #include 
#include +#include +#include namespace DB @@ -46,13 +48,17 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) auto settings_config_prefix = config_prefix + ".mysql"; std::shared_ptr pool; - ExternalDataSourceConfiguration configuration; + StorageMySQLConfiguration configuration; auto named_collection = created_from_ddl ? getExternalDataSourceConfiguration(config, settings_config_prefix, global_context) : std::nullopt; if (named_collection) { - configuration = *named_collection; - std::vector> addresses{std::make_pair(configuration.host, configuration.port)}; - pool = std::make_shared(configuration.database, addresses, configuration.username, configuration.password); + configuration.set(*named_collection); + configuration.addresses = {std::make_pair(configuration.host, configuration.port)}; + MySQLSettings mysql_settings; + const auto & settings = global_context->getSettingsRef(); + mysql_settings.connect_timeout = settings.external_storage_connect_timeout_sec; + mysql_settings.read_write_timeout = settings.external_storage_rw_timeout_sec; + pool = std::make_shared(createMySQLPoolWithFailover(configuration, mysql_settings)); } else { @@ -95,7 +101,7 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) # include # include # include -# include +# include # include # include "readInvalidateQuery.h" # include @@ -225,7 +231,7 @@ bool MySQLDictionarySource::hasUpdateField() const DictionarySourcePtr MySQLDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } std::string MySQLDictionarySource::toString() const diff --git a/src/Dictionaries/MySQLDictionarySource.h b/src/Dictionaries/MySQLDictionarySource.h index 37743ca2f62..90506ad1726 100644 --- a/src/Dictionaries/MySQLDictionarySource.h +++ b/src/Dictionaries/MySQLDictionarySource.h @@ -5,7 +5,7 @@ #include "config_core.h" #if USE_MYSQL -# include +# include # include # include "DictionaryStructure.h" # include "ExternalQueryBuilder.h" diff --git a/src/Dictionaries/PolygonDictionary.h b/src/Dictionaries/PolygonDictionary.h index 346160c342f..762c136b8e0 100644 --- a/src/Dictionaries/PolygonDictionary.h +++ b/src/Dictionaries/PolygonDictionary.h @@ -87,7 +87,7 @@ public: double getLoadFactor() const override { return 1.0; } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryStructure & getStructure() const override { return dict_struct; } diff --git a/src/Dictionaries/PolygonDictionaryUtils.cpp b/src/Dictionaries/PolygonDictionaryUtils.cpp index fced18a6f88..15267481c0b 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.cpp +++ b/src/Dictionaries/PolygonDictionaryUtils.cpp @@ -151,7 +151,7 @@ void SlabsPolygonIndex::indexBuild(const std::vector & polygons) } } - for (size_t i = 0; i != all_edges.size(); i++) + for (size_t i = 0; i != all_edges.size(); ++i) { size_t l = edge_left[i]; size_t r = edge_right[i]; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index c9fb8b86b77..0ac84b35048 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -161,7 +161,7 @@ bool PostgreSQLDictionarySource::supportsSelectiveLoad() const DictionarySourcePtr PostgreSQLDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } diff --git a/src/Dictionaries/PostgreSQLDictionarySource.h 
b/src/Dictionaries/PostgreSQLDictionarySource.h index 1cde2958107..87a87eac363 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.h +++ b/src/Dictionaries/PostgreSQLDictionarySource.h @@ -7,7 +7,7 @@ #if USE_LIBPQXX #include "ExternalQueryBuilder.h" #include -#include +#include #include #include diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 1605e2bab81..fca72d5d7cc 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -67,7 +67,7 @@ public: return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, update_field_loaded_block); } - const IDictionarySource * getSource() const override { return source_ptr.get(); } + DictionarySourcePtr getSource() const override { return source_ptr; } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } diff --git a/src/Dictionaries/RedisDictionarySource.h b/src/Dictionaries/RedisDictionarySource.h index 053094e2303..eff97dede0c 100644 --- a/src/Dictionaries/RedisDictionarySource.h +++ b/src/Dictionaries/RedisDictionarySource.h @@ -76,7 +76,7 @@ namespace ErrorCodes bool hasUpdateField() const override { return false; } - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override { return std::make_shared(*this); } std::string toString() const override; diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index f827c0cd8d0..ab7cf65eb8b 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" @@ -162,7 +162,7 @@ bool XDBCDictionarySource::hasUpdateField() const DictionarySourcePtr XDBCDictionarySource::clone() const { - return std::make_unique(*this); + return std::make_shared(*this); } diff --git a/src/Dictionaries/writeParenthesisedString.cpp b/src/Dictionaries/writeParenthesisedString.cpp deleted file mode 100644 index 5e237aa1e6c..00000000000 --- a/src/Dictionaries/writeParenthesisedString.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "writeParenthesisedString.h" - -namespace DB -{ -void writeParenthesisedString(const String & s, WriteBuffer & buf) -{ - writeChar('(', buf); - writeString(s, buf); - writeChar(')', buf); -} - -} diff --git a/src/Dictionaries/writeParenthesisedString.h b/src/Dictionaries/writeParenthesisedString.h deleted file mode 100644 index ec61e944d38..00000000000 --- a/src/Dictionaries/writeParenthesisedString.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ -void writeParenthesisedString(const String & s, WriteBuffer & buf); - - -} diff --git a/src/Disks/BlobStorage/BlobStorageAuth.cpp b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp similarity index 80% rename from src/Disks/BlobStorage/BlobStorageAuth.cpp rename to src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp index 2378ffe12a7..94553ba04e9 100644 --- a/src/Disks/BlobStorage/BlobStorageAuth.cpp +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE @@ -17,7 +17,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -struct BlobStorageEndpoint +struct AzureBlobStorageEndpoint { const String storage_account_url; const String container_name; @@ -41,18 +41,18 @@ void validateContainerName(const 
String & container_name) auto len = container_name.length(); if (len < 3 || len > 64) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Blob Storage container name is not valid, should have length between 3 and 64, but has length: {}", len); + "AzureBlob Storage container name is not valid, should have length between 3 and 64, but has length: {}", len); const auto * container_name_pattern_str = R"([a-z][a-z0-9-]+)"; static const RE2 container_name_pattern(container_name_pattern_str); if (!re2::RE2::FullMatch(container_name, container_name_pattern)) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Blob Storage container name is not valid, should follow the format: {}, got: {}", container_name_pattern_str, container_name); + "AzureBlob Storage container name is not valid, should follow the format: {}, got: {}", container_name_pattern_str, container_name); } -BlobStorageEndpoint processBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +AzureBlobStorageEndpoint processAzureBlobStorageEndpoint(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { String storage_account_url = config.getString(config_prefix + ".storage_account_url"); validateStorageAccountUrl(storage_account_url); @@ -86,7 +86,7 @@ std::shared_ptr getClientWithConnectionString( template -std::shared_ptr getBlobStorageClientWithAuth( +std::shared_ptr getAzureBlobStorageClientWithAuth( const String & url, const String & container_name, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { if (config.has(config_prefix + ".connection_string")) @@ -109,19 +109,19 @@ std::shared_ptr getBlobStorageClientWithAuth( } -std::shared_ptr getBlobContainerClient( +std::shared_ptr getAzureBlobContainerClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { - auto endpoint = processBlobStorageEndpoint(config, config_prefix); + auto endpoint = processAzureBlobStorageEndpoint(config, config_prefix); auto container_name = endpoint.container_name; auto final_url = endpoint.storage_account_url + (endpoint.storage_account_url.back() == '/' ? 
"" : "/") + container_name; if (endpoint.container_already_exists.value_or(false)) - return getBlobStorageClientWithAuth(final_url, container_name, config, config_prefix); + return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix); - auto blob_service_client = getBlobStorageClientWithAuth(endpoint.storage_account_url, container_name, config, config_prefix); + auto blob_service_client = getAzureBlobStorageClientWithAuth(endpoint.storage_account_url, container_name, config, config_prefix); if (!endpoint.container_already_exists.has_value()) { @@ -132,7 +132,7 @@ std::shared_ptr getBlobContainerClient( for (const auto & blob_container : blob_containers) { if (blob_container.Name == endpoint.container_name) - return getBlobStorageClientWithAuth(final_url, container_name, config, config_prefix); + return getAzureBlobStorageClientWithAuth(final_url, container_name, config, config_prefix); } } diff --git a/src/Disks/BlobStorage/BlobStorageAuth.h b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h similarity index 76% rename from src/Disks/BlobStorage/BlobStorageAuth.h rename to src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h index 16a088fc960..1cef6105d41 100644 --- a/src/Disks/BlobStorage/BlobStorageAuth.h +++ b/src/Disks/AzureBlobStorage/AzureBlobStorageAuth.h @@ -12,7 +12,7 @@ namespace DB { -std::shared_ptr getBlobContainerClient( +std::shared_ptr getAzureBlobContainerClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix); } diff --git a/src/Disks/BlobStorage/DiskBlobStorage.cpp b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp similarity index 68% rename from src/Disks/BlobStorage/DiskBlobStorage.cpp rename to src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp index f33e698a50d..e2ee6ee0153 100644 --- a/src/Disks/BlobStorage/DiskBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE @@ -15,11 +15,11 @@ namespace DB namespace ErrorCodes { - extern const int BLOB_STORAGE_ERROR; + extern const int AZURE_BLOB_STORAGE_ERROR; } -DiskBlobStorageSettings::DiskBlobStorageSettings( +DiskAzureBlobStorageSettings::DiskAzureBlobStorageSettings( UInt64 max_single_part_upload_size_, UInt64 min_bytes_for_seek_, int max_single_read_retries_, @@ -32,11 +32,11 @@ DiskBlobStorageSettings::DiskBlobStorageSettings( thread_pool_size(thread_pool_size_) {} -class BlobStoragePathKeeper : public RemoteFSPathKeeper +class AzureBlobStoragePathKeeper : public RemoteFSPathKeeper { public: /// RemoteFSPathKeeper constructed with a placeholder argument for chunk_limit, it is unused in this class - BlobStoragePathKeeper() : RemoteFSPathKeeper(1000) {} + AzureBlobStoragePathKeeper() : RemoteFSPathKeeper(1000) {} void addPath(const String & path) override { @@ -47,19 +47,19 @@ public: }; -DiskBlobStorage::DiskBlobStorage( +DiskAzureBlobStorage::DiskAzureBlobStorage( const String & name_, DiskPtr metadata_disk_, std::shared_ptr blob_container_client_, SettingsPtr settings_, GetDiskSettings settings_getter_) : - IDiskRemote(name_, "", metadata_disk_, "DiskBlobStorage", settings_->thread_pool_size), + IDiskRemote(name_, "", metadata_disk_, "DiskAzureBlobStorage", settings_->thread_pool_size), blob_container_client(blob_container_client_), current_settings(std::move(settings_)), settings_getter(settings_getter_) {} -std::unique_ptr DiskBlobStorage::readFile( +std::unique_ptr DiskAzureBlobStorage::readFile( const String & path, const ReadSettings & read_settings, std::optional 
/*estimated_size*/) const @@ -67,11 +67,11 @@ std::unique_ptr DiskBlobStorage::readFile( auto settings = current_settings.get(); auto metadata = readMeta(path); - LOG_TRACE(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); + LOG_TEST(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; - auto reader_impl = std::make_unique( + auto reader_impl = std::make_unique( path, blob_container_client, metadata, settings->max_single_read_retries, settings->max_single_download_retries, read_settings, threadpool_read); @@ -88,7 +88,7 @@ std::unique_ptr DiskBlobStorage::readFile( } -std::unique_ptr DiskBlobStorage::writeFile( +std::unique_ptr DiskAzureBlobStorage::writeFile( const String & path, size_t buf_size, WriteMode mode) @@ -96,38 +96,38 @@ std::unique_ptr DiskBlobStorage::writeFile( auto metadata = readOrCreateMetaForWriting(path, mode); auto blob_path = path + "_" + getRandomASCIIString(8); /// NOTE: path contains the tmp_* prefix in the blob name - LOG_TRACE(log, "{} to file by path: {}. Blob Storage path: {}", + LOG_TRACE(log, "{} to file by path: {}. AzureBlob Storage path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), blob_path); - auto buffer = std::make_unique( + auto buffer = std::make_unique( blob_container_client, blob_path, current_settings.get()->max_single_part_upload_size, buf_size); - return std::make_unique>(std::move(buffer), std::move(metadata), blob_path); + return std::make_unique>(std::move(buffer), std::move(metadata), blob_path); } -DiskType DiskBlobStorage::getType() const +DiskType DiskAzureBlobStorage::getType() const { - return DiskType::BlobStorage; + return DiskType::AzureBlobStorage; } -bool DiskBlobStorage::isRemote() const +bool DiskAzureBlobStorage::isRemote() const { return true; } -bool DiskBlobStorage::supportZeroCopyReplication() const +bool DiskAzureBlobStorage::supportZeroCopyReplication() const { return true; } -bool DiskBlobStorage::checkUniqueId(const String & id) const +bool DiskAzureBlobStorage::checkUniqueId(const String & id) const { Azure::Storage::Blobs::ListBlobsOptions blobs_list_options; blobs_list_options.Prefix = id; @@ -146,9 +146,9 @@ bool DiskBlobStorage::checkUniqueId(const String & id) const } -void DiskBlobStorage::removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) +void DiskAzureBlobStorage::removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) { - auto * paths_keeper = dynamic_cast(fs_paths_keeper.get()); + auto * paths_keeper = dynamic_cast(fs_paths_keeper.get()); if (paths_keeper) { @@ -158,25 +158,25 @@ void DiskBlobStorage::removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) { auto delete_info = blob_container_client->DeleteBlob(path); if (!delete_info.Value.Deleted) - throw Exception(ErrorCodes::BLOB_STORAGE_ERROR, "Failed to delete file in Blob Storage: {}", path); + throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Failed to delete file in AzureBlob Storage: {}", path); } - catch (const Azure::Storage::StorageException& e) + catch (const Azure::Storage::StorageException & e) { LOG_INFO(log, "Caught an error while deleting file {} : {}", path, e.Message); - throw e; + throw; } } } } -RemoteFSPathKeeperPtr DiskBlobStorage::createFSPathKeeper() const +RemoteFSPathKeeperPtr DiskAzureBlobStorage::createFSPathKeeper() const { - return std::make_shared(); + return std::make_shared(); } -void 
DiskBlobStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) +void DiskAzureBlobStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &) { auto new_settings = settings_getter(config, "storage_configuration.disks." + name, context); diff --git a/src/Disks/BlobStorage/DiskBlobStorage.h b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h similarity index 84% rename from src/Disks/BlobStorage/DiskBlobStorage.h rename to src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h index 16ba216df96..f90ede1add9 100644 --- a/src/Disks/BlobStorage/DiskBlobStorage.h +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h @@ -1,14 +1,12 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif #if USE_AZURE_BLOB_STORAGE #include -#include -#include +#include +#include #include #include @@ -18,9 +16,9 @@ namespace DB { -struct DiskBlobStorageSettings final +struct DiskAzureBlobStorageSettings final { - DiskBlobStorageSettings( + DiskAzureBlobStorageSettings( UInt64 max_single_part_upload_size_, UInt64 min_bytes_for_seek_, int max_single_read_retries, @@ -35,14 +33,14 @@ struct DiskBlobStorageSettings final }; -class DiskBlobStorage final : public IDiskRemote +class DiskAzureBlobStorage final : public IDiskRemote { public: - using SettingsPtr = std::unique_ptr; + using SettingsPtr = std::unique_ptr; using GetDiskSettings = std::function; - DiskBlobStorage( + DiskAzureBlobStorage( const String & name_, DiskPtr metadata_disk_, std::shared_ptr blob_container_client_, @@ -78,7 +76,7 @@ private: /// client used to access the files in the Blob Storage cloud std::shared_ptr blob_container_client; - MultiVersion current_settings; + MultiVersion current_settings; /// Gets disk settings from context. 
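Apart from the Blob Storage to AzureBlob Storage rename, validateContainerName above keeps the same rules: a container name must be 3 to 64 characters long and match [a-z][a-z0-9-]+. A quick sketch of the equivalent check using std::regex instead of RE2 (only the regex engine differs; the pattern and bounds come from the hunk above, the sample names are made up):

    #include <iostream>
    #include <regex>
    #include <string>

    bool isValidContainerName(const std::string & name)
    {
        if (name.size() < 3 || name.size() > 64)
            return false;
        static const std::regex pattern(R"([a-z][a-z0-9-]+)");
        return std::regex_match(name, pattern);       // full match, like RE2::FullMatch
    }

    int main()
    {
        std::cout << std::boolalpha
                  << isValidContainerName("clickhouse-backups") << '\n'  // true
                  << isValidContainerName("ab") << '\n'                  // false: too short
                  << isValidContainerName("Bad_Name") << '\n';           // false: pattern mismatch
    }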
GetDiskSettings settings_getter; }; diff --git a/src/Disks/BlobStorage/registerDiskBlobStorage.cpp b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp similarity index 69% rename from src/Disks/BlobStorage/registerDiskBlobStorage.cpp rename to src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 58b84a3f972..243452353d3 100644 --- a/src/Disks/BlobStorage/registerDiskBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include +#include namespace DB @@ -62,9 +62,9 @@ void checkRemoveAccess(IDisk & disk) } -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/) +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr /*context*/) { - return std::make_unique( + return std::make_unique( config.getUInt64(config_prefix + ".max_single_part_upload_size", 100 * 1024 * 1024), config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".max_single_read_retries", 3), @@ -74,7 +74,7 @@ std::unique_ptr getSettings(const Poco::Util::AbstractC } -void registerDiskBlobStorage(DiskFactory & factory) +void registerDiskAzureBlobStorage(DiskFactory & factory) { auto creator = []( const String & name, @@ -85,33 +85,33 @@ void registerDiskBlobStorage(DiskFactory & factory) { auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); - std::shared_ptr blob_storage_disk = std::make_shared( + std::shared_ptr azure_blob_storage_disk = std::make_shared( name, metadata_disk, - getBlobContainerClient(config, config_prefix), + getAzureBlobContainerClient(config, config_prefix), getSettings(config, config_prefix, context), getSettings ); if (!config.getBool(config_prefix + ".skip_access_check", false)) { - checkWriteAccess(*blob_storage_disk); - checkReadAccess(*blob_storage_disk); - checkReadWithOffset(*blob_storage_disk); - checkRemoveAccess(*blob_storage_disk); + checkWriteAccess(*azure_blob_storage_disk); + checkReadAccess(*azure_blob_storage_disk); + checkReadWithOffset(*azure_blob_storage_disk); + checkRemoveAccess(*azure_blob_storage_disk); } - blob_storage_disk->startup(); + azure_blob_storage_disk->startup(); if (config.getBool(config_prefix + ".cache_enabled", true)) { String cache_path = config.getString(config_prefix + ".cache_path", context->getPath() + "disks/" + name + "/cache/"); - blob_storage_disk = wrapWithCache(blob_storage_disk, "blob-storage-cache", cache_path, metadata_path); + azure_blob_storage_disk = wrapWithCache(azure_blob_storage_disk, "azure-blob-storage-cache", cache_path, metadata_path); } - return std::make_shared(blob_storage_disk); + return std::make_shared(azure_blob_storage_disk); }; - factory.registerDiskType("blob_storage", creator); + factory.registerDiskType("azure_blob_storage", creator); } } @@ -121,7 +121,7 @@ void registerDiskBlobStorage(DiskFactory & factory) namespace DB { -void registerDiskBlobStorage(DiskFactory &) {} +void registerDiskAzureBlobStorage(DiskFactory &) {} } diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index e1e901f0d45..b09487c17bc 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -91,7 +91,7 @@ DiskCacheWrapper::readFile( if (!cache_file_predicate(path)) return DiskDecorator::readFile(path, settings, size); - LOG_DEBUG(log, "Read file {} from cache", 
backQuote(path)); + LOG_TEST(log, "Read file {} from cache", backQuote(path)); if (cache_disk->exists(path)) return cache_disk->readFile(path, settings, size); @@ -105,11 +105,11 @@ DiskCacheWrapper::readFile( { /// This thread will responsible for file downloading to cache. metadata->status = DOWNLOADING; - LOG_DEBUG(log, "File {} doesn't exist in cache. Will download it", backQuote(path)); + LOG_TEST(log, "File {} doesn't exist in cache. Will download it", backQuote(path)); } else if (metadata->status == DOWNLOADING) { - LOG_DEBUG(log, "Waiting for file {} download to cache", backQuote(path)); + LOG_TEST(log, "Waiting for file {} download to cache", backQuote(path)); metadata->condition.wait(lock, [metadata] { return metadata->status == DOWNLOADED || metadata->status == ERROR; }); } } @@ -134,7 +134,7 @@ DiskCacheWrapper::readFile( } cache_disk->moveFile(tmp_path, path); - LOG_DEBUG(log, "File {} downloaded to cache", backQuote(path)); + LOG_TEST(log, "File {} downloaded to cache", backQuote(path)); } catch (...) { @@ -163,7 +163,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode if (!cache_file_predicate(path)) return DiskDecorator::writeFile(path, buf_size, mode); - LOG_DEBUG(log, "Write file {} to cache", backQuote(path)); + LOG_TRACE(log, "Write file {} to cache", backQuote(path)); auto dir_path = directoryPath(path); if (!cache_disk->exists(dir_path)) diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index ea8bf719de6..834ed3e0c65 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -253,7 +253,7 @@ void DiskMemory::clearDirectory(const String & path) throw Exception( "Failed to clear directory '" + path + "'. " + iter->first + " is a directory", ErrorCodes::CANNOT_DELETE_DIRECTORY); - files.erase(iter++); + iter = files.erase(iter); } } diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index a1dd276c51f..435f427b05a 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -10,10 +10,10 @@ enum class DiskType Local, RAM, S3, - BlobStorage, HDFS, Encrypted, WebServer, + AzureBlobStorage, }; inline String toString(DiskType disk_type) @@ -26,14 +26,14 @@ inline String toString(DiskType disk_type) return "memory"; case DiskType::S3: return "s3"; - case DiskType::BlobStorage: - return "blob_storage"; case DiskType::HDFS: return "hdfs"; case DiskType::Encrypted: return "encrypted"; case DiskType::WebServer: return "web"; + case DiskType::AzureBlobStorage: + return "azure_blob_storage"; } __builtin_unreachable(); } diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 4d4a438f93b..41c407c10ee 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -75,7 +75,7 @@ std::unique_ptr DiskHDFS::readFile(const String & path, { auto metadata = readMeta(path); - LOG_TRACE(log, + LOG_TEST(log, "Read from file by path: {}. 
Existing HDFS objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index e920e6fd5b9..848726f957d 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -177,7 +177,7 @@ IDiskRemote::Metadata IDiskRemote::createMeta(const String & path) const void IDiskRemote::removeMeta(const String & path, RemoteFSPathKeeperPtr fs_paths_keeper) { - LOG_DEBUG(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + path)); + LOG_TRACE(log, "Remove file by path: {}", backQuote(metadata_disk->getPath() + path)); if (!metadata_disk->isFile(path)) throw Exception(ErrorCodes::CANNOT_DELETE_DIRECTORY, "Path '{}' is a directory", path); @@ -464,7 +464,7 @@ bool IDiskRemote::tryReserve(UInt64 bytes) std::lock_guard lock(reservation_mutex); if (bytes == 0) { - LOG_DEBUG(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); + LOG_TRACE(log, "Reserving 0 bytes on remote_fs disk {}", backQuote(name)); ++reservation_count; return true; } @@ -473,7 +473,7 @@ bool IDiskRemote::tryReserve(UInt64 bytes) UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); if (unreserved_space >= bytes) { - LOG_DEBUG(log, "Reserving {} on disk {}, having unreserved {}.", + LOG_TRACE(log, "Reserving {} on disk {}, having unreserved {}.", ReadableSize(bytes), backQuote(name), ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index 23fd353a5f0..c8484e6088d 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -21,6 +21,8 @@ namespace ProfileEvents extern const Event RemoteFSUnusedPrefetches; extern const Event RemoteFSPrefetchedReads; extern const Event RemoteFSUnprefetchedReads; + extern const Event RemoteFSLazySeeks; + extern const Event RemoteFSSeeksWithReset; extern const Event RemoteFSBuffers; } @@ -152,11 +154,16 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; Stopwatch watch; { - size = prefetch_future.get(); + auto result = prefetch_future.get(); + size = result.size; + auto offset = result.offset; + assert(offset < size); + if (size) { memory.swap(prefetch_buffer); - set(memory.data(), memory.size()); + size -= offset; + set(memory.data() + offset, size); working_buffer.resize(size); file_offset_of_buffer_end += size; } @@ -168,16 +175,23 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() else { ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads); - size = readInto(memory.data(), memory.size()).get(); + auto result = readInto(memory.data(), memory.size()).get(); + size = result.size; + auto offset = result.offset; + assert(offset < size); if (size) { - set(memory.data(), memory.size()); + size -= offset; + set(memory.data() + offset, size); working_buffer.resize(size); file_offset_of_buffer_end += size; } } + if (file_offset_of_buffer_end != impl->offset()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected equality {} == {}. 
It's a bug", file_offset_of_buffer_end, impl->offset()); + prefetch_future = {}; return size; } @@ -231,18 +245,22 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence pos = working_buffer.end(); - /// Note: we read in range [file_offset_of_buffer_end, read_until_position). - if (read_until_position && file_offset_of_buffer_end < *read_until_position - && static_cast(file_offset_of_buffer_end) >= getPosition() - && static_cast(file_offset_of_buffer_end) < getPosition() + static_cast(min_bytes_for_seek)) + /** + * Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer. + * Note: we read in range [file_offset_of_buffer_end, read_until_position). + */ + off_t file_offset_before_seek = impl->offset(); + if (impl->initialized() + && read_until_position && file_offset_of_buffer_end < *read_until_position + && static_cast(file_offset_of_buffer_end) > file_offset_before_seek + && static_cast(file_offset_of_buffer_end) < file_offset_before_seek + static_cast(min_bytes_for_seek)) { - /** - * Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer. - */ - bytes_to_ignore = file_offset_of_buffer_end - getPosition(); + ProfileEvents::increment(ProfileEvents::RemoteFSLazySeeks); + bytes_to_ignore = file_offset_of_buffer_end - file_offset_before_seek; } else { + ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset); impl->reset(); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 534258eaca6..4db0c9e3c71 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -9,7 +9,7 @@ #endif #if USE_AZURE_BLOB_STORAGE -#include +#include #endif #if USE_HDFS @@ -35,9 +35,9 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S #if USE_AZURE_BLOB_STORAGE -SeekableReadBufferPtr ReadBufferFromBlobStorageGather::createImplementationBuffer(const String & path, size_t read_until_position_) const +SeekableReadBufferPtr ReadBufferFromAzureBlobStorageGather::createImplementationBuffer(const String & path, size_t read_until_position_) const { - return std::make_unique(blob_container_client, path, max_single_read_retries, + return std::make_unique(blob_container_client, path, max_single_read_retries, max_single_download_retries, settings.remote_fs_buffer_size, threadpool_read, read_until_position_); } #endif @@ -65,7 +65,7 @@ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(const RemoteMetadata } -size_t ReadBufferFromRemoteFSGather::readInto(char * data, size_t size, size_t offset, size_t ignore) +ReadBufferFromRemoteFSGather::ReadResult ReadBufferFromRemoteFSGather::readInto(char * data, size_t size, size_t offset, size_t ignore) { /** * Set `data` to current working and internal buffers. @@ -73,23 +73,24 @@ size_t ReadBufferFromRemoteFSGather::readInto(char * data, size_t size, size_t o */ set(data, size); - absolute_position = offset; + file_offset_of_buffer_end = offset; bytes_to_ignore = ignore; + if (bytes_to_ignore) + assert(initialized()); auto result = nextImpl(); - bytes_to_ignore = 0; if (result) - return working_buffer.size(); + return {working_buffer.size(), BufferBase::offset()}; - return 0; + return {0, 0}; } void ReadBufferFromRemoteFSGather::initialize() { /// One clickhouse file can be split into multiple files in remote fs. 
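// ---- illustrative aside, not part of the patch above ----
// A minimal sketch of why readInto() now returns both a size and an offset: when some leading
// bytes of a freshly filled buffer were only read to satisfy a small forward seek (lazy ignore),
// the caller exposes just the tail starting at `offset`. The struct and helper below are
// hypothetical stand-ins, not ClickHouse API.
#include <cassert>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

struct ReadResult { size_t size = 0; size_t offset = 0; };

// Pretend the remote read filled `buf` with result.size bytes, of which the first
// result.offset bytes correspond to bytes_to_ignore.
std::string visiblePart(const std::vector<char> & buf, ReadResult result)
{
    assert(result.offset <= result.size);
    return std::string(buf.data() + result.offset, result.size - result.offset);
}

int main()
{
    std::vector<char> buf(16);
    std::memcpy(buf.data(), "skipme-PAYLOAD", 14);
    // 14 bytes were read; the first 7 ("skipme-") were read only to skip forward.
    std::cout << visiblePart(buf, {14, 7}) << '\n';  // prints "PAYLOAD"
}
// ---- end of aside ----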
- auto current_buf_offset = absolute_position; + auto current_buf_offset = file_offset_of_buffer_end; for (size_t i = 0; i < metadata.remote_fs_objects.size(); ++i) { const auto & [file_path, size] = metadata.remote_fs_objects[i]; @@ -144,7 +145,6 @@ bool ReadBufferFromRemoteFSGather::nextImpl() return readImpl(); } - bool ReadBufferFromRemoteFSGather::readImpl() { swap(*current_buf); @@ -155,15 +155,26 @@ bool ReadBufferFromRemoteFSGather::readImpl() * we save how many bytes need to be ignored (new_offset - position() bytes). */ if (bytes_to_ignore) + { current_buf->ignore(bytes_to_ignore); + bytes_to_ignore = 0; + } - auto result = current_buf->next(); + bool result = current_buf->hasPendingData(); + if (result) + { + /// bytes_to_ignore already added. + file_offset_of_buffer_end += current_buf->available(); + } + else + { + result = current_buf->next(); + if (result) + file_offset_of_buffer_end += current_buf->buffer().size(); + } swap(*current_buf); - if (result) - absolute_position += working_buffer.size(); - return result; } @@ -180,7 +191,6 @@ void ReadBufferFromRemoteFSGather::reset() current_buf.reset(); } - String ReadBufferFromRemoteFSGather::getFileName() const { return canonical_path; diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 045ab43850d..ddd651f47a1 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -37,10 +37,20 @@ public: void setReadUntilPosition(size_t position) override; - size_t readInto(char * data, size_t size, size_t offset, size_t ignore = 0); + struct ReadResult + { + size_t size = 0; + size_t offset = 0; + }; + + ReadResult readInto(char * data, size_t size, size_t offset, size_t ignore = 0); size_t getFileSize() const; + size_t offset() const { return file_offset_of_buffer_end; } + + bool initialized() const { return current_buf != nullptr; } + protected: virtual SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const = 0; @@ -57,8 +67,13 @@ private: size_t current_buf_idx = 0; - size_t absolute_position = 0; + size_t file_offset_of_buffer_end = 0; + /** + * File: |___________________| + * Buffer: |~~~~~~~| + * file_offset_of_buffer_end: ^ + */ size_t bytes_to_ignore = 0; size_t read_until_position = 0; @@ -102,11 +117,11 @@ private: #if USE_AZURE_BLOB_STORAGE -/// Reads data from Blob Storage using paths stored in metadata. -class ReadBufferFromBlobStorageGather final : public ReadBufferFromRemoteFSGather +/// Reads data from AzureBlob Storage using paths stored in metadata. 
+class ReadBufferFromAzureBlobStorageGather final : public ReadBufferFromRemoteFSGather { public: - ReadBufferFromBlobStorageGather( + ReadBufferFromAzureBlobStorageGather( const String & path_, std::shared_ptr blob_container_client_, IDiskRemote::Metadata metadata_, diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp index 112124d9fd7..c21a55d68ac 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp @@ -20,7 +20,7 @@ ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS( off_t ReadIndirectBufferFromRemoteFS::getPosition() { - return impl->absolute_position - available(); + return impl->file_offset_of_buffer_end - available(); } @@ -35,29 +35,29 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) if (whence == SEEK_CUR) { /// If position within current working buffer - shift pos. - if (!working_buffer.empty() && size_t(getPosition() + offset_) < impl->absolute_position) + if (!working_buffer.empty() && size_t(getPosition() + offset_) < impl->file_offset_of_buffer_end) { pos += offset_; return getPosition(); } else { - impl->absolute_position += offset_; + impl->file_offset_of_buffer_end += offset_; } } else if (whence == SEEK_SET) { /// If position within current working buffer - shift pos. if (!working_buffer.empty() - && size_t(offset_) >= impl->absolute_position - working_buffer.size() - && size_t(offset_) < impl->absolute_position) + && size_t(offset_) >= impl->file_offset_of_buffer_end - working_buffer.size() + && size_t(offset_) < impl->file_offset_of_buffer_end) { - pos = working_buffer.end() - (impl->absolute_position - offset_); + pos = working_buffer.end() - (impl->file_offset_of_buffer_end - offset_); return getPosition(); } else { - impl->absolute_position = offset_; + impl->file_offset_of_buffer_end = offset_; } } else @@ -66,7 +66,7 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) impl->reset(); pos = working_buffer.end(); - return impl->absolute_position; + return impl->file_offset_of_buffer_end; } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 945b2d3eb7e..4be55ff3ecf 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -28,7 +27,7 @@ namespace CurrentMetrics namespace DB { -size_t ThreadPoolRemoteFSReader::RemoteFSFileDescriptor::readInto(char * data, size_t size, size_t offset, size_t ignore) +ReadBufferFromRemoteFSGather::ReadResult ThreadPoolRemoteFSReader::RemoteFSFileDescriptor::readInto(char * data, size_t size, size_t offset, size_t ignore) { return reader->readInto(data, size, offset, ignore); } @@ -44,18 +43,18 @@ std::future ThreadPoolRemoteFSReader::submit(Reques { auto task = std::make_shared>([request] { - setThreadName("ThreadPoolRemoteFSRead"); + setThreadName("VFSRead"); CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; auto * remote_fs_fd = assert_cast(request.descriptor.get()); Stopwatch watch(CLOCK_MONOTONIC); - auto bytes_read = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore); + auto [bytes_read, offset] = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore); watch.stop(); ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, 
bytes_read); - return bytes_read; + return Result{ .size = bytes_read, .offset = offset }; }); auto future = task->get_future(); diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index c300162e214..b2d5f11724a 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -3,12 +3,12 @@ #include #include #include +#include #include namespace DB { -class ReadBufferFromRemoteFSGather; class ThreadPoolRemoteFSReader : public IAsynchronousReader { @@ -28,9 +28,9 @@ public: struct ThreadPoolRemoteFSReader::RemoteFSFileDescriptor : public IFileDescriptor { public: - RemoteFSFileDescriptor(std::shared_ptr reader_) : reader(reader_) {} + explicit RemoteFSFileDescriptor(std::shared_ptr reader_) : reader(reader_) {} - size_t readInto(char * data, size_t size, size_t offset, size_t ignore = 0); + ReadBufferFromRemoteFSGather::ReadResult readInto(char * data, size_t size, size_t offset, size_t ignore = 0); private: std::shared_ptr reader; diff --git a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp index 6e10aefc7a2..87453440693 100644 --- a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp @@ -1,7 +1,7 @@ #include "WriteIndirectBufferFromRemoteFS.h" #include -#include +#include #include #include @@ -60,7 +60,7 @@ class WriteIndirectBufferFromRemoteFS; #if USE_AZURE_BLOB_STORAGE template -class WriteIndirectBufferFromRemoteFS; +class WriteIndirectBufferFromRemoteFS; #endif #if USE_HDFS diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 0e2f75505fa..201334cbd12 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -219,7 +219,7 @@ std::unique_ptr DiskS3::readFile(const String & path, co auto settings = current_settings.get(); auto metadata = readMeta(path); - LOG_TRACE(log, "Read from file by path: {}. Existing S3 objects: {}", + LOG_TEST(log, "Read from file by path: {}. Existing S3 objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; @@ -355,7 +355,7 @@ void DiskS3::findLastRevision() /// Construct revision number from high to low bits. String revision; revision.reserve(64); - for (int bit = 0; bit < 64; bit++) + for (int bit = 0; bit < 64; ++bit) { auto revision_prefix = revision + "1"; diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index d355d785cea..18ed733ff01 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -168,7 +168,7 @@ private: inline static const String RESTORE_FILE_NAME = "restore"; /// Key has format: ../../r{revision}-{operation} - const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+).*"}; + const re2::RE2 key_regexp {".*/r(\\d+)-(\\w+)$"}; /// Object contains information about schema version. 
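// ---- illustrative aside, not part of the patch above ----
// The key_regexp change in DiskS3.h replaces a trailing ".*" with "$", so restore keys carrying
// anything after the operation name presumably no longer match. A rough illustration using
// std::regex as a stand-in for re2 (the anchoring behaviour relevant here is the same);
// the sample key is made up.
#include <iostream>
#include <regex>
#include <string>

int main()
{
    const std::string key = "operations/r42-rename/extra";

    std::regex old_re(R"(.*/r(\d+)-(\w+).*)");
    std::regex new_re(R"(.*/r(\d+)-(\w+)$)");

    std::cout << std::boolalpha
              << std::regex_match(key, old_re) << '\n'   // true: trailing ".*" swallows "/extra"
              << std::regex_match(key, new_re) << '\n';  // false: "$" rejects the suffix
}
// ---- end of aside ----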
inline static const String SCHEMA_VERSION_OBJECT = ".SCHEMA_VERSION"; diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index 44522f56130..88c3fdde1e0 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -15,7 +15,7 @@ void registerDiskS3(DiskFactory & factory); #endif #if USE_AZURE_BLOB_STORAGE -void registerDiskBlobStorage(DiskFactory & factory); +void registerDiskAzureBlobStorage(DiskFactory & factory); #endif #if USE_SSL @@ -41,7 +41,7 @@ void registerDisks() #endif #if USE_AZURE_BLOB_STORAGE - registerDiskBlobStorage(factory); + registerDiskAzureBlobStorage(factory); #endif #if USE_SSL diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 75b096de425..d292bbf551c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -57,6 +57,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes; format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line; format_settings.csv.delimiter = settings.format_csv_delimiter; + format_settings.csv.tuple_delimiter = settings.format_csv_delimiter; format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default; format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.null_representation = settings.format_csv_null_representation; @@ -114,6 +115,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index a18a20bac7b..d9af07fdc9c 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -95,6 +95,7 @@ struct FormatSettings bool input_format_enum_as_number = false; bool input_format_arrays_as_nested_csv = false; String null_representation = "\\N"; + char tuple_delimiter = ','; } csv; struct Custom @@ -200,6 +201,7 @@ struct FormatSettings struct { bool import_nested = false; + int64_t row_batch_size = 100'000; } orc; /// For capnProto format we should determine how to diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index bf13b7a22c1..645069bfbdf 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -10,6 +11,7 @@ #include #include +#include #include @@ -63,7 +65,7 @@ void NativeReader::resetParser() use_index = false; } -void NativeReader::readData(const IDataType & type, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint, size_t revision) +void NativeReader::readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint) { ISerialization::DeserializeBinaryBulkSettings settings; settings.getter = [&](ISerialization::SubstreamPath) -> 
ReadBuffer * { return &istr; }; @@ -73,21 +75,12 @@ void NativeReader::readData(const IDataType & type, ColumnPtr & column, ReadBuff ISerialization::DeserializeBinaryBulkStatePtr state; - const auto * aggregate_function_data_type = typeid_cast(&type); - if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) - { - auto version = aggregate_function_data_type->getVersionFromRevision(revision); - aggregate_function_data_type->setVersion(version, /* if_empty */true); - } - - auto serialization = type.getDefaultSerialization(); - - serialization->deserializeBinaryBulkStatePrefix(settings, state); - serialization->deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr); + serialization.deserializeBinaryBulkStatePrefix(settings, state); + serialization.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr); if (column->size() != rows) - throw Exception("Cannot read all data in NativeBlockInputStream. Rows read: " + toString(column->size()) + ". Rows expected: " + toString(rows) + ".", - ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read all data in NativeBlockInputStream. Rows read: {}. Rows expected: {}", column->size(), rows); } @@ -151,6 +144,30 @@ Block NativeReader::read() readBinary(type_name, istr); column.type = data_type_factory.get(type_name); + const auto * aggregate_function_data_type = typeid_cast(column.type.get()); + if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) + { + auto version = aggregate_function_data_type->getVersionFromRevision(server_revision); + aggregate_function_data_type->setVersion(version, /*if_empty=*/ true); + } + + SerializationPtr serialization; + if (server_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION) + { + auto info = column.type->createSerializationInfo({}); + + UInt8 has_custom; + readBinary(has_custom, istr); + if (has_custom) + info->deserializeFromKindsBinary(istr); + + serialization = column.type->getSerialization(*info); + } + else + { + serialization = column.type->getDefaultSerialization(); + } + if (use_index) { /// Index allows to do more checks. @@ -161,11 +178,11 @@ Block NativeReader::read() } /// Data - ColumnPtr read_column = column.type->createColumn(); + ColumnPtr read_column = column.type->createColumn(*serialization); double avg_value_size_hint = avg_value_size_hints.empty() ? 0 : avg_value_size_hints[i]; if (rows) /// If no rows, nothing to read. 
- readData(*column.type, read_column, istr, rows, avg_value_size_hint, server_revision); + readData(*serialization, read_column, istr, rows, avg_value_size_hint); column.column = std::move(read_column); @@ -175,8 +192,8 @@ Block NativeReader::read() auto & header_column = header.getByName(column.name); if (!header_column.type->equals(*column.type)) { - column.column = recursiveTypeConversion(column.column, column.type, header.getByPosition(i).type); - column.type = header.getByPosition(i).type; + column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type); + column.type = header.safeGetByPosition(i).type; } } diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 215bfa6812e..1f9eb8b9764 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -31,7 +31,7 @@ public: IndexForNativeFormat::Blocks::const_iterator index_block_it_, IndexForNativeFormat::Blocks::const_iterator index_block_end_); - static void readData(const IDataType & type, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint, size_t revision); + static void readData(const ISerialization & serialization, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint); Block getHeader() const; diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index 1a4cc24a7d9..eb744e130f7 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -11,6 +12,8 @@ #include #include +#include +#include #include namespace DB @@ -43,7 +46,7 @@ void NativeWriter::flush() } -static void writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) +static void writeData(const ISerialization & serialization, const ColumnPtr & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit) { /** If there are columns-constants - then we materialize them. * (Since the data type does not know how to serialize / deserialize constants.) @@ -55,12 +58,10 @@ static void writeData(const IDataType & type, const ColumnPtr & column, WriteBuf settings.position_independent_encoding = false; settings.low_cardinality_max_dictionary_size = 0; //-V1048 - auto serialization = type.getDefaultSerialization(); - ISerialization::SerializeBinaryBulkStatePtr state; - serialization->serializeBinaryBulkStatePrefix(settings, state); - serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state); - serialization->serializeBinaryBulkStateSuffix(settings, state); + serialization.serializeBinaryBulkStatePrefix(settings, state); + serialization.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state); + serialization.serializeBinaryBulkStateSuffix(settings, state); } @@ -140,9 +141,27 @@ void NativeWriter::write(const Block & block) writeStringBinary(type_name, ostr); + /// Serialization. Dynamic, if client supports it. 
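// ---- illustrative aside, not part of the patch above ----
// The NativeReader/NativeWriter changes gate the new "has custom serialization" byte on the
// peer's protocol revision, so older clients and servers keep reading the unchanged layout.
// A schematic sketch of that revision-gating pattern; the constant's value and the stream
// types here are invented for illustration only.
#include <cstdint>
#include <iostream>
#include <sstream>

static constexpr uint64_t MIN_REVISION_WITH_CUSTOM_SERIALIZATION = 11111;  // made-up value

void writeColumnHeader(std::ostream & out, uint64_t client_revision, bool has_custom)
{
    // Old peers know nothing about the extra flag, so it must not be written for them.
    if (client_revision >= MIN_REVISION_WITH_CUSTOM_SERIALIZATION)
        out.put(has_custom ? 1 : 0);
}

bool readColumnHeader(std::istream & in, uint64_t server_revision)
{
    if (server_revision < MIN_REVISION_WITH_CUSTOM_SERIALIZATION)
        return false;  // fall back to the default serialization
    return in.get() == 1;
}

int main()
{
    std::stringstream s;
    writeColumnHeader(s, MIN_REVISION_WITH_CUSTOM_SERIALIZATION, /*has_custom=*/ true);
    std::cout << std::boolalpha
              << readColumnHeader(s, MIN_REVISION_WITH_CUSTOM_SERIALIZATION) << '\n';  // true
}
// ---- end of aside ----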
+ SerializationPtr serialization; + if (client_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION) + { + auto info = column.column->getSerializationInfo(); + serialization = column.type->getSerialization(*info); + + bool has_custom = info->hasCustomSerialization(); + writeBinary(static_cast(has_custom), ostr); + if (has_custom) + info->serialializeKindBinary(ostr); + } + else + { + serialization = column.type->getDefaultSerialization(); + column.column = recursiveRemoveSparse(column.column); + } + /// Data if (rows) /// Zero items of data is always represented as zero number of bytes. - writeData(*column.type, column.column, ostr, 0, 0); + writeData(*serialization, column.column, ostr, 0, 0); if (index) { diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index 00aa631c85b..abcf137f2e7 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -33,7 +33,7 @@ struct CRCImpl static CRCBase base(polynomial); T crc = 0; - for (size_t i = 0; i < size; i++) + for (size_t i = 0; i < size; ++i) crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8); return crc; } diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h index 218dcd083eb..5ccb2e06c44 100644 --- a/src/Functions/CustomWeekTransforms.h +++ b/src/Functions/CustomWeekTransforms.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include /// The default mode value to use for the WEEK() function #define DEFAULT_WEEK_MODE 0 diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 15a08c4e76d..08dac9c2ba0 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index af34f27d6b8..4224a74ae8e 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include diff --git a/src/Functions/FunctionMathUnary.h b/src/Functions/FunctionMathUnary.h index 2d39daac366..d9ca162ba16 100644 --- a/src/Functions/FunctionMathUnary.h +++ b/src/Functions/FunctionMathUnary.h @@ -94,7 +94,7 @@ private: Impl::execute(src_remaining, dst_remaining); if constexpr (is_big_int_v || std::is_same_v) - for (size_t i = 0; i < rows_remaining; i++) + for (size_t i = 0; i < rows_remaining; ++i) dst_data[rows_size + i] = dst_remaining[i]; else memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * sizeof(ReturnType)); diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index 8c507077acd..5248f524a2b 100644 --- a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -56,7 +56,7 @@ public: const auto & source_data = typeid_cast &>(col).getData(); - Int32 scale_diff = typeid_cast(*src.type).getScale() - target_scale; + const Int32 scale_diff = typeid_cast(*src.type).getScale() - target_scale; if (scale_diff == 0) { for (size_t i = 0; i < input_rows_count; ++i) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 9238cc81c37..8018fa8e726 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -34,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -180,6 +182,7 @@ struct ConvertImpl 
vec_null_map_to = &col_null_map_to->getData(); } + bool result_is_bool = isBool(result_type); for (size_t i = 0; i < input_rows_count; ++i) { if constexpr (std::is_same_v != std::is_same_v) @@ -266,6 +269,12 @@ struct ConvertImpl vec_to[i] = static_cast(vec_from[i]); } } + + if constexpr (std::is_same_v) + { + if (result_is_bool) + vec_to[i] = static_cast(vec_to[i]); + } } } @@ -850,11 +859,15 @@ struct ConvertImpl struct ConvertImplGenericToString { - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) { + static_assert(std::is_same_v || std::is_same_v, + "Can be used only to serialize to ColumnString or ColumnFixedString"); + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); const auto & col_with_type_and_name = columnGetNested(arguments[0]); @@ -862,27 +875,25 @@ struct ConvertImplGenericToString const IColumn & col_from = *col_with_type_and_name.column; size_t size = col_from.size(); + auto col_to = result_type->createColumn(); - auto col_to = ColumnString::create(); - - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - - data_to.resize(size * 2); /// Using coefficient 2 for initial size is arbitrary. - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - - FormatSettings format_settings; - auto serialization = type.getDefaultSerialization(); - for (size_t i = 0; i < size; ++i) { - serialization->serializeText(col_from, i, write_buffer, format_settings); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } + ColumnStringHelpers::WriteHelper write_helper( + assert_cast(*col_to), + size); - write_buffer.finalize(); + auto & write_buffer = write_helper.getWriteBuffer(); + + FormatSettings format_settings; + auto serialization = type.getDefaultSerialization(); + for (size_t i = 0; i < size; ++i) + { + serialization->serializeText(col_from, i, write_buffer, format_settings); + write_helper.rowWritten(); + } + + write_helper.finalize(); + } if (result_type->isNullable() && null_map) return ColumnNullable::create(std::move(col_to), std::move(null_map)); @@ -1006,7 +1017,8 @@ inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & else message_buf << " at begin of string"; - if (isNativeNumber(to_type)) + // Currently there are no functions toIPv{4,6}Or{Null,Zero} + if (isNativeNumber(to_type) && !(to_type.getName() == "IPv4" || to_type.getName() == "IPv6")) message_buf << ". Note: there are to" << to_type.getName() << "OrZero and to" << to_type.getName() << "OrNull functions, which returns zero/NULL instead of throwing exception."; throw Exception(message_buf.str(), ErrorCodes::CANNOT_PARSE_TEXT); @@ -1285,40 +1297,35 @@ template struct ConvertImpl, DataTypeFixedString>, ToDataType, Name, ConvertReturnNullOnErrorTag> : ConvertThroughParsing {}; -/// Generic conversion of any type from String. Used for complex types: Array and Tuple. +/// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. 
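// ---- illustrative aside, not part of the patch above ----
// The ConvertImplGenericToString change above delegates the chars/offsets bookkeeping to a
// write helper instead of resizing the buffers by hand. The string-column layout it maintains
// looks roughly like the simplified model below (std containers instead of ClickHouse columns;
// names are invented).
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct StringColumnSketch
{
    std::vector<char> chars;       // all rows concatenated, each terminated by '\0'
    std::vector<uint64_t> offsets; // offsets[i] = end of row i (one past its '\0')

    void rowWritten(const std::string & row)
    {
        chars.insert(chars.end(), row.begin(), row.end());
        chars.push_back('\0');
        offsets.push_back(chars.size());
    }

    std::string rowAt(size_t i) const
    {
        size_t begin = i == 0 ? 0 : offsets[i - 1];
        return std::string(chars.data() + begin, offsets[i] - begin - 1);
    }
};

int main()
{
    StringColumnSketch col;
    col.rowWritten("[1,2,3]");
    col.rowWritten("(4,'x')");
    std::cout << col.rowAt(0) << ' ' << col.rowAt(1) << '\n';  // prints: [1,2,3] (4,'x')
}
// ---- end of aside ----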
+template struct ConvertImplGenericFromString { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) { + static_assert(std::is_same_v || std::is_same_v, + "Can be used only to parse from ColumnString or ColumnFixedString"); + const IColumn & col_from = *arguments[0].column; - size_t size = col_from.size(); - const IDataType & data_type_to = *result_type; - - if (const ColumnString * col_from_string = checkAndGetColumn(&col_from)) + if (const StringColumnType * col_from_string = checkAndGetColumn(&col_from)) { auto res = data_type_to.createColumn(); IColumn & column_to = *res; - column_to.reserve(size); - - const ColumnString::Chars & chars = col_from_string->getChars(); - const IColumn::Offsets & offsets = col_from_string->getOffsets(); - - size_t current_offset = 0; + column_to.reserve(input_rows_count); FormatSettings format_settings; auto serialization = data_type_to.getDefaultSerialization(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < input_rows_count; ++i) { - ReadBufferFromMemory read_buffer(&chars[current_offset], offsets[i] - current_offset - 1); + const auto & val = col_from_string->getDataAt(i); + ReadBufferFromMemory read_buffer(val.data, val.size); serialization->deserializeWholeText(column_to, read_buffer, format_settings); if (!read_buffer.eof()) throwExceptionForIncompletelyParsedValue(read_buffer, result_type); - - current_offset = offsets[i]; } return res; @@ -1352,6 +1359,18 @@ struct ConvertImpl, T, Name, ConvertDefau } }; +template +struct ConvertImpl +{ + template + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, + Additions additions [[maybe_unused]] = Additions()) + { + + return arguments[0].column; + } +}; + /** Conversion from FixedString to String. * Cutting sequences of zero bytes from end of strings. @@ -1721,7 +1740,10 @@ private: throw Exception("Wrong UUID conversion", ErrorCodes::CANNOT_CONVERT_TYPE); } else - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count); + { + result_column + = ConvertImpl::execute(arguments, result_type, input_rows_count); + } } else { @@ -1767,7 +1789,7 @@ private: /// Generic conversion of any type to String. if (std::is_same_v) { - return ConvertImplGenericToString::execute(arguments, result_type); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); } else throw Exception("Illegal type " + arguments[0].type->getName() + " of argument of function " + getName(), @@ -2551,6 +2573,7 @@ private: { /// In case when converting to Nullable type, we apply different parsing rule, /// that will not throw an exception but return NULL in case of malformed input. 
+ FunctionPtr function = FunctionConvertFromString::create(); return createFunctionAdaptor(function, from_type); } @@ -2610,6 +2633,37 @@ private: }; } + template + WrapperType createBoolWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const + { + if (checkAndGetDataType(from_type.get())) + { + return &ConvertImplGenericFromString::execute; + } + + return createWrapper(from_type, to_type, requested_result_is_nullable); + } + + WrapperType createUInt8ToUInt8Wrapper(const DataTypePtr from_type, const DataTypePtr to_type) const + { + return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr + { + if (isBool(from_type) || !isBool(to_type)) + return arguments.front().column; + + /// Special case when we convert UInt8 column to Bool column. + /// both columns have type UInt8, but we shouldn't use identity wrapper, + /// because Bool column can contain only 0 and 1. + auto res_column = to_type->createColumn(); + const auto & data_from = checkAndGetColumn(arguments[0].column.get())->getData(); + auto & data_to = assert_cast(res_column.get())->getData(); + data_to.resize(data_from.size()); + for (size_t i = 0; i != data_from.size(); ++i) + data_to[i] = static_cast(data_from[i]); + return res_column; + }; + } + static WrapperType createStringWrapper(const DataTypePtr & from_type) { FunctionPtr function = FunctionToString::create(); @@ -2725,10 +2779,7 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t /*input_rows_count*/) - { - return ConvertImplGenericFromString::execute(arguments, result_type); - }; + return &ConvertImplGenericFromString::execute; } else { @@ -2745,10 +2796,7 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t /*input_rows_count*/) - { - return ConvertImplGenericFromString::execute(arguments, result_type); - }; + return &ConvertImplGenericFromString::execute; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -2816,10 +2864,7 @@ private: /// Conversion from String through parsing. 
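// ---- illustrative aside, not part of the patch above ----
// The UInt8 -> Bool wrapper above cannot be the identity wrapper because a Bool column must
// hold only 0 or 1, so every source value is normalised through bool. A minimal sketch of that
// normalisation using plain vectors instead of ClickHouse columns.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<uint8_t> castUInt8ToBool(const std::vector<uint8_t> & from)
{
    std::vector<uint8_t> to(from.size());
    for (size_t i = 0; i != from.size(); ++i)
        to[i] = static_cast<bool>(from[i]);  // 0 stays 0, anything else becomes 1
    return to;
}

int main()
{
    for (uint8_t v : castUInt8ToBool({0, 1, 2, 255}))
        std::cout << int(v) << ' ';  // prints: 0 1 1 1
    std::cout << '\n';
}
// ---- end of aside ----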
if (checkAndGetDataType(from_type_untyped.get())) { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t /*input_rows_count*/) - { - return ConvertImplGenericFromString::execute(arguments, result_type); - }; + return &ConvertImplGenericFromString::execute; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -3275,7 +3320,12 @@ private: WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const { if (from_type->equals(*to_type)) + { + if (isUInt8(from_type)) + return createUInt8ToUInt8Wrapper(from_type, to_type); + return createIdentityWrapper(from_type); + } else if (WhichDataType(from_type).isNothing()) return createNothingWrapper(to_type.get()); @@ -3287,7 +3337,6 @@ private: using ToDataType = typename Types::LeftType; if constexpr ( - std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v || @@ -3309,6 +3358,14 @@ private: ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); return true; } + if constexpr (std::is_same_v) + { + if (isBool(to_type)) + ret = createBoolWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + else + ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + return true; + } if constexpr ( std::is_same_v || std::is_same_v) @@ -3330,6 +3387,38 @@ private: return false; }; + auto make_custom_serialization_wrapper = [&](const auto & types) -> bool + { + using Types = std::decay_t; + using ToDataType = typename Types::RightType; + using FromDataType = typename Types::LeftType; + + if constexpr (WhichDataType(FromDataType::type_id).isStringOrFixedString()) + { + if (to_type->getCustomSerialization()) + { + ret = &ConvertImplGenericFromString::execute; + return true; + } + } + if constexpr (WhichDataType(ToDataType::type_id).isStringOrFixedString()) + { + if (from_type->getCustomSerialization()) + { + ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + }; + return true; + } + } + + return false; + }; + + if (callOnTwoTypeIndexes(from_type->getTypeId(), to_type->getTypeId(), make_custom_serialization_wrapper)) + return ret; + if (callOnIndexAndDataType(to_type->getTypeId(), make_default_wrapper)) return ret; diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index c52d54f30aa..71597f2b433 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -68,11 +68,12 @@ public: std::shared_ptr getDictionary(const String & dictionary_name) { - auto dict = getContext()->getExternalDictionariesLoader().getDictionary(dictionary_name, getContext()); + auto current_context = getContext(); + auto dict = current_context->getExternalDictionariesLoader().getDictionary(dictionary_name, current_context); if (!access_checked) { - getContext()->checkAccess(AccessType::dictGet, dict->getDatabaseOrNoDatabaseTag(), dict->getDictionaryID().getTableName()); + current_context->checkAccess(AccessType::dictGet, dict->getDatabaseOrNoDatabaseTag(), dict->getDictionaryID().getTableName()); access_checked = true; } @@ -106,8 +107,9 @@ public: if (!attr_name_col) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function 
dictGet must be a constant string"); - const auto dictionary_name = dict_name_col->getValue(); - const auto attribute_name = attr_name_col->getValue(); + const auto & dictionary_name = dict_name_col->getValue(); + const auto & attribute_name = attr_name_col->getValue(); + return getDictionary(dictionary_name)->isInjective(attribute_name); } diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index f427deced3a..87a2ecd4c57 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -609,7 +609,7 @@ ColumnPtr FunctionAnyArityLogical::executeImpl( ColumnsWithTypeAndName arguments = std::move(args); /// Special implementation for short-circuit arguments. - if (checkShirtCircuitArguments(arguments) != -1) + if (checkShortCircuitArguments(arguments) != -1) return executeShortCircuit(arguments, result_type); ColumnRawPtrs args_in; diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 3ddf7ea84eb..7d4f5489e86 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -185,7 +185,7 @@ public: if constexpr (!Impl::isSaturable()) { auto * result = nativeBoolCast(b, types[0], values[0]); - for (size_t i = 1; i < types.size(); i++) + for (size_t i = 1; i < types.size(); ++i) result = Impl::apply(b, result, nativeBoolCast(b, types[i], values[i])); return b.CreateSelect(result, b.getInt8(1), b.getInt8(0)); } @@ -194,7 +194,7 @@ public: auto * stop = llvm::BasicBlock::Create(next->getContext(), "", next->getParent()); b.SetInsertPoint(stop); auto * phi = b.CreatePHI(b.getInt8Ty(), values.size()); - for (size_t i = 0; i < types.size(); i++) + for (size_t i = 0; i < types.size(); ++i) { b.SetInsertPoint(next); auto * value = values[i]; diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h index 27907626971..a6e705bb1af 100644 --- a/src/Functions/FunctionsStringArray.h +++ b/src/Functions/FunctionsStringArray.h @@ -749,7 +749,7 @@ private: { ColumnsWithTypeAndName cols; cols.emplace_back(col_arr.getDataPtr(), nested_type, "tmp"); - return ConvertImplGenericToString::execute(cols, std::make_shared()); + return ConvertImplGenericToString::execute(cols, std::make_shared(), col_arr.size()); } } diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 6e3b5da9971..6e5d79fd062 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 12f2f50a5f0..cfb4e12a025 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -220,10 +221,15 @@ ColumnPtr IExecutableFunction::executeWithoutLowCardinalityColumns( return res; } -ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const +static void convertSparseColumnsToFull(ColumnsWithTypeAndName & args) +{ + for (auto & column : args) + column.column = recursiveRemoveSparse(column.column); +} + +ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const { ColumnPtr result; - if (useDefaultImplementationForLowCardinalityColumns()) { ColumnsWithTypeAndName columns_without_low_cardinality = 
arguments; @@ -264,6 +270,73 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, return result; } +ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const +{ + if (useDefaultImplementationForSparseColumns()) + { + size_t num_sparse_columns = 0; + size_t num_full_columns = 0; + size_t sparse_column_position = 0; + + for (size_t i = 0; i < arguments.size(); ++i) + { + const auto * column_sparse = checkAndGetColumn(arguments[i].column.get()); + /// In rare case, when sparse column doesn't have default values, + /// it's more convenient to convert it to full before execution of function. + if (column_sparse && column_sparse->getNumberOfDefaults()) + { + sparse_column_position = i; + ++num_sparse_columns; + } + else if (!isColumnConst(*arguments[i].column)) + { + ++num_full_columns; + } + } + + auto columns_without_sparse = arguments; + if (num_sparse_columns == 1 && num_full_columns == 0) + { + auto & arg_with_sparse = columns_without_sparse[sparse_column_position]; + ColumnPtr sparse_offsets; + { + /// New scope to avoid possible mistakes on dangling reference. + const auto & column_sparse = assert_cast(*arg_with_sparse.column); + sparse_offsets = column_sparse.getOffsetsPtr(); + arg_with_sparse.column = column_sparse.getValuesPtr(); + } + + size_t values_size = arg_with_sparse.column->size(); + for (size_t i = 0; i < columns_without_sparse.size(); ++i) + { + if (i == sparse_column_position) + continue; + + columns_without_sparse[i].column = columns_without_sparse[i].column->cloneResized(values_size); + } + + auto res = executeWithoutSparseColumns(columns_without_sparse, result_type, values_size, dry_run); + + if (isColumnConst(*res)) + return res->cloneResized(input_rows_count); + + /// If default of sparse column is changed after execution of function, convert to full column. + if (!result_type->supportsSparseSerialization() || !res->isDefaultAt(0)) + { + const auto & offsets_data = assert_cast &>(*sparse_offsets).getData(); + return res->createWithOffsets(offsets_data, (*res)[0], input_rows_count, /*shift=*/ 1); + } + + return ColumnSparse::create(res, sparse_offsets, input_rows_count); + } + + convertSparseColumnsToFull(columns_without_sparse); + return executeWithoutSparseColumns(columns_without_sparse, result_type, input_rows_count, dry_run); + } + + return executeWithoutSparseColumns(arguments, result_type, input_rows_count, dry_run); +} + void IFunctionOverloadResolver::checkNumberOfArguments(size_t number_of_arguments) const { if (isVariadic()) diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index dfa3f00d1cf..8063ad77ad0 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -76,6 +76,13 @@ protected: */ virtual bool useDefaultImplementationForLowCardinalityColumns() const { return true; } + /** If function arguments has single sparse column and all other arguments are constants, call function on nested column. + * Otherwise, convert all sparse columns to ordinary columns. + * If default value doesn't change after function execution, returns sparse column as a result. + * Otherwise, result column is converted to full. + */ + virtual bool useDefaultImplementationForSparseColumns() const { return true; } + /** Some arguments could remain constant during this implementation. 
*/ virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const { return {}; } @@ -96,6 +103,8 @@ private: ColumnPtr executeWithoutLowCardinalityColumns( const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const; + ColumnPtr executeWithoutSparseColumns( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const; }; using ExecutableFunctionPtr = std::shared_ptr; @@ -351,6 +360,13 @@ protected: */ virtual bool useDefaultImplementationForLowCardinalityColumns() const { return true; } + /** If function arguments has single sparse column and all other arguments are constants, call function on nested column. + * Otherwise, convert all sparse columns to ordinary columns. + * If default value doesn't change after function execution, returns sparse column as a result. + * Otherwise, result column is converted to full. + */ + virtual bool useDefaultImplementationForSparseColumns() const { return true; } + // /// If it isn't, will convert all ColumnLowCardinality arguments to full columns. virtual bool canBeExecutedOnLowCardinalityDictionary() const { return true; } @@ -404,6 +420,13 @@ public: */ virtual bool useDefaultImplementationForLowCardinalityColumns() const { return true; } + /** If function arguments has single sparse column and all other arguments are constants, call function on nested column. + * Otherwise, convert all sparse columns to ordinary columns. + * If default value doesn't change after function execution, returns sparse column as a result. + * Otherwise, result column is converted to full. + */ + virtual bool useDefaultImplementationForSparseColumns() const { return true; } + /// If it isn't, will convert all ColumnLowCardinality arguments to full columns. 
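// ---- illustrative aside, not part of the patch above ----
// The sparse-column default implementation added above boils down to: run the function only on
// the stored values of the single sparse argument, then re-expand the result over the original
// offsets (or keep it sparse if the default maps to a default again). A toy model of that
// expansion step; the value/offset layout below is a simplification invented for illustration.
#include <cstddef>
#include <iostream>
#include <vector>

// values[0] plays the role of the shared default; offsets lists the rows holding values[1..].
std::vector<int> expandWithOffsets(const std::vector<int> & values,
                                   const std::vector<size_t> & offsets,
                                   size_t total_rows)
{
    std::vector<int> full(total_rows, values[0]);
    for (size_t i = 0; i < offsets.size(); ++i)
        full[offsets[i]] = values[i + 1];
    return full;
}

int main()
{
    // Sparse column over 6 rows: default 0, rows 1 and 4 hold 7 and 9.
    std::vector<int> values = {0, 7, 9};
    std::vector<size_t> offsets = {1, 4};

    // "Execute" a function on the stored values only (here: x -> x * 10)...
    for (auto & v : values) v *= 10;

    // ...and expand back to 6 rows; since f(default) is still 0 the result could also stay sparse.
    for (int v : expandWithOffsets(values, offsets, 6))
        std::cout << v << ' ';  // prints: 0 70 0 0 90 0
    std::cout << '\n';
}
// ---- end of aside ----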
virtual bool canBeExecutedOnLowCardinalityDictionary() const { return true; } diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index 9bfe010c0d0..ec43087ad66 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -29,6 +29,7 @@ protected: bool useDefaultImplementationForNulls() const final { return function->useDefaultImplementationForNulls(); } bool useDefaultImplementationForConstants() const final { return function->useDefaultImplementationForConstants(); } bool useDefaultImplementationForLowCardinalityColumns() const final { return function->useDefaultImplementationForLowCardinalityColumns(); } + bool useDefaultImplementationForSparseColumns() const final { return function->useDefaultImplementationForSparseColumns(); } ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return function->getArgumentsThatAreAlwaysConstant(); } bool canBeExecutedOnDefaultArguments() const override { return function->canBeExecutedOnDefaultArguments(); } @@ -124,6 +125,7 @@ public: bool useDefaultImplementationForNulls() const override { return function->useDefaultImplementationForNulls(); } bool useDefaultImplementationForLowCardinalityColumns() const override { return function->useDefaultImplementationForLowCardinalityColumns(); } + bool useDefaultImplementationForSparseColumns() const override { return function->useDefaultImplementationForSparseColumns(); } bool canBeExecutedOnLowCardinalityDictionary() const override { return function->canBeExecutedOnLowCardinalityDictionary(); } FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 4320f47c424..e6305431d8f 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -32,10 +32,18 @@ template struct ReplaceRegexpImpl { /// Sequence of instructions, describing how to get resulting string. - /// Each element is either: - /// - substitution (in that case first element of pair is their number and second element is empty) - /// - string that need to be inserted (in that case, first element of pair is that string and second element is -1) - using Instructions = std::vector>; + struct Instruction + { + /// If not negative - perform substitution of n-th subpattern from the regexp match. + int substitution_num = -1; + /// Otherwise - paste this string verbatim. + std::string literal; + + Instruction(int substitution_num_) : substitution_num(substitution_num_) {} + Instruction(std::string literal_) : literal(std::move(literal_)) {} + }; + + using Instructions = std::vector; static const size_t max_captures = 10; @@ -53,10 +61,10 @@ struct ReplaceRegexpImpl { if (!now.empty()) { - instructions.emplace_back(-1, now); + instructions.emplace_back(now); now = ""; } - instructions.emplace_back(s[i + 1] - '0', String()); + instructions.emplace_back(s[i + 1] - '0'); } else now += s[i + 1]; /// Escaping @@ -68,16 +76,15 @@ struct ReplaceRegexpImpl if (!now.empty()) { - instructions.emplace_back(-1, now); + instructions.emplace_back(now); now = ""; } for (const auto & it : instructions) - if (it.first >= num_captures) - throw Exception( - "Invalid replace instruction in replacement string. 
Id: " + toString(it.first) + ", but regexp has only " - + toString(num_captures - 1) + " subpatterns", - ErrorCodes::BAD_ARGUMENTS); + if (it.substitution_num >= num_captures) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid replace instruction in replacement string. Id: {}, but regexp has only {} subpatterns", + it.substitution_num, num_captures - 1); return instructions; } @@ -93,56 +100,51 @@ struct ReplaceRegexpImpl { re2_st::StringPiece matches[max_captures]; - size_t start_pos = 0; - bool is_first_match = true; - bool is_start_pos_added_one = false; + size_t copy_pos = 0; + size_t match_pos = 0; - while (start_pos < static_cast(input.length())) + while (match_pos < static_cast(input.length())) { /// If no more replacements possible for current string bool can_finish_current_string = false; - if (searcher.Match(input, start_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) + if (searcher.Match(input, match_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) { - if (is_start_pos_added_one) - start_pos -= 1; - const auto & match = matches[0]; - size_t bytes_to_copy = (match.data() - input.data()) - start_pos; + size_t bytes_to_copy = (match.data() - input.data()) - copy_pos; /// Copy prefix before matched regexp without modification res_data.resize(res_data.size() + bytes_to_copy); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, bytes_to_copy); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + copy_pos, bytes_to_copy); res_offset += bytes_to_copy; - start_pos += bytes_to_copy + match.length(); - - /// To avoid infinite loop. - if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) - { - start_pos += 1; - is_start_pos_added_one = true; - } + copy_pos += bytes_to_copy + match.length(); + match_pos = copy_pos; /// Do substitution instructions for (const auto & it : instructions) { - if (it.first >= 0) + if (it.substitution_num >= 0) { - res_data.resize(res_data.size() + matches[it.first].length()); - memcpy(&res_data[res_offset], matches[it.first].data(), matches[it.first].length()); - res_offset += matches[it.first].length(); + const auto & substitution = matches[it.substitution_num]; + + res_data.resize(res_data.size() + substitution.length()); + memcpy(&res_data[res_offset], substitution.data(), substitution.length()); + res_offset += substitution.length(); } else { - res_data.resize(res_data.size() + it.second.size()); - memcpy(&res_data[res_offset], it.second.data(), it.second.size()); - res_offset += it.second.size(); + const auto & literal = it.literal; + + res_data.resize(res_data.size() + literal.size()); + memcpy(&res_data[res_offset], literal.data(), literal.size()); + res_offset += literal.size(); } } - if (replace_one || (!is_first_match && match.length() == 0)) + if (replace_one) can_finish_current_string = true; - is_first_match = false; + else if (match.length() == 0) + ++match_pos; /// Step one character to avoid infinite loop. } else can_finish_current_string = true; @@ -150,10 +152,11 @@ struct ReplaceRegexpImpl /// If ready, append suffix after match to end of string. 
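// ---- illustrative aside, not part of the patch above ----
// The rewritten replacement loop keeps two cursors: copy_pos (how far the input has been copied
// to the output) and match_pos (where the next search starts), and advances match_pos by one
// character on a zero-length match so the loop always terminates. A stripped-down sketch with a
// dummy matcher; only the cursor handling mirrors the idea, everything else is invented.
#include <cstddef>
#include <iostream>
#include <string>

// Dummy matcher: matches the empty string at every position.
bool findMatch(const std::string &, size_t pos, size_t & match_begin, size_t & match_len)
{
    match_begin = pos;
    match_len = 0;
    return true;
}

std::string replaceAll(const std::string & input, const std::string & replacement)
{
    std::string out;
    size_t copy_pos = 0;
    size_t match_pos = 0;
    while (match_pos <= input.size())
    {
        size_t begin, len;
        if (!findMatch(input, match_pos, begin, len))
            break;
        out.append(input, copy_pos, begin - copy_pos);  // copy the unmatched prefix
        out += replacement;
        copy_pos = begin + len;
        match_pos = copy_pos;
        if (len == 0)
            ++match_pos;  // step over one character, otherwise this loop would never end
    }
    out.append(input, copy_pos, std::string::npos);
    return out;
}

int main()
{
    std::cout << replaceAll("ab", "-") << '\n';  // prints "-a-b-"
}
// ---- end of aside ----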
if (can_finish_current_string) { - res_data.resize(res_data.size() + input.length() - start_pos); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + start_pos, input.length() - start_pos); - res_offset += input.length() - start_pos; - start_pos = input.length(); + res_data.resize(res_data.size() + input.length() - copy_pos); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data() + copy_pos, input.length() - copy_pos); + res_offset += input.length() - copy_pos; + copy_pos = input.length(); + match_pos = copy_pos; } } diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index a5913105146..b928254e454 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -204,7 +204,7 @@ private: std::map summing_map; - for (size_t i = 0; i < row_count; i++) + for (size_t i = 0; i < row_count; ++i) { [[maybe_unused]] bool first = true; for (auto & arg : args) @@ -222,7 +222,7 @@ private: } Field temp_val; - for (size_t j = 0; j < len; j++) + for (size_t j = 0; j < len; ++j) { KeyType key; if constexpr (std::is_same::value) diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index c8c86060265..c89a7f80dfd 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/Functions/extractTimeZoneFromFunctionArguments.cpp b/src/Functions/extractTimeZoneFromFunctionArguments.cpp index 50254606510..88e1d664bf0 100644 --- a/src/Functions/extractTimeZoneFromFunctionArguments.cpp +++ b/src/Functions/extractTimeZoneFromFunctionArguments.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 9bb2abcb2c7..9f303b86ad3 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -14,7 +14,7 @@ #include -#include +#include #include #include diff --git a/src/Functions/formatString.h b/src/Functions/formatString.h index c72e7db9579..419ecf1c773 100644 --- a/src/Functions/formatString.h +++ b/src/Functions/formatString.h @@ -42,7 +42,7 @@ struct FormatImpl static void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res) { res = 0; - for (UInt64 pos = l; pos < r; pos++) + for (UInt64 pos = l; pos < r; ++pos) { if (!isNumericASCII(description[pos])) throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 93865782c8e..18951d1a03f 100644 --- a/src/Functions/geoToH3.cpp +++ b/src/Functions/geoToH3.cpp @@ -76,7 +76,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const double lon = col_lon->getFloat64(row); const double lat = col_lat->getFloat64(row); diff --git a/src/Functions/geoToS2.cpp b/src/Functions/geoToS2.cpp index 644e4661412..32d2a1d7a10 100644 --- a/src/Functions/geoToS2.cpp +++ b/src/Functions/geoToS2.cpp @@ -73,7 +73,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const Float64 lon = col_lon->getFloat64(row); const Float64 lat = col_lat->getFloat64(row); diff --git a/src/Functions/h3EdgeAngle.cpp b/src/Functions/h3EdgeAngle.cpp index 68e44e38bb9..5d5ad6cd1d3 
100644 --- a/src/Functions/h3EdgeAngle.cpp +++ b/src/Functions/h3EdgeAngle.cpp @@ -58,7 +58,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const int resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) diff --git a/src/Functions/h3EdgeLengthM.cpp b/src/Functions/h3EdgeLengthM.cpp index eb0aab029b7..3eef9be9345 100644 --- a/src/Functions/h3EdgeLengthM.cpp +++ b/src/Functions/h3EdgeLengthM.cpp @@ -63,7 +63,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) diff --git a/src/Functions/h3GetBaseCell.cpp b/src/Functions/h3GetBaseCell.cpp index 1f635fda715..83978919f2c 100644 --- a/src/Functions/h3GetBaseCell.cpp +++ b/src/Functions/h3GetBaseCell.cpp @@ -55,7 +55,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex = col_hindex->getUInt(row); diff --git a/src/Functions/h3GetFaces.cpp b/src/Functions/h3GetFaces.cpp index 5d82c16296c..c0300e7212b 100644 --- a/src/Functions/h3GetFaces.cpp +++ b/src/Functions/h3GetFaces.cpp @@ -64,7 +64,7 @@ public: auto current_offset = 0; std::vector faces; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { int max_faces = maxFaceCount(data[row]); @@ -73,7 +73,7 @@ public: // function name h3GetFaces (v3.x) changed to getIcosahedronFaces (v4.0.0). 
getIcosahedronFaces(data[row], faces.data()); - for (int i = 0; i < max_faces; i++) + for (int i = 0; i < max_faces; ++i) { // valid icosahedron faces are represented by integers 0-19 if (faces[i] >= 0 && faces[i] <= 19) diff --git a/src/Functions/h3GetResolution.cpp b/src/Functions/h3GetResolution.cpp index cc4a3c7443d..02b634dac89 100644 --- a/src/Functions/h3GetResolution.cpp +++ b/src/Functions/h3GetResolution.cpp @@ -55,7 +55,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex = col_hindex->getUInt(row); diff --git a/src/Functions/h3HexAreaM2.cpp b/src/Functions/h3HexAreaM2.cpp index 6aa8fb31aab..96b301806a5 100644 --- a/src/Functions/h3HexAreaM2.cpp +++ b/src/Functions/h3HexAreaM2.cpp @@ -58,7 +58,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 resolution = col_hindex->getUInt(row); if (resolution > MAX_H3_RES) diff --git a/src/Functions/h3IndexesAreNeighbors.cpp b/src/Functions/h3IndexesAreNeighbors.cpp index f938f7fe784..27eaacad4d6 100644 --- a/src/Functions/h3IndexesAreNeighbors.cpp +++ b/src/Functions/h3IndexesAreNeighbors.cpp @@ -63,7 +63,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex_origin = col_hindex_origin->getUInt(row); const UInt64 hindex_dest = col_hindex_dest->getUInt(row); diff --git a/src/Functions/h3IsPentagon.cpp b/src/Functions/h3IsPentagon.cpp index 039fea39f2a..a6726fe1656 100644 --- a/src/Functions/h3IsPentagon.cpp +++ b/src/Functions/h3IsPentagon.cpp @@ -56,7 +56,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0 ; row < input_rows_count ; row++) + for (size_t row = 0 ; row < input_rows_count ; ++row) { UInt8 res = isPentagon(data[row]); dst_data[row] = res; diff --git a/src/Functions/h3IsResClassIII.cpp b/src/Functions/h3IsResClassIII.cpp index f2f7ae445f2..c6b79d404a4 100644 --- a/src/Functions/h3IsResClassIII.cpp +++ b/src/Functions/h3IsResClassIII.cpp @@ -56,7 +56,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0 ; row < input_rows_count ; row++) + for (size_t row = 0 ; row < input_rows_count ; ++row) { UInt8 res = isResClassIII(data[row]); dst_data[row] = res; diff --git a/src/Functions/h3IsValid.cpp b/src/Functions/h3IsValid.cpp index 891d534375e..aa109eee6b4 100644 --- a/src/Functions/h3IsValid.cpp +++ b/src/Functions/h3IsValid.cpp @@ -55,7 +55,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex = col_hindex->getUInt(row); diff --git a/src/Functions/h3ToChildren.cpp b/src/Functions/h3ToChildren.cpp index 5745838e9cb..56b3dd9a88c 100644 --- a/src/Functions/h3ToChildren.cpp +++ b/src/Functions/h3ToChildren.cpp @@ -76,7 +76,7 @@ public: std::vector hindex_vec; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 parent_hindex = col_hindex->getUInt(row); const UInt8 child_resolution = col_resolution->getUInt(row); diff --git 
a/src/Functions/h3ToParent.cpp b/src/Functions/h3ToParent.cpp index 76ebea6daf6..fef1b16696f 100644 --- a/src/Functions/h3ToParent.cpp +++ b/src/Functions/h3ToParent.cpp @@ -66,7 +66,7 @@ public: auto & dst_data = dst->getData(); dst_data.resize(input_rows_count); - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 hindex = col_hindex->getUInt(row); const UInt8 resolution = col_resolution->getUInt(row); diff --git a/src/Functions/h3kRing.cpp b/src/Functions/h3kRing.cpp index 1bcb3e1ab6c..9fc6312daa4 100644 --- a/src/Functions/h3kRing.cpp +++ b/src/Functions/h3kRing.cpp @@ -73,7 +73,7 @@ public: std::vector hindex_vec; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { const H3Index origin_hindex = col_hindex->getUInt(row); const int k = col_k->getInt(row); diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 953aff3568e..6841098ebcf 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -969,7 +969,7 @@ private: static void executeShortCircuitArguments(ColumnsWithTypeAndName & arguments) { - int last_short_circuit_argument_index = checkShirtCircuitArguments(arguments); + int last_short_circuit_argument_index = checkShortCircuitArguments(arguments); if (last_short_circuit_argument_index == -1) return; diff --git a/src/Functions/ignore.cpp b/src/Functions/ignore.cpp index 931ef4a00ed..77c16cf7819 100644 --- a/src/Functions/ignore.cpp +++ b/src/Functions/ignore.cpp @@ -36,6 +36,8 @@ public: /// (in getResultIfAlwaysReturnsConstantAndHasArguments) bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool useDefaultImplementationForSparseColumns() const override { return false; } + String getName() const override { return name; diff --git a/src/Functions/isIPAddressContainedIn.cpp b/src/Functions/isIPAddressContainedIn.cpp index 048fa04adb1..3d2a38ef4c0 100644 --- a/src/Functions/isIPAddressContainedIn.cpp +++ b/src/Functions/isIPAddressContainedIn.cpp @@ -210,7 +210,7 @@ namespace DB ColumnUInt8::MutablePtr col_res = ColumnUInt8::create(input_rows_count); ColumnUInt8::Container & vec_res = col_res->getData(); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { const auto cidr = parseIPWithCIDR(col_cidr.getDataAt(i)); vec_res[i] = isAddressInRange(addr, cidr) ? 1 : 0; @@ -227,7 +227,7 @@ namespace DB ColumnUInt8::MutablePtr col_res = ColumnUInt8::create(input_rows_count); ColumnUInt8::Container & vec_res = col_res->getData(); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { const auto addr = IPAddressVariant(col_addr.getDataAt(i)); vec_res[i] = isAddressInRange(addr, cidr) ? 1 : 0; @@ -241,7 +241,7 @@ namespace DB ColumnUInt8::MutablePtr col_res = ColumnUInt8::create(input_rows_count); ColumnUInt8::Container & vec_res = col_res->getData(); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { const auto addr = IPAddressVariant(col_addr.getDataAt(i)); const auto cidr = parseIPWithCIDR(col_cidr.getDataAt(i)); diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 03a9da404c2..4e242c4348b 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -310,7 +310,7 @@ public: FunctionLike func_like; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { size_t element_start_row = row != 0 ? 
column_array.getOffsets()[row-1] : 0; size_t elem_size = column_array.getOffsets()[row]- element_start_row; @@ -457,7 +457,7 @@ public: IColumn::Offset current_offset = 0; - for (size_t row = 0; row < input_rows_count; row++) + for (size_t row = 0; row < input_rows_count; ++row) { size_t element_start_row = row != 0 ? nested_column.getOffsets()[row-1] : 0; size_t element_size = nested_column.getOffsets()[row]- element_start_row; @@ -492,7 +492,7 @@ public: auto res = func_like.executeImpl(new_arguments, result_type, input_rows_count); const auto & container = checkAndGetColumn(res.get())->getData(); - for (size_t row_num = 0; row_num < element_size; row_num++) + for (size_t row_num = 0; row_num < element_size; ++row_num) { if (container[row_num] == 1) { diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 3e5242d5f9b..070a7c2f05e 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -262,7 +262,7 @@ public: private: static void executeShortCircuitArguments(ColumnsWithTypeAndName & arguments) { - int last_short_circuit_argument_index = checkShirtCircuitArguments(arguments); + int last_short_circuit_argument_index = checkShortCircuitArguments(arguments); if (last_short_circuit_argument_index < 0) return; diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index 03e46541cdf..c3a9c411cbc 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -139,7 +139,7 @@ public: } else { - for (size_t i = 1; i < arguments.size(); i++) + for (size_t i = 1; i < arguments.size(); ++i) { const auto * array = checkAndGetDataType(arguments[i].get()); if (array == nullptr) diff --git a/src/Functions/polygonArea.cpp b/src/Functions/polygonArea.cpp index 2e38d6c74b9..c4c573490f6 100644 --- a/src/Functions/polygonArea.cpp +++ b/src/Functions/polygonArea.cpp @@ -78,7 +78,7 @@ public: { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) res_data.emplace_back(boost::geometry::area(geometries[i])); } } diff --git a/src/Functions/polygonConvexHull.cpp b/src/Functions/polygonConvexHull.cpp index 887a12b8b6a..e8756f11bba 100644 --- a/src/Functions/polygonConvexHull.cpp +++ b/src/Functions/polygonConvexHull.cpp @@ -75,7 +75,7 @@ public: { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { Polygon convex_hull{}; boost::geometry::convex_hull(geometries[i], convex_hull); diff --git a/src/Functions/polygonPerimeter.cpp b/src/Functions/polygonPerimeter.cpp index 8291020197a..eedb91a1622 100644 --- a/src/Functions/polygonPerimeter.cpp +++ b/src/Functions/polygonPerimeter.cpp @@ -77,7 +77,7 @@ public: { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) res_data.emplace_back(boost::geometry::perimeter(geometries[i])); } } diff --git a/src/Functions/polygonsDistance.cpp b/src/Functions/polygonsDistance.cpp index 8dd88e1c3bd..51c0198b465 100644 --- a/src/Functions/polygonsDistance.cpp +++ b/src/Functions/polygonsDistance.cpp @@ -83,7 +83,7 @@ public: auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); auto second = RightConverter::convert(arguments[1].column->convertToFullColumnIfConst()); - for 
(size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { boost::geometry::correct(first[i]); boost::geometry::correct(second[i]); diff --git a/src/Functions/polygonsEquals.cpp b/src/Functions/polygonsEquals.cpp index da1db43229b..5c572a16d0e 100644 --- a/src/Functions/polygonsEquals.cpp +++ b/src/Functions/polygonsEquals.cpp @@ -82,7 +82,7 @@ public: auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); auto second = RightConverter::convert(arguments[1].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { boost::geometry::correct(first[i]); boost::geometry::correct(second[i]); diff --git a/src/Functions/polygonsSymDifference.cpp b/src/Functions/polygonsSymDifference.cpp index 8ef0142072a..4f718760124 100644 --- a/src/Functions/polygonsSymDifference.cpp +++ b/src/Functions/polygonsSymDifference.cpp @@ -81,7 +81,7 @@ public: auto second = RightConverter::convert(arguments[1].column->convertToFullColumnIfConst()); /// NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign) - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { boost::geometry::correct(first[i]); boost::geometry::correct(second[i]); diff --git a/src/Functions/polygonsUnion.cpp b/src/Functions/polygonsUnion.cpp index 770aa14ac52..e0c6f208c91 100644 --- a/src/Functions/polygonsUnion.cpp +++ b/src/Functions/polygonsUnion.cpp @@ -82,7 +82,7 @@ public: /// We are not interested in some pitfalls in third-party libraries /// NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign) - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { /// Orient the polygons correctly. 
boost::geometry::correct(first[i]); diff --git a/src/Functions/polygonsWithin.cpp b/src/Functions/polygonsWithin.cpp index 66e5b4e6e17..0412c9a656d 100644 --- a/src/Functions/polygonsWithin.cpp +++ b/src/Functions/polygonsWithin.cpp @@ -85,7 +85,7 @@ public: auto second = RightConverter::convert(arguments[1].column->convertToFullColumnIfConst()); /// NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign) - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { boost::geometry::correct(first[i]); boost::geometry::correct(second[i]); diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index c3ae6516e0f..b8d0d20acb3 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -55,7 +55,7 @@ public: Serializer serializer; Geometry geometry; - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { const auto & str = column_string->getDataAt(i).toString(); boost::geometry::read_wkt(str, geometry); diff --git a/src/Functions/s2CapContains.cpp b/src/Functions/s2CapContains.cpp index c3ebbf0d251..100b028646c 100644 --- a/src/Functions/s2CapContains.cpp +++ b/src/Functions/s2CapContains.cpp @@ -91,7 +91,7 @@ public: auto & dst_data = dst->getData(); dst_data.reserve(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row=0 ; row < input_rows_count; ++row) { const auto center = S2CellId(col_center->getUInt(row)); const Float64 degrees = col_degrees->getFloat64(row); diff --git a/src/Functions/s2CapUnion.cpp b/src/Functions/s2CapUnion.cpp index 2328db4cb52..263163963af 100644 --- a/src/Functions/s2CapUnion.cpp +++ b/src/Functions/s2CapUnion.cpp @@ -95,7 +95,7 @@ public: auto & vec_res_radius = col_res_radius->getData(); vec_res_radius.reserve(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 first_center = col_center1->getUInt(row); const Float64 first_radius = col_radius1->getFloat64(row); diff --git a/src/Functions/s2CellsIntersect.cpp b/src/Functions/s2CellsIntersect.cpp index d7801afe0d0..f8273a1fcca 100644 --- a/src/Functions/s2CellsIntersect.cpp +++ b/src/Functions/s2CellsIntersect.cpp @@ -72,7 +72,7 @@ public: auto & dst_data = dst->getData(); dst_data.reserve(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 id_first = col_id_first->getInt(row); const UInt64 id_second = col_id_second->getInt(row); diff --git a/src/Functions/s2GetNeighbors.cpp b/src/Functions/s2GetNeighbors.cpp index 99c1395f3cd..c0b2e634e6f 100644 --- a/src/Functions/s2GetNeighbors.cpp +++ b/src/Functions/s2GetNeighbors.cpp @@ -72,7 +72,7 @@ public: dst_offsets.resize(input_rows_count); size_t current_offset = 0; - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const UInt64 id = col_id->getUInt(row); diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp index 9a6fcd25e5a..f7c39b2a6b1 100644 --- a/src/Functions/s2RectAdd.cpp +++ b/src/Functions/s2RectAdd.cpp @@ -77,7 +77,7 @@ public: auto & vec_res_second = col_res_second->getData(); vec_res_second.reserve(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const auto lo = S2CellId(col_lo->getUInt(row)); const auto hi = 
S2CellId(col_hi->getUInt(row)); diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp index 11db27e68ca..90ced5450bc 100644 --- a/src/Functions/s2RectContains.cpp +++ b/src/Functions/s2RectContains.cpp @@ -70,7 +70,7 @@ public: auto & dst_data = dst->getData(); dst_data.reserve(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const auto lo = S2CellId(col_lo->getUInt(row)); const auto hi = S2CellId(col_hi->getUInt(row)); diff --git a/src/Functions/s2RectIntersection.cpp b/src/Functions/s2RectIntersection.cpp index a8a4536c9e7..b108cc1b64f 100644 --- a/src/Functions/s2RectIntersection.cpp +++ b/src/Functions/s2RectIntersection.cpp @@ -81,7 +81,7 @@ public: auto & vec_res_second = col_res_second->getData(); vec_res_second.reserve(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const auto lo1 = S2CellId(col_lo1->getUInt(row)); const auto hi1 = S2CellId(col_hi1->getUInt(row)); diff --git a/src/Functions/s2RectUnion.cpp b/src/Functions/s2RectUnion.cpp index f187c068345..bd40a747a09 100644 --- a/src/Functions/s2RectUnion.cpp +++ b/src/Functions/s2RectUnion.cpp @@ -79,7 +79,7 @@ public: auto & vec_res_second = col_res_second->getData(); vec_res_second.reserve(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const auto lo1 = S2CellId(col_lo1->getUInt(row)); const auto hi1 = S2CellId(col_hi1->getUInt(row)); diff --git a/src/Functions/s2ToGeo.cpp b/src/Functions/s2ToGeo.cpp index 032fdbfe323..03a67d49e45 100644 --- a/src/Functions/s2ToGeo.cpp +++ b/src/Functions/s2ToGeo.cpp @@ -78,7 +78,7 @@ public: auto & latitude = col_latitude->getData(); latitude.reserve(input_rows_count); - for (const auto row : collections::range(0, input_rows_count)) + for (size_t row = 0; row < input_rows_count; ++row) { const auto id = S2CellId(col_id->getUInt(row)); diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 87d5f955e88..e277c906c1b 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #if defined(OS_LINUX) # include diff --git a/src/Functions/svg.cpp b/src/Functions/svg.cpp index b3a89c0393c..e1d48ffc061 100644 --- a/src/Functions/svg.cpp +++ b/src/Functions/svg.cpp @@ -79,7 +79,7 @@ public: auto figures = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { std::stringstream str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM boost::geometry::correct(figures[i]); diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index d499f1f492f..7533e30c9b9 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -48,36 +48,53 @@ public: const size_t number_of_arguments = arguments.size(); if (number_of_arguments < 1 || number_of_arguments > 2) - throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " - + toString(number_of_arguments) + ", should be 1 or 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2", + getName(), + toString(number_of_arguments)); if 
(!isNativeNumber(arguments[0])) - throw Exception{"Argument for function " + getName() + " must be number", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument for function {} must be number", + getName()); if (number_of_arguments > 1 && !isString(arguments[1])) - throw Exception{"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}", + arguments[1]->getName(), + getName()); return std::make_shared(); } - bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForConstants() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + /** Prevent constant folding for FunctionThrowIf because for short circuit evaluation + * it is unsafe to evaluate this function during DAG analysis. + */ + bool isSuitableForConstantFolding() const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { + if (input_rows_count == 0) + return result_type->createColumn(); + std::optional custom_message; if (arguments.size() == 2) { - const auto * msg_column = checkAndGetColumnConst(arguments[1].column.get()); - if (!msg_column) - throw Exception{"Second argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN}; - custom_message = msg_column->getValue(); + const auto * message_column = checkAndGetColumnConst(arguments[1].column.get()); + if (!message_column) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Second argument for function {} must be constant String", + getName()); + + custom_message = message_column->getValue(); } - const auto * in = arguments.front().column.get(); + auto first_argument_column = arguments.front().column; + const auto * in = first_argument_column.get(); ColumnPtr res; if (!((res = execute(in, custom_message)) @@ -90,7 +107,9 @@ public: || (res = execute(in, custom_message)) || (res = execute(in, custom_message)) || (res = execute(in, custom_message)))) + { throw Exception{"Illegal column " + in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + } return res; } @@ -98,15 +117,22 @@ public: template ColumnPtr execute(const IColumn * in_untyped, const std::optional & message) const { - if (const auto in = checkAndGetColumn>(in_untyped)) + const auto * in = checkAndGetColumn>(in_untyped); + + if (!in) + in = checkAndGetColumnConstData>(in_untyped); + + if (in) { const auto & in_data = in->getData(); if (!memoryIsZero(in_data.data(), in_data.size() * sizeof(in_data[0]))) - throw Exception{message.value_or("Value passed to '" + getName() + "' function is non zero"), - ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO}; + { + throw Exception(ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO, + message.value_or("Value passed to '" + getName() + "' function is non zero")); + } /// We return non constant to avoid constant folding. 
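The throwIf rewrite here turns off the default constant handling and constant folding (isSuitableForConstantFolding returns false): under short-circuit evaluation a constant throwIf argument may legitimately never be evaluated, so folding it during DAG analysis would raise the exception anyway. A plain C++ sketch of that hazard, independent of ClickHouse internals; throwIfNonZero is an illustrative stand-in.

#include <iostream>
#include <stdexcept>

int throwIfNonZero(int x)
{
    if (x != 0)
        throw std::runtime_error("value is non zero");
    return 0;
}

int main()
{
    int cond = 0;
    // Lazy (short-circuit) evaluation: the throwing branch is never executed.
    int value = cond ? throwIfNonZero(1) : 42;

    // An eager "evaluate both branches up front" strategy, which is what constant
    // folding amounts to here, would call throwIfNonZero(1) and throw even though cond == 0.
    std::cout << value << '\n';
    return 0;
}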
- return ColumnUInt8::create(in_data.size(), 0); + return ColumnUInt8::create(in_data.size(), 0); } return nullptr; diff --git a/src/Functions/timezoneOf.cpp b/src/Functions/timezoneOf.cpp index a6556bdb800..03c9e27a3a8 100644 --- a/src/Functions/timezoneOf.cpp +++ b/src/Functions/timezoneOf.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index ecc3b80f088..f8ea44851b6 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Functions/today.cpp b/src/Functions/today.cpp index fb9fd945239..fe63197d127 100644 --- a/src/Functions/today.cpp +++ b/src/Functions/today.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/src/Functions/wkt.cpp b/src/Functions/wkt.cpp index 8fbb8f59d33..732441eeef2 100644 --- a/src/Functions/wkt.cpp +++ b/src/Functions/wkt.cpp @@ -49,7 +49,7 @@ public: auto figures = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); - for (size_t i = 0; i < input_rows_count; i++) + for (size_t i = 0; i < input_rows_count; ++i) { std::stringstream str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM str << boost::geometry::wkt(figures[i]); diff --git a/src/Functions/yesterday.cpp b/src/Functions/yesterday.cpp index f792f885472..364d4721b34 100644 --- a/src/Functions/yesterday.cpp +++ b/src/Functions/yesterday.cpp @@ -1,4 +1,4 @@ -#include +#include #include diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp index 777d9bbbc7f..97e5a470463 100644 --- a/src/IO/AIO.cpp +++ b/src/IO/AIO.cpp @@ -95,7 +95,7 @@ int io_destroy(int ctx) int io_submit(int ctx, long nr, struct iocb * iocbpp[]) { - for (long i = 0; i < nr; i++) + for (long i = 0; i < nr; ++i) { struct aiocb * iocb = &iocbpp[i]->aio; diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp index b2be45471c8..a27c9035c61 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp @@ -69,7 +69,8 @@ bool AsynchronousReadBufferFromFileDescriptor::nextImpl() { Stopwatch watch; CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; - size = prefetch_future.get(); + auto result = prefetch_future.get(); + size = result.size; ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); } @@ -90,7 +91,7 @@ bool AsynchronousReadBufferFromFileDescriptor::nextImpl() { /// No pending request. Do synchronous read. - auto size = readInto(memory.data(), memory.size()).get(); + auto [size, _] = readInto(memory.data(), memory.size()).get(); file_offset_of_buffer_end += size; if (size) @@ -201,4 +202,3 @@ void AsynchronousReadBufferFromFileDescriptor::rewind() } } - diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index e4a81623205..e79e72f3bec 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -49,10 +49,18 @@ public: size_t ignore = 0; }; - /// Less than requested amount of data can be returned. - /// If size is zero - the file has ended. - /// (for example, EINTR must be handled by implementation automatically) - using Result = size_t; + struct Result + { + /// size + /// Less than requested amount of data can be returned. + /// If size is zero - the file has ended. 
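The AsynchronousReader::Result struct being introduced here replaces the old bare size_t, so callers such as AsynchronousReadBufferFromFileDescriptor above now unpack a size and an offset. A self-contained sketch of the caller side; the local Result mirrors the new fields but is not the actual header.

#include <cstddef>
#include <future>
#include <iostream>

// Local stand-in for the new result type: a byte count plus an offset the
// implementation may ask the caller to skip at the front of the buffer.
struct Result
{
    std::size_t size = 0;    // 0 means the file has ended; short reads are allowed
    std::size_t offset = 0;
};

int main()
{
    std::promise<Result> promise;
    std::future<Result> future = promise.get_future();
    promise.set_value({/*size=*/4096, /*offset=*/0});

    auto [size, offset] = future.get();  // callers now unpack both fields
    std::cout << size - offset << " usable bytes\n";
    return 0;
}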
+ /// (for example, EINTR must be handled by implementation automatically) + size_t size = 0; + + /// offset + /// Optional. Useful when implementation needs to do ignore(). + size_t offset = 0; + }; /// Submit request and obtain a handle. This method don't perform any waits. /// If this method did not throw, the caller must wait for the result with 'wait' method diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp index 9daffa3a1d3..30914f9b798 100644 --- a/src/IO/LimitReadBuffer.cpp +++ b/src/IO/LimitReadBuffer.cpp @@ -29,7 +29,8 @@ bool LimitReadBuffer::nextImpl() if (!in->next()) { - working_buffer = in->buffer(); + /// Clearing the buffer with existing data. + set(in->position(), 0); return false; } diff --git a/src/IO/Lz4DeflatingWriteBuffer.cpp b/src/IO/Lz4DeflatingWriteBuffer.cpp index 5d9c5d40e6f..da954b13df9 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.cpp +++ b/src/IO/Lz4DeflatingWriteBuffer.cpp @@ -54,14 +54,19 @@ void Lz4DeflatingWriteBuffer::nextImpl() in_data = reinterpret_cast(working_buffer.begin()); in_capacity = offset(); + out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); + try { if (first_time) { - out->nextIfAtEnd(); - - out_data = reinterpret_cast(out->position()); - out_capacity = out->buffer().end() - out->position(); + if (out_capacity < LZ4F_HEADER_SIZE_MAX) + { + out->next(); + out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); + } /// write frame header and check for errors size_t header_size = LZ4F_compressBegin(ctx, out_data, out_capacity, &kPrefs); @@ -74,24 +79,29 @@ void Lz4DeflatingWriteBuffer::nextImpl() out_capacity -= header_size; out->position() = out->buffer().end() - out_capacity; + out_data = reinterpret_cast(out->position()); + first_time = false; } do { /// Ensure that there is enough space for compressed block of minimal size - if (out_capacity < LZ4F_compressBound(0, &kPrefs)) + size_t min_compressed_block_size = LZ4F_compressBound(1, &kPrefs); + if (out_capacity < min_compressed_block_size) { out->next(); out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); } - out_data = reinterpret_cast(out->position()); - /// LZ4F_compressUpdate compresses whole input buffer at once so we need to shink it manually size_t cur_buffer_size = in_capacity; - while (out_capacity < LZ4F_compressBound(cur_buffer_size, &kPrefs)) - cur_buffer_size /= 2; + if (out_capacity >= min_compressed_block_size) /// We cannot shrink the input buffer if it's already too small. + { + while (out_capacity < LZ4F_compressBound(cur_buffer_size, &kPrefs)) + cur_buffer_size /= 2; + } size_t compressed_size = LZ4F_compressUpdate(ctx, out_data, out_capacity, in_data, cur_buffer_size, nullptr); @@ -101,11 +111,12 @@ void Lz4DeflatingWriteBuffer::nextImpl() "LZ4 failed to encode stream. 
LZ4F version: {}", LZ4F_VERSION); - out_capacity -= compressed_size; in_capacity -= cur_buffer_size; - in_data = reinterpret_cast(working_buffer.end() - in_capacity); + + out_capacity -= compressed_size; out->position() = out->buffer().end() - out_capacity; + out_data = reinterpret_cast(out->position()); } while (in_capacity > 0); } @@ -120,14 +131,16 @@ void Lz4DeflatingWriteBuffer::finalizeBefore() { next(); + out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); + if (out_capacity < LZ4F_compressBound(0, &kPrefs)) { out->next(); out_capacity = out->buffer().end() - out->position(); + out_data = reinterpret_cast(out->position()); } - out_data = reinterpret_cast(out->position()); - /// compression end size_t end_size = LZ4F_compressEnd(ctx, out_data, out_capacity, nullptr); @@ -139,6 +152,7 @@ void Lz4DeflatingWriteBuffer::finalizeBefore() out_capacity -= end_size; out->position() = out->buffer().end() - out_capacity; + out_data = reinterpret_cast(out->position()); } void Lz4DeflatingWriteBuffer::finalizeAfter() diff --git a/src/IO/ReadBufferFromBlobStorage.cpp b/src/IO/ReadBufferFromAzureBlobStorage.cpp similarity index 88% rename from src/IO/ReadBufferFromBlobStorage.cpp rename to src/IO/ReadBufferFromAzureBlobStorage.cpp index ada462f0b87..0ce6db97437 100644 --- a/src/IO/ReadBufferFromBlobStorage.cpp +++ b/src/IO/ReadBufferFromAzureBlobStorage.cpp @@ -4,7 +4,7 @@ #if USE_AZURE_BLOB_STORAGE -#include +#include #include #include #include @@ -22,7 +22,7 @@ namespace ErrorCodes } -ReadBufferFromBlobStorage::ReadBufferFromBlobStorage( +ReadBufferFromAzureBlobStorage::ReadBufferFromAzureBlobStorage( std::shared_ptr blob_container_client_, const String & path_, size_t max_single_read_retries_, @@ -48,7 +48,7 @@ ReadBufferFromBlobStorage::ReadBufferFromBlobStorage( } -bool ReadBufferFromBlobStorage::nextImpl() +bool ReadBufferFromAzureBlobStorage::nextImpl() { if (read_until_position) { @@ -81,9 +81,9 @@ bool ReadBufferFromBlobStorage::nextImpl() } catch (const Azure::Storage::StorageException & e) { - LOG_INFO(log, "Exception caught during Azure Read for file {} at attempt {} : {}", path, i, e.Message); + LOG_INFO(log, "Exception caught during Azure Read for file {} at attempt {}: {}", path, i, e.Message); if (i + 1 == max_single_read_retries) - throw e; + throw; sleepForMilliseconds(sleep_time_with_backoff_milliseconds); sleep_time_with_backoff_milliseconds *= 2; @@ -102,7 +102,7 @@ bool ReadBufferFromBlobStorage::nextImpl() } -off_t ReadBufferFromBlobStorage::seek(off_t offset_, int whence) +off_t ReadBufferFromAzureBlobStorage::seek(off_t offset_, int whence) { if (initialized) throw Exception("Seek is allowed only before first read attempt from the buffer.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); @@ -119,13 +119,13 @@ off_t ReadBufferFromBlobStorage::seek(off_t offset_, int whence) } -off_t ReadBufferFromBlobStorage::getPosition() +off_t ReadBufferFromAzureBlobStorage::getPosition() { return offset - available(); } -void ReadBufferFromBlobStorage::initialize() +void ReadBufferFromAzureBlobStorage::initialize() { if (initialized) return; @@ -149,11 +149,11 @@ void ReadBufferFromBlobStorage::initialize() data_stream = std::move(download_response.Value.BodyStream); break; } - catch (const Azure::Storage::StorageException & e) + catch (const Azure::Core::RequestFailedException & e) { - LOG_INFO(log, "Exception caught during Azure Download for file {} at offset {} at attempt {} : {}", path, offset, i, e.Message); + LOG_INFO(log, "Exception caught 
during Azure Download for file {} at offset {} at attempt {} : {}", path, offset, i + 1, e.Message); if (i + 1 == max_single_download_retries) - throw e; + throw; sleepForMilliseconds(sleep_time_with_backoff_milliseconds); sleep_time_with_backoff_milliseconds *= 2; diff --git a/src/IO/ReadBufferFromBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h similarity index 87% rename from src/IO/ReadBufferFromBlobStorage.h rename to src/IO/ReadBufferFromAzureBlobStorage.h index cd66e897e25..53749ad3199 100644 --- a/src/IO/ReadBufferFromBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -14,11 +14,11 @@ namespace DB { -class ReadBufferFromBlobStorage : public SeekableReadBuffer +class ReadBufferFromAzureBlobStorage : public SeekableReadBuffer { public: - explicit ReadBufferFromBlobStorage( + explicit ReadBufferFromAzureBlobStorage( std::shared_ptr blob_container_client_, const String & path_, size_t max_single_read_retries_, @@ -55,7 +55,7 @@ private: char * data_ptr; size_t data_capacity; - Poco::Logger * log = &Poco::Logger::get("ReadBufferFromBlobStorage"); + Poco::Logger * log = &Poco::Logger::get("ReadBufferFromAzureBlobStorage"); }; } diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 53d2067780e..f01640cb95b 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -235,12 +235,13 @@ std::unique_ptr ReadBufferFromS3::initialize() throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); req.SetRange(fmt::format("bytes={}-{}", offset, read_until_position - 1)); - LOG_DEBUG(log, "Read S3 object. Bucket: {}, Key: {}, Range: {}-{}", bucket, key, offset, read_until_position - 1); + LOG_TEST(log, "Read S3 object. Bucket: {}, Key: {}, Range: {}-{}", bucket, key, offset, read_until_position - 1); } else { - req.SetRange(fmt::format("bytes={}-", offset)); - LOG_DEBUG(log, "Read S3 object. Bucket: {}, Key: {}, Offset: {}", bucket, key, offset); + if (offset) + req.SetRange(fmt::format("bytes={}-", offset)); + LOG_TEST(log, "Read S3 object. Bucket: {}, Key: {}, Offset: {}", bucket, key, offset); } Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index c48306cf6d3..b2ad4035cdc 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -8,9 +8,9 @@ #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -899,13 +899,8 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re { /// Unix timestamp with subsecond precision, already scaled to integer. /// For disambiguation we support only time since 2001-09-09 01:46:40 UTC and less than 30 000 years in future. 
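The readDateTimeTextImpl hunk continuing below replaces the digit-by-digit loop with a single divide/modulo by 10^scale via common::exp10_i32. A standalone worked example of that split; exp10_i is a local stand-in for the ClickHouse helper.

#include <cstdint>
#include <iostream>

// Local stand-in for common::exp10_i32: integer power of ten.
int64_t exp10_i(uint32_t n)
{
    int64_t result = 1;
    while (n--)
        result *= 10;
    return result;
}

int main()
{
    int64_t whole = 1609459200123;  // "1609459200.123" read as one scaled integer, scale = 3
    uint32_t scale = 3;

    int64_t fractional = whole % exp10_i(scale);  // 123
    whole = whole / exp10_i(scale);               // 1609459200

    std::cout << whole << '.' << fractional << '\n';
    return 0;
}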
- - for (size_t i = 0; i < scale; ++i) - { - components.fractional *= 10; - components.fractional += components.whole % 10; - components.whole /= 10; - } + components.fractional = components.whole % common::exp10_i32(scale); + components.whole = components.whole / common::exp10_i32(scale); } datetime64 = DecimalUtils::decimalFromComponents(components, scale); diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 68bdbc9cf86..25b03d66097 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -119,7 +119,7 @@ void PocoHTTPClient::makeRequestInternal( Poco::Logger * log = &Poco::Logger::get("AWSClient"); auto uri = request.GetUri().GetURIString(); - LOG_DEBUG(log, "Make request to: {}", uri); + LOG_TEST(log, "Make request to: {}", uri); enum class S3MetricType { @@ -251,7 +251,7 @@ void PocoHTTPClient::makeRequestInternal( if (request.GetContentBody()) { - LOG_TRACE(log, "Writing request body."); + LOG_TEST(log, "Writing request body."); if (attempt > 0) /// rewind content body buffer. { @@ -259,24 +259,24 @@ void PocoHTTPClient::makeRequestInternal( request.GetContentBody()->seekg(0); } auto size = Poco::StreamCopier::copyStream(*request.GetContentBody(), request_body_stream); - LOG_DEBUG(log, "Written {} bytes to request body", size); + LOG_TEST(log, "Written {} bytes to request body", size); } - LOG_TRACE(log, "Receiving response..."); + LOG_TEST(log, "Receiving response..."); auto & response_body_stream = session->receiveResponse(poco_response); watch.stop(); ProfileEvents::increment(select_metric(S3MetricType::Microseconds), watch.elapsedMicroseconds()); int status_code = static_cast(poco_response.getStatus()); - LOG_DEBUG(log, "Response status: {}, {}", status_code, poco_response.getReason()); + LOG_TEST(log, "Response status: {}, {}", status_code, poco_response.getReason()); if (poco_response.getStatus() == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT) { auto location = poco_response.get("location"); remote_host_filter.checkURL(Poco::URI(location)); uri = location; - LOG_DEBUG(log, "Redirecting request to new location: {}", location); + LOG_TEST(log, "Redirecting request to new location: {}", location); ProfileEvents::increment(select_metric(S3MetricType::Redirects)); @@ -292,7 +292,7 @@ void PocoHTTPClient::makeRequestInternal( response->AddHeader(header_name, header_value); headers_ss << header_name << ": " << header_value << "; "; } - LOG_DEBUG(log, "Received headers: {}", headers_ss.str()); + LOG_TEST(log, "Received headers: {}", headers_ss.str()); if (status_code == 429 || status_code == 503) { // API throttling diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 41b2b1f059a..432dc443300 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -51,8 +51,8 @@ const std::pair & convertLogLevel(Aws::U {Aws::Utils::Logging::LogLevel::Error, {DB::LogsLevel::error, Poco::Message::PRIO_ERROR}}, {Aws::Utils::Logging::LogLevel::Warn, {DB::LogsLevel::warning, Poco::Message::PRIO_WARNING}}, {Aws::Utils::Logging::LogLevel::Info, {DB::LogsLevel::information, Poco::Message::PRIO_INFORMATION}}, - {Aws::Utils::Logging::LogLevel::Debug, {DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG}}, - {Aws::Utils::Logging::LogLevel::Trace, {DB::LogsLevel::trace, Poco::Message::PRIO_TRACE}}, + {Aws::Utils::Logging::LogLevel::Debug, {DB::LogsLevel::debug, Poco::Message::PRIO_TEST}}, + {Aws::Utils::Logging::LogLevel::Trace, {DB::LogsLevel::trace, Poco::Message::PRIO_TEST}}, }; return mapping.at(log_level); } diff --git 
a/src/IO/SynchronousReader.cpp b/src/IO/SynchronousReader.cpp index 599299ddad4..4414da28d28 100644 --- a/src/IO/SynchronousReader.cpp +++ b/src/IO/SynchronousReader.cpp @@ -82,10 +82,9 @@ std::future SynchronousReader::submit(Request reque watch.stop(); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); - return bytes_read; + return Result{ .size = bytes_read, .offset = 0}; + }); } } - - diff --git a/src/IO/ThreadPoolReader.cpp b/src/IO/ThreadPoolReader.cpp index 32bc13ecb75..63bc8fe7c49 100644 --- a/src/IO/ThreadPoolReader.cpp +++ b/src/IO/ThreadPoolReader.cpp @@ -117,7 +117,7 @@ std::future ThreadPoolReader::submit(Request reques if (!res) { /// The file has ended. - promise.set_value(0); + promise.set_value({0, 0}); watch.stop(); ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitElapsedMicroseconds, watch.elapsedMicroseconds()); @@ -176,7 +176,7 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheHitElapsedMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); - promise.set_value(bytes_read); + promise.set_value({bytes_read, 0}); return future; } } @@ -219,7 +219,7 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissElapsedMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); - return bytes_read; + return Result{ .size = bytes_read, .offset = 0 }; }); auto future = task->get_future(); diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 9ceed533855..4d7f300a504 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include diff --git a/src/IO/WriteBufferFromBlobStorage.cpp b/src/IO/WriteBufferFromAzureBlobStorage.cpp similarity index 60% rename from src/IO/WriteBufferFromBlobStorage.cpp rename to src/IO/WriteBufferFromAzureBlobStorage.cpp index 2e63c8c4565..88882fcef65 100644 --- a/src/IO/WriteBufferFromBlobStorage.cpp +++ b/src/IO/WriteBufferFromAzureBlobStorage.cpp @@ -4,15 +4,16 @@ #if USE_AZURE_BLOB_STORAGE -#include +#include #include #include +#include namespace DB { -WriteBufferFromBlobStorage::WriteBufferFromBlobStorage( +WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( std::shared_ptr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, @@ -23,13 +24,32 @@ WriteBufferFromBlobStorage::WriteBufferFromBlobStorage( blob_path(blob_path_) {} -WriteBufferFromBlobStorage::~WriteBufferFromBlobStorage() +WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() { finalize(); } +void WriteBufferFromAzureBlobStorage::finalizeImpl() +{ + const size_t max_tries = 3; + for (size_t i = 0; i < max_tries; ++i) + { + try + { + next(); + break; + } + catch (const Azure::Core::RequestFailedException & e) + { + if (i == max_tries - 1) + throw; + LOG_INFO(&Poco::Logger::get("WriteBufferFromAzureBlobStorage"), + "Exception caught during finalizing azure storage write at attempt {}: {}", i + 1, e.Message); + } + } +} -void WriteBufferFromBlobStorage::nextImpl() +void WriteBufferFromAzureBlobStorage::nextImpl() { if (!offset()) return; @@ -39,6 +59,7 @@ void WriteBufferFromBlobStorage::nextImpl() auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); size_t read = 
0; + std::vector block_ids; while (read < len) { auto part_len = std::min(len - read, max_single_part_upload_size); @@ -51,16 +72,8 @@ void WriteBufferFromBlobStorage::nextImpl() read += part_len; } -} - -void WriteBufferFromBlobStorage::finalizeImpl() -{ - next(); - - auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); block_blob_client.CommitBlockList(block_ids); - finalized = true; } } diff --git a/src/IO/WriteBufferFromBlobStorage.h b/src/IO/WriteBufferFromAzureBlobStorage.h similarity index 78% rename from src/IO/WriteBufferFromBlobStorage.h rename to src/IO/WriteBufferFromAzureBlobStorage.h index 5f8eaba3c0c..cbbfb577a91 100644 --- a/src/IO/WriteBufferFromBlobStorage.h +++ b/src/IO/WriteBufferFromAzureBlobStorage.h @@ -17,25 +17,23 @@ namespace DB { -class WriteBufferFromBlobStorage : public BufferWithOwnMemory +class WriteBufferFromAzureBlobStorage : public BufferWithOwnMemory { public: - explicit WriteBufferFromBlobStorage( + explicit WriteBufferFromAzureBlobStorage( std::shared_ptr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, size_t buf_size_); - ~WriteBufferFromBlobStorage() override; + ~WriteBufferFromAzureBlobStorage() override; void nextImpl() override; private: - void finalizeImpl() override; - std::vector block_ids; std::shared_ptr blob_container_client; size_t max_single_part_upload_size; const String blob_path; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 5498e1c90f3..ca2c202014c 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -9,9 +9,9 @@ #include -#include -#include -#include +#include +#include +#include #include #include #include diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 274a4ecc2f2..4f00ec5f96d 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/IO/tests/gtest_DateTimeToString.cpp b/src/IO/tests/gtest_DateTimeToString.cpp index c30c8943944..2d878fdd548 100644 --- a/src/IO/tests/gtest_DateTimeToString.cpp +++ b/src/IO/tests/gtest_DateTimeToString.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index efea8e9d0f7..ae5ce117c61 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -573,6 +574,14 @@ void Aggregator::createAggregateStates(AggregateDataPtr & aggregate_data) const } } +bool Aggregator::hasSparseArguments(AggregateFunctionInstruction * aggregate_instructions) +{ + for (auto * inst = aggregate_instructions; inst->that; ++inst) + if (inst->has_sparse_arguments) + return true; + return false; +} + /** It's interesting - if you remove `noinline`, then gcc for some reason will inline this function, and the performance decreases (~ 10%). * (Probably because after the inline of this function, more internal functions no longer be inlined.) * Inline does not make sense, since the inner loop is entirely inside this function. 
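The hasSparseArguments helper added just above walks the aggregate-instruction array, which is terminated by an entry whose `that` pointer is null. A standalone sketch of that sentinel-terminated scan; the simplified Instruction struct is illustrative only, not the real AggregateFunctionInstruction.

#include <iostream>

struct Instruction
{
    const void * that = nullptr;        // null marks the end of the array
    bool has_sparse_arguments = false;
};

bool hasSparseArguments(const Instruction * instructions)
{
    for (const auto * inst = instructions; inst->that; ++inst)
        if (inst->has_sparse_arguments)
            return true;
    return false;
}

int main()
{
    int dummy = 0;
    Instruction instructions[] = {{&dummy, false}, {&dummy, true}, {}};
    std::cout << std::boolalpha << hasSparseArguments(instructions) << '\n';  // prints: true
    return 0;
}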
@@ -592,7 +601,7 @@ void NO_INLINE Aggregator::executeImpl( if (!no_more_keys) { #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) + if (compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions)) { executeImplBatch(method, state, aggregates_pool, rows, aggregate_instructions, overflow_row); } @@ -644,7 +653,7 @@ void NO_INLINE Aggregator::executeImplBatch( } } - if (!has_arrays) + if (!has_arrays && !hasSparseArguments(aggregate_instructions)) { for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) { @@ -770,6 +779,8 @@ void NO_INLINE Aggregator::executeImplBatch( if (inst->offsets) inst->batch_that->addBatchArray(rows, places.get(), inst->state_offset, inst->batch_arguments, inst->offsets, aggregates_pool); + else if (inst->has_sparse_arguments) + inst->batch_that->addBatchSparse(places.get(), inst->state_offset, inst->batch_arguments, aggregates_pool); else inst->batch_that->addBatch(rows, places.get(), inst->state_offset, inst->batch_arguments, aggregates_pool); } @@ -835,6 +846,8 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( if (inst->offsets) inst->batch_that->addBatchSinglePlace( inst->offsets[static_cast(rows - 1)], res + inst->state_offset, inst->batch_arguments, arena); + else if (inst->has_sparse_arguments) + inst->batch_that->addBatchSparseSinglePlace(res + inst->state_offset, inst->batch_arguments, arena); else inst->batch_that->addBatchSinglePlace(rows, res + inst->state_offset, inst->batch_arguments, arena); } @@ -870,19 +883,30 @@ void Aggregator::prepareAggregateInstructions(Columns columns, AggregateColumns for (size_t i = 0; i < params.aggregates_size; ++i) { + bool allow_sparse_arguments = aggregate_columns[i].size() == 1; + bool has_sparse_arguments = false; + for (size_t j = 0; j < aggregate_columns[i].size(); ++j) { materialized_columns.push_back(columns.at(params.aggregates[i].arguments[j])->convertToFullColumnIfConst()); aggregate_columns[i][j] = materialized_columns.back().get(); - auto column_no_lc = recursiveRemoveLowCardinality(aggregate_columns[i][j]->getPtr()); - if (column_no_lc.get() != aggregate_columns[i][j]) + auto full_column = allow_sparse_arguments + ? 
aggregate_columns[i][j]->getPtr() + : recursiveRemoveSparse(aggregate_columns[i][j]->getPtr()); + + full_column = recursiveRemoveLowCardinality(full_column); + if (full_column.get() != aggregate_columns[i][j]) { - materialized_columns.emplace_back(std::move(column_no_lc)); + materialized_columns.emplace_back(std::move(full_column)); aggregate_columns[i][j] = materialized_columns.back().get(); } + + if (aggregate_columns[i][j]->isSparse()) + has_sparse_arguments = true; } + aggregate_functions_instructions[i].has_sparse_arguments = has_sparse_arguments; aggregate_functions_instructions[i].arguments = aggregate_columns[i].data(); aggregate_functions_instructions[i].state_offset = offsets_of_aggregate_states[i]; @@ -942,7 +966,7 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData /// Remember the columns we will work with for (size_t i = 0; i < params.keys_size; ++i) { - materialized_columns.push_back(columns.at(params.keys[i])->convertToFullColumnIfConst()); + materialized_columns.push_back(recursiveRemoveSparse(columns.at(params.keys[i]))->convertToFullColumnIfConst()); key_columns[i] = materialized_columns.back().get(); if (!result.isLowCardinality()) diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 3c53769e128..c79c2c5ef64 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1062,6 +1062,7 @@ private: const IAggregateFunction * batch_that{}; const IColumn ** batch_arguments{}; const UInt64 * offsets{}; + bool has_sparse_arguments = false; }; using AggregateFunctionInstructions = std::vector; @@ -1317,6 +1318,8 @@ private: AggregatedDataVariants & data_variants, Columns & key_columns, size_t key_row, MutableColumns & final_key_columns) const; + + static bool hasSparseArguments(AggregateFunctionInstruction * aggregate_instructions); }; diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 121f7c4153f..d1c5fbebbc7 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -69,12 +69,10 @@ static std::unique_ptr openFileIfExists(const std::stri AsynchronousMetrics::AsynchronousMetrics( ContextPtr global_context_, int update_period_seconds, - std::shared_ptr> servers_to_start_before_tables_, - std::shared_ptr> servers_) + const ProtocolServerMetricsFunc & protocol_server_metrics_func_) : WithContext(global_context_) , update_period(update_period_seconds) - , servers_to_start_before_tables(servers_to_start_before_tables_) - , servers(servers_) + , protocol_server_metrics_func(protocol_server_metrics_func_) , log(&Poco::Logger::get("AsynchronousMetrics")) { #if defined(OS_LINUX) @@ -238,7 +236,7 @@ void AsynchronousMetrics::start() thread = std::make_unique([this] { run(); }); } -AsynchronousMetrics::~AsynchronousMetrics() +void AsynchronousMetrics::stop() { try { @@ -249,7 +247,10 @@ AsynchronousMetrics::~AsynchronousMetrics() wait_cond.notify_one(); if (thread) + { thread->join(); + thread.reset(); + } } catch (...) 
{ @@ -257,6 +258,11 @@ AsynchronousMetrics::~AsynchronousMetrics() } } +AsynchronousMetrics::~AsynchronousMetrics() +{ + stop(); +} + AsynchronousMetricValues AsynchronousMetrics::getValues() const { @@ -1381,22 +1387,11 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti return it->second; }; - if (servers_to_start_before_tables) + const auto server_metrics = protocol_server_metrics_func(); + for (const auto & server_metric : server_metrics) { - for (const auto & server : *servers_to_start_before_tables) - { - if (const auto * name = get_metric_name(server.getPortName())) - new_values[name] = server.currentThreads(); - } - } - - if (servers) - { - for (const auto & server : *servers) - { - if (const auto * name = get_metric_name(server.getPortName())) - new_values[name] = server.currentThreads(); - } + if (const auto * name = get_metric_name(server_metric.port_name)) + new_values[name] = server_metric.current_threads; } } diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 7a5c2d638d7..3c7581ce1a3 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -30,6 +30,11 @@ class ReadBuffer; using AsynchronousMetricValue = double; using AsynchronousMetricValues = std::unordered_map; +struct ProtocolServerMetrics +{ + String port_name; + size_t current_threads; +}; /** Periodically (by default, each minute, starting at 30 seconds offset) * calculates and updates some metrics, @@ -41,24 +46,25 @@ using AsynchronousMetricValues = std::unordered_map()>; AsynchronousMetrics( ContextPtr global_context_, int update_period_seconds, - std::shared_ptr> servers_to_start_before_tables_, - std::shared_ptr> servers_); + const ProtocolServerMetricsFunc & protocol_server_metrics_func_); ~AsynchronousMetrics(); /// Separate method allows to initialize the `servers` variable beforehand. void start(); + void stop(); + /// Returns copy of all values. AsynchronousMetricValues getValues() const; private: const std::chrono::seconds update_period; - std::shared_ptr> servers_to_start_before_tables{nullptr}; - std::shared_ptr> servers{nullptr}; + ProtocolServerMetricsFunc protocol_server_metrics_func; mutable std::mutex mutex; std::condition_variable wait_cond; diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 30d0dd4cece..b7b6b84439b 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -320,13 +320,29 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf if (old_config) { for (const auto & key : deleted_keys) - impl.erase(key); + { + if (!automatic_clusters.contains(key)) + impl.erase(key); + } } else - impl.clear(); + { + if (!automatic_clusters.empty()) + std::erase_if(impl, [this](const auto & e) { return automatic_clusters.contains(e.first); }); + else + impl.clear(); + } + for (const auto & key : new_config_keys) { + if (new_config.has(config_prefix + "." 
+ key + ".discovery")) + { + /// Handled in ClusterDiscovery + automatic_clusters.insert(key); + continue; + } + if (key.find('.') != String::npos) throw Exception("Cluster names with dots are not supported: '" + key + "'", ErrorCodes::SYNTAX_ERROR); diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index a64e17264b1..3773dadaf13 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -6,6 +6,8 @@ #include #include +#include +#include namespace Poco { @@ -295,12 +297,15 @@ public: void updateClusters(const Poco::Util::AbstractConfiguration & new_config, const Settings & settings, const String & config_prefix, Poco::Util::AbstractConfiguration * old_config = nullptr); -public: using Impl = std::map; Impl getContainer() const; protected: + + /// setup outside of this class, stored to prevent deleting from impl on config update + std::unordered_set automatic_clusters; + Impl impl; mutable std::mutex mutex; }; diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp new file mode 100644 index 00000000000..8b68ba02504 --- /dev/null +++ b/src/Interpreters/ClusterDiscovery.cpp @@ -0,0 +1,479 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + +fs::path getShardsListPath(const String & zk_root) +{ + return fs::path(zk_root + "/shards"); +} + +} + +/* + * Holds boolean flags for fixed set of keys. + * Flags can be concurrently set from different threads, and consumer can wait for it. + */ +template +class ClusterDiscovery::ConcurrentFlags +{ +public: + template + ConcurrentFlags(It begin, It end) + { + for (auto it = begin; it != end; ++it) + flags.emplace(*it, false); + } + + void set(const T & key) + { + auto it = flags.find(key); + if (it == flags.end()) + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unknown value '{}'", key); + it->second = true; + any_need_update = true; + cv.notify_one(); + } + + /// waits unit at least one flag is set + /// caller should handle all set flags (or set it again manually) + /// note: keys of returen map should not be changed! + /// @param finished - output parameter indicates that stop() was called + std::unordered_map & wait(std::chrono::milliseconds timeout, bool & finished) + { + std::unique_lock lk(mu); + cv.wait_for(lk, timeout, [this]() -> bool { return any_need_update || stop_flag; }); + finished = stop_flag; + + /// all set flags expected to be handled by caller + any_need_update = false; + return flags; + } + + void stop() + { + std::unique_lock lk(mu); + stop_flag = true; + cv.notify_one(); + } + +private: + std::condition_variable cv; + std::mutex mu; + + /// flag indicates that update is required + std::unordered_map flags; + std::atomic_bool any_need_update = true; + bool stop_flag = false; +}; + +ClusterDiscovery::ClusterDiscovery( + const Poco::Util::AbstractConfiguration & config, + ContextPtr context_, + const String & config_prefix) + : context(Context::createCopy(context_)) + , current_node_name(toString(ServerUUID::get())) + , log(&Poco::Logger::get("ClusterDiscovery")) +{ + LOG_DEBUG(log, "Cluster discovery is enabled"); + + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(config_prefix, config_keys); + + for (const auto & key : config_keys) + { + String prefix = config_prefix + "." 
+ key + ".discovery"; + if (!config.has(prefix)) + continue; + + clusters_info.emplace( + key, + ClusterInfo( + /* name_= */ key, + /* zk_root_= */ config.getString(prefix + ".path"), + /* port= */ context->getTCPPort(), + /* secure= */ config.getBool(prefix + ".secure", false), + /* shard_id= */ config.getUInt(prefix + ".shard", 0) + ) + ); + } + clusters_to_update = std::make_shared(config_keys.begin(), config_keys.end()); +} + +/// List node in zookeper for cluster +Strings ClusterDiscovery::getNodeNames(zkutil::ZooKeeperPtr & zk, + const String & zk_root, + const String & cluster_name, + int * version, + bool set_callback) +{ + auto watch_callback = [cluster_name, clusters_to_update=clusters_to_update](auto) { clusters_to_update->set(cluster_name); }; + + Coordination::Stat stat; + Strings nodes = zk->getChildrenWatch(getShardsListPath(zk_root), &stat, set_callback ? watch_callback : Coordination::WatchCallback{}); + if (version) + *version = stat.cversion; + return nodes; +} + +/// Reads node information from specified zookeeper nodes +/// On error returns empty result +ClusterDiscovery::NodesInfo ClusterDiscovery::getNodes(zkutil::ZooKeeperPtr & zk, const String & zk_root, const Strings & node_uuids) +{ + NodesInfo result; + for (const auto & node_uuid : node_uuids) + { + String payload; + bool ok = zk->tryGet(getShardsListPath(zk_root) / node_uuid, payload) && + NodeInfo::parse(payload, result[node_uuid]); + if (!ok) + { + LOG_WARNING(log, "Can't get data from node '{}' in '{}'", node_uuid, zk_root); + return {}; + } + } + return result; +} + +/// Checks if cluster nodes set is changed. +/// Returns true if update required. +/// It performs only shallow check (set of nodes' uuids). +/// So, if node's hostname are changed, then cluster won't be updated. +bool ClusterDiscovery::needUpdate(const Strings & node_uuids, const NodesInfo & nodes) +{ + bool has_difference = node_uuids.size() != nodes.size() || + std::any_of(node_uuids.begin(), node_uuids.end(), [&nodes] (auto u) { return !nodes.contains(u); }); + { + /// Just to log updated nodes, suboptimal, but should be ok for expected update sizes + std::set new_names(node_uuids.begin(), node_uuids.end()); + std::set old_names; + for (const auto & [name, _] : nodes) + old_names.emplace(name); + + auto format_cluster_update = [](const std::set & s1, const std::set & s2) + { + std::vector diff; + std::set_difference(s1.begin(), s1.end(), s2.begin(), s2.end(), std::back_inserter(diff)); + + constexpr size_t max_to_show = 3; + size_t sz = diff.size(); + bool need_crop = sz > max_to_show; + if (need_crop) + diff.resize(max_to_show); + + if (sz == 0) + return fmt::format("{} nodes", sz); + return fmt::format("{} node{} [{}{}]", sz, sz != 1 ? "s" : "", fmt::join(diff, ", "), need_crop ? ",..." 
: ""); + }; + + LOG_DEBUG(log, "Cluster update: added {}, removed {}", + format_cluster_update(new_names, old_names), + format_cluster_update(old_names, new_names)); + } + return has_difference; +} + +ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info) +{ + std::vector> shards; + { + std::map replica_adresses; + + for (const auto & [_, node] : cluster_info.nodes_info) + { + if (cluster_info.current_node.secure != node.secure) + { + LOG_WARNING(log, "Node '{}' in cluster '{}' has different 'secure' value, skipping it", node.address, cluster_info.name); + continue; + } + replica_adresses[node.shard_id].emplace_back(node.address); + } + + shards.reserve(replica_adresses.size()); + for (auto & [_, replicas] : replica_adresses) + shards.emplace_back(std::move(replicas)); + } + + bool secure = cluster_info.current_node.secure; + auto cluster = std::make_shared( + context->getSettings(), + shards, + /* username= */ context->getUserName(), + /* password= */ "", + /* clickhouse_port= */ secure ? context->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT) : context->getTCPPort(), + /* treat_local_as_remote= */ false, + /* treat_local_port_as_remote= */ context->getApplicationType() == Context::ApplicationType::LOCAL, + /* secure= */ secure); + return cluster; +} + +/// Reads data from zookeeper and tries to update cluster. +/// Returns true on success (or no update required). +bool ClusterDiscovery::updateCluster(ClusterInfo & cluster_info) +{ + LOG_DEBUG(log, "Updating cluster '{}'", cluster_info.name); + + auto zk = context->getZooKeeper(); + + int start_version; + Strings node_uuids = getNodeNames(zk, cluster_info.zk_root, cluster_info.name, &start_version, false); + auto & nodes_info = cluster_info.nodes_info; + + if (std::find(node_uuids.begin(), node_uuids.end(), current_node_name) == node_uuids.end()) + { + LOG_ERROR(log, "Can't find current node in cluster '{}', will register again", cluster_info.name); + registerInZk(zk, cluster_info); + nodes_info.clear(); + return false; + } + + if (!needUpdate(node_uuids, nodes_info)) + { + LOG_DEBUG(log, "No update required for cluster '{}'", cluster_info.name); + return true; + } + + nodes_info = getNodes(zk, cluster_info.zk_root, node_uuids); + if (nodes_info.empty()) + { + LOG_WARNING(log, "Can't get nodes info for '{}'", cluster_info.name); + return false; + } + + int current_version; + getNodeNames(zk, cluster_info.zk_root, cluster_info.name, ¤t_version, true); + + if (current_version != start_version) + { + LOG_DEBUG(log, "Cluster '{}' configuration changed during update", cluster_info.name); + nodes_info.clear(); + return false; + } + + LOG_DEBUG(log, "Updating system.clusters record for '{}' with {} nodes", cluster_info.name, cluster_info.nodes_info.size()); + + auto cluster = makeCluster(cluster_info); + context->setCluster(cluster_info.name, cluster); + return true; +} + +void ClusterDiscovery::registerInZk(zkutil::ZooKeeperPtr & zk, ClusterInfo & info) +{ + LOG_DEBUG(log, "Registering current node {} in cluster {}", current_node_name, info.name); + + String node_path = getShardsListPath(info.zk_root) / current_node_name; + zk->createAncestors(node_path); + + zk->createOrUpdate(node_path, info.current_node.serialize(), zkutil::CreateMode::Ephemeral); + LOG_DEBUG(log, "Current node {} registered in cluster {}", current_node_name, info.name); +} + +void ClusterDiscovery::initialUpdate() +{ + auto zk = context->getZooKeeper(); + for (auto & [_, info] : clusters_info) + { + registerInZk(zk, info); + if 
(!updateCluster(info)) + { + LOG_WARNING(log, "Error on initial cluster '{}' update, will retry in background", info.name); + clusters_to_update->set(info.name); + } + } +} + +void ClusterDiscovery::start() +{ + if (clusters_info.empty()) + { + LOG_DEBUG(log, "No defined clusters for discovery"); + return; + } + + try + { + initialUpdate(); + } + catch (...) + { + tryLogCurrentException(log, "Caught exception in cluster discovery initialization"); + } + + using namespace std::chrono_literals; + constexpr static std::chrono::milliseconds DEFAULT_BACKOFF_TIMEOUT = 10ms; + + LOG_DEBUG(log, "Starting working thread"); + main_thread = ThreadFromGlobalPool([this] + { + std::chrono::milliseconds backoff_timeout = DEFAULT_BACKOFF_TIMEOUT; + + bool finish = false; + while (!finish) + { + try + { + finish = runMainThread([&backoff_timeout] { backoff_timeout = DEFAULT_BACKOFF_TIMEOUT; }); + } + catch (...) + { + /* + * it can be zk error (will take new session) or other retriable error, + * should not stop discovery forever + */ + tryLogCurrentException(log, "Caught exception in cluster discovery runMainThread"); + } + std::this_thread::sleep_for(backoff_timeout); + backoff_timeout = std::min(backoff_timeout * 2, std::chrono::milliseconds(3min)); + } + }); +} + +/// Returns `true` on graceful shutdown (no restart required) +bool ClusterDiscovery::runMainThread(std::function up_to_date_callback) +{ + setThreadName("ClusterDiscover"); + LOG_DEBUG(log, "Worker thread started"); + + using namespace std::chrono_literals; + + constexpr auto force_update_interval = 2min; + bool finished = false; + while (!finished) + { + bool all_up_to_date = true; + auto & clusters = clusters_to_update->wait(5s, finished); + for (auto & [cluster_name, need_update] : clusters) + { + auto cluster_info_it = clusters_info.find(cluster_name); + if (cluster_info_it == clusters_info.end()) + { + LOG_ERROR(log, "Unknown cluster '{}'", cluster_name); + continue; + } + auto & cluster_info = cluster_info_it->second; + + if (!need_update.exchange(false)) + { + /// force updating periodically + bool force_update = cluster_info.watch.elapsedSeconds() > std::chrono::seconds(force_update_interval).count(); + if (!force_update) + continue; + } + + if (updateCluster(cluster_info)) + { + cluster_info.watch.restart(); + LOG_DEBUG(log, "Cluster '{}' updated successfully", cluster_name); + } + else + { + all_up_to_date = false; + /// no need to trigger convar, will retry after timeout in `wait` + need_update = true; + LOG_WARNING(log, "Cluster '{}' wasn't updated, will retry", cluster_name); + } + } + + if (all_up_to_date) + { + up_to_date_callback(); + } + } + LOG_DEBUG(log, "Worker thread stopped"); + return finished; +} + +void ClusterDiscovery::shutdown() +{ + LOG_DEBUG(log, "Shutting down"); + clusters_to_update->stop(); + + if (main_thread.joinable()) + main_thread.join(); +} + +ClusterDiscovery::~ClusterDiscovery() +{ + ClusterDiscovery::shutdown(); +} + +bool ClusterDiscovery::NodeInfo::parse(const String & data, NodeInfo & result) +{ + try + { + Poco::JSON::Parser parser; + auto json = parser.parse(data).extract(); + + size_t ver = json->optValue("version", data_ver); + if (ver == data_ver) + { + result.address = json->getValue("address"); + result.secure = json->optValue("secure", false); + result.shard_id = json->optValue("shard_id", 0); + } + else + { + LOG_ERROR( + &Poco::Logger::get("ClusterDiscovery"), + "Unsupported version '{}' of data in zk node '{}'", + ver, data.size() < 1024 ? 
data : "[data too long]"); + } + } + catch (Poco::Exception & e) + { + LOG_WARNING( + &Poco::Logger::get("ClusterDiscovery"), + "Can't parse '{}' from node: {}", + data.size() < 1024 ? data : "[data too long]", e.displayText()); + return false; + } + return true; +} + +String ClusterDiscovery::NodeInfo::serialize() const +{ + Poco::JSON::Object json; + json.set("version", data_ver); + json.set("address", address); + json.set("shard_id", shard_id); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(json, oss); + return oss.str(); +} + +} diff --git a/src/Interpreters/ClusterDiscovery.h b/src/Interpreters/ClusterDiscovery.h new file mode 100644 index 00000000000..2098652c069 --- /dev/null +++ b/src/Interpreters/ClusterDiscovery.h @@ -0,0 +1,124 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +namespace DB +{ + +/* + * Discover cluster nodes. + * + * Each node adds ephemernal node into specified path in zookeeper (each cluster have own path). + * Also node subscribed for updates for these paths, and at each child node chanhe cluster updated. + * When node goes down ephemernal node are destroyed, cluster configuration is updated on other node and gone node is removed from cluster. + */ +class ClusterDiscovery +{ + +public: + ClusterDiscovery( + const Poco::Util::AbstractConfiguration & config, + ContextPtr context_, + const String & config_prefix = "remote_servers"); + + void start(); + + ~ClusterDiscovery(); + +private: + struct NodeInfo + { + /// versioning for format of data stored in zk + static constexpr size_t data_ver = 1; + + /// host:port + String address; + /// is secure tcp port user + bool secure = false; + /// shard number + size_t shard_id = 0; + + NodeInfo() = default; + explicit NodeInfo(const String & address_, bool secure_, size_t shard_id_) + : address(address_) + , secure(secure_) + , shard_id(shard_id_) + {} + + static bool parse(const String & data, NodeInfo & result); + String serialize() const; + }; + + // node uuid -> address ("host:port") + using NodesInfo = std::unordered_map; + + struct ClusterInfo + { + const String name; + const String zk_root; + NodesInfo nodes_info; + + /// Track last update time + Stopwatch watch; + + NodeInfo current_node; + + explicit ClusterInfo(const String & name_, const String & zk_root_, UInt16 port, bool secure, size_t shard_id) + : name(name_) + , zk_root(zk_root_) + , current_node(getFQDNOrHostName() + ":" + toString(port), secure, shard_id) + { + } + }; + + void initialUpdate(); + + void registerInZk(zkutil::ZooKeeperPtr & zk, ClusterInfo & info); + + Strings getNodeNames(zkutil::ZooKeeperPtr & zk, + const String & zk_root, + const String & cluster_name, + int * version = nullptr, + bool set_callback = true); + + NodesInfo getNodes(zkutil::ZooKeeperPtr & zk, const String & zk_root, const Strings & node_uuids); + + ClusterPtr makeCluster(const ClusterInfo & cluster_info); + + bool needUpdate(const Strings & node_uuids, const NodesInfo & nodes); + bool updateCluster(ClusterInfo & cluster_info); + + bool runMainThread(std::function up_to_date_callback); + void shutdown(); + + /// cluster name -> cluster info (zk root, set of nodes) + std::unordered_map clusters_info; + + ContextMutablePtr context; + + String current_node_name; + + template class ConcurrentFlags; + using UpdateFlags = ConcurrentFlags; + + /// Cluster names to update. 
+ /// The `shared_ptr` is used because it's passed to watch callback. + /// It prevents accessing to invalid object after ClusterDiscovery is destroyed. + std::shared_ptr clusters_to_update; + + ThreadFromGlobalPool main_thread; + + Poco::Logger * log; +}; + +} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a71076cfdbb..39041b87768 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -86,6 +86,7 @@ #include #include #include +#include #include @@ -255,6 +256,7 @@ struct ContextSharedPart std::shared_ptr clusters; ConfigurationPtr clusters_config; /// Stores updated configs mutable std::mutex clusters_mutex; /// Guards clusters and clusters_config + std::unique_ptr cluster_discovery; std::shared_ptr async_insert_queue; std::map server_ports; @@ -2199,11 +2201,22 @@ std::shared_ptr Context::getClusters() const return shared->clusters; } +void Context::startClusterDiscovery() +{ + if (!shared->cluster_discovery) + return; + shared->cluster_discovery->start(); +} + /// On repeating calls updates existing clusters and adds new clusters, doesn't delete old clusters -void Context::setClustersConfig(const ConfigurationPtr & config, const String & config_name) +void Context::setClustersConfig(const ConfigurationPtr & config, bool enable_discovery, const String & config_name) { std::lock_guard lock(shared->clusters_mutex); + if (config->getBool("allow_experimental_cluster_discovery", false) && enable_discovery && !shared->cluster_discovery) + { + shared->cluster_discovery = std::make_unique(*config, getGlobalContext()); + } /// Do not update clusters if this part of config wasn't changed. if (shared->clusters && isSameConfiguration(*config, *shared->clusters_config, config_name)) @@ -2213,7 +2226,7 @@ void Context::setClustersConfig(const ConfigurationPtr & config, const String & shared->clusters_config = config; if (!shared->clusters) - shared->clusters = std::make_unique(*shared->clusters_config, settings, config_name); + shared->clusters = std::make_shared(*shared->clusters_config, settings, config_name); else shared->clusters->updateClusters(*shared->clusters_config, settings, config_name, old_clusters_config); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index ffd35e53b7e..2a2783603a2 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -753,7 +753,10 @@ public: std::shared_ptr getClusters() const; std::shared_ptr getCluster(const std::string & cluster_name) const; std::shared_ptr tryGetCluster(const std::string & cluster_name) const; - void setClustersConfig(const ConfigurationPtr & config, const String & config_name = "remote_servers"); + void setClustersConfig(const ConfigurationPtr & config, bool enable_discovery = false, const String & config_name = "remote_servers"); + + void startClusterDiscovery(); + /// Sets custom cluster, but doesn't update configuration void setCluster(const String & cluster_name, const std::shared_ptr & cluster); void reloadClusterConfig() const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9b343bec055..c195cb93c5e 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -115,82 +115,62 @@ bool checkPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_q } } - /// In case of expression/function (order by 1+2 and 2*x1, greatest(1, 2)) replace - /// positions only if all literals are numbers, otherwise it is not positional. 
- bool positional = true; + const auto * ast_literal = typeid_cast(argument.get()); + if (!ast_literal) + return false; - /// Case when GROUP BY element is position. - if (const auto * ast_literal = typeid_cast(argument.get())) + auto which = ast_literal->value.getType(); + if (which != Field::Types::UInt64) + return false; + + auto pos = ast_literal->value.get(); + if (!pos || pos > columns.size()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Positional argument out of bounds: {} (exprected in range [1, {}]", + pos, columns.size()); + + const auto & column = columns[--pos]; + if (typeid_cast(column.get())) { - auto which = ast_literal->value.getType(); - if (which == Field::Types::UInt64) + argument = column->clone(); + } + else if (typeid_cast(column.get())) + { + std::function throw_if_aggregate_function = [&](ASTPtr node) { - auto pos = ast_literal->value.get(); - if (pos > 0 && pos <= columns.size()) + if (const auto * function = typeid_cast(node.get())) { - const auto & column = columns[--pos]; - if (typeid_cast(column.get())) + auto is_aggregate_function = AggregateFunctionFactory::instance().isAggregateFunctionName(function->name); + if (is_aggregate_function) { - argument = column->clone(); - } - else if (typeid_cast(column.get())) - { - std::function throw_if_aggregate_function = [&](ASTPtr node) - { - if (const auto * function = typeid_cast(node.get())) - { - auto is_aggregate_function = AggregateFunctionFactory::instance().isAggregateFunctionName(function->name); - if (is_aggregate_function) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal value (aggregate function) for positional argument in {}", - ASTSelectQuery::expressionToString(expression)); - } - else - { - if (function->arguments) - { - for (const auto & arg : function->arguments->children) - throw_if_aggregate_function(arg); - } - } - } - }; - - if (expression == ASTSelectQuery::Expression::GROUP_BY) - throw_if_aggregate_function(column); - - argument = column->clone(); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal value (aggregate function) for positional argument in {}", + ASTSelectQuery::expressionToString(expression)); } else { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal value for positional argument in {}", - ASTSelectQuery::expressionToString(expression)); + if (function->arguments) + { + for (const auto & arg : function->arguments->children) + throw_if_aggregate_function(arg); + } } } - else if (pos > columns.size() || !pos) - { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Positional argument out of bounds: {} (exprected in range [1, {}]", - pos, columns.size()); - } - } - else - positional = false; - } - else if (const auto * ast_function = typeid_cast(argument.get())) - { - if (ast_function->arguments) - { - for (auto & arg : ast_function->arguments->children) - positional &= checkPositionalArguments(arg, select_query, expression); - } + }; + + if (expression == ASTSelectQuery::Expression::GROUP_BY) + throw_if_aggregate_function(column); + + argument = column->clone(); } else - positional = false; + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal value for positional argument in {}", + ASTSelectQuery::expressionToString(expression)); + } - return positional; + return true; } void replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_query, ASTSelectQuery::Expression expression) @@ -1901,7 +1881,7 @@ std::string ExpressionAnalysisResult::dump() const if 
(!selected_columns.empty()) { ss << "selected_columns "; - for (size_t i = 0; i < selected_columns.size(); i++) + for (size_t i = 0; i < selected_columns.size(); ++i) { if (i > 0) { diff --git a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp index 2de7b4b7846..b266746642f 100644 --- a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -1,5 +1,7 @@ #include "ExternalUserDefinedExecutableFunctionsLoader.h" +#include + #include #include @@ -54,29 +56,44 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", name); String type = config.getString(key_in_config + ".type"); - UserDefinedExecutableFunctionType function_type; + + bool is_executable_pool = false; if (type == "executable") - function_type = UserDefinedExecutableFunctionType::executable; + is_executable_pool = false; else if (type == "executable_pool") - function_type = UserDefinedExecutableFunctionType::executable_pool; + is_executable_pool = true; else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong user defined function type expected 'executable' or 'executable_pool' actual {}", - function_type); + type); + + bool execute_direct = config.getBool(key_in_config + ".execute_direct", true); + + String command_value = config.getString(key_in_config + ".command"); + std::vector command_arguments; + + if (execute_direct) + { + boost::split(command_arguments, command_value, [](char c) { return c == ' '; }); + + command_value = std::move(command_arguments[0]); + command_arguments.erase(command_arguments.begin()); + } - String command = config.getString(key_in_config + ".command"); String format = config.getString(key_in_config + ".format"); DataTypePtr result_type = DataTypeFactory::instance().get(config.getString(key_in_config + ".return_type")); bool send_chunk_header = config.getBool(key_in_config + ".send_chunk_header", false); + size_t command_termination_timeout_seconds = config.getUInt64(key_in_config + ".command_termination_timeout", 10); + size_t command_read_timeout_milliseconds = config.getUInt64(key_in_config + ".command_read_timeout", 10000); + size_t command_write_timeout_milliseconds = config.getUInt64(key_in_config + ".command_write_timeout", 10000); size_t pool_size = 0; - size_t command_termination_timeout = 0; size_t max_command_execution_time = 0; - if (function_type == UserDefinedExecutableFunctionType::executable_pool) + + if (is_executable_pool) { pool_size = config.getUInt64(key_in_config + ".pool_size", 16); - command_termination_timeout = config.getUInt64(key_in_config + ".command_termination_timeout", 10); max_command_execution_time = config.getUInt64(key_in_config + ".max_command_execution_time", 10); size_t max_execution_time_seconds = static_cast(getContext()->getSettings().max_execution_time.totalSeconds()); @@ -106,19 +123,28 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create UserDefinedExecutableFunctionConfiguration function_configuration { - .type = function_type, .name = std::move(name), //-V1030 - .script_path = std::move(command), //-V1030 - .format = std::move(format), //-V1030 + .command = std::move(command_value), //-V1030 + .command_arguments = std::move(command_arguments), //-V1030 .argument_types = std::move(argument_types), //-V1030 .result_type = 
std::move(result_type), //-V1030 - .pool_size = pool_size, - .command_termination_timeout = command_termination_timeout, - .max_command_execution_time = max_command_execution_time, - .send_chunk_header = send_chunk_header }; - return std::make_shared(function_configuration, lifetime); + ShellCommandSourceCoordinator::Configuration shell_command_coordinator_configration + { + .format = std::move(format), //-V1030 + .command_termination_timeout_seconds = command_termination_timeout_seconds, + .command_read_timeout_milliseconds = command_read_timeout_milliseconds, + .command_write_timeout_milliseconds = command_write_timeout_milliseconds, + .pool_size = pool_size, + .max_command_execution_time_seconds = max_command_execution_time, + .is_executable_pool = is_executable_pool, + .send_chunk_header = send_chunk_header, + .execute_direct = execute_direct + }; + + auto coordinator = std::make_shared(shell_command_coordinator_configration); + return std::make_shared(function_configuration, std::move(coordinator), lifetime); } } diff --git a/src/Interpreters/FillingRow.cpp b/src/Interpreters/FillingRow.cpp index df99c0d11ed..94f185a44cc 100644 --- a/src/Interpreters/FillingRow.cpp +++ b/src/Interpreters/FillingRow.cpp @@ -1,5 +1,4 @@ #include -#include #include diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 6ba9e7505f2..82d8356b7c7 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -268,11 +268,9 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s LOG_TRACE(log, "Joining on: {}", fmt::join(log_text, " | ")); } - JoinCommon::removeLowCardinalityInplace(right_table_keys); - + JoinCommon::convertToFullColumnsInplace(right_table_keys); initRightBlockStructure(data->sample_block); - JoinCommon::createMissedColumns(sample_block_with_columns_to_add); if (nullable_right_side) diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 5f7c54e427f..2475d437acb 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -48,10 +48,15 @@ BlockIO InterpreterAlterQuery::execute() FunctionNameNormalizer().visit(query_ptr.get()); const auto & alter = query_ptr->as(); if (alter.alter_object == ASTAlterQuery::AlterObjectType::DATABASE) + { return executeToDatabase(alter); + } else if (alter.alter_object == ASTAlterQuery::AlterObjectType::TABLE || alter.alter_object == ASTAlterQuery::AlterObjectType::LIVE_VIEW) + { return executeToTable(alter); + } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown alter object type"); } diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 638c671c3a3..36ea2949b6a 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -150,7 +150,7 @@ BlockIO InterpreterDescribeQuery::execute() res_columns[6]->insertDefault(); res_columns[7]->insert(1u); - }, column.type->getDefaultSerialization(), column.type, nullptr); + }, {column.type->getDefaultSerialization(), column.type, nullptr, nullptr}); } } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index d1b8a056053..a1f83c81a81 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -374,7 +375,7 @@ BlockIO InterpreterInsertQuery::execute() pipeline = 
interpreter_watch.buildQueryPipeline(); } - for (size_t i = 0; i < out_streams_size; i++) + for (size_t i = 0; i < out_streams_size; ++i) { auto out = buildChainImpl(table, metadata_snapshot, query_sample_block, nullptr, nullptr); out_chains.emplace_back(std::move(out)); @@ -402,6 +403,13 @@ BlockIO InterpreterInsertQuery::execute() return std::make_shared(in_header, actions); }); + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + size_t num_select_threads = pipeline.getNumThreads(); size_t num_insert_threads = std::max_element(out_chains.begin(), out_chains.end(), [&](const auto &a, const auto &b) { @@ -458,7 +466,7 @@ StorageID InterpreterInsertQuery::getDatabaseTable() const } -void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const +void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, ContextPtr context_) { elem.query_kind = "Insert"; const auto & insert_table = context_->getInsertionTable(); @@ -469,4 +477,9 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, cons } } +void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const +{ + extendQueryLogElemImpl(elem, context_); +} + } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index e5733a8c28b..93de92a0680 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -40,6 +40,7 @@ public: ThreadStatus * thread_status = nullptr, std::atomic_uint64_t * elapsed_counter_ms = nullptr); + static void extendQueryLogElemImpl(QueryLogElement & elem, ContextPtr context_); void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context_) const override; StoragePtr getTable(ASTInsertQuery & query); diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 960fddccb8c..b39ededaa91 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -469,7 +469,7 @@ void InterpreterSystemQuery::restoreReplica() { getContext()->checkAccess(AccessType::SYSTEM_RESTORE_REPLICA, table_id); - const zkutil::ZooKeeperPtr& zookeeper = getContext()->getZooKeeper(); + const zkutil::ZooKeeperPtr & zookeeper = getContext()->getZooKeeper(); if (zookeeper->expired()) throw Exception(ErrorCodes::NO_ZOOKEEPER, diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 6f4fef46886..7f22386f54b 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -534,8 +534,9 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right if (right_sample_block.getByName(right_key).type->lowCardinality()) lowcard_right_keys.push_back(right_key); } - JoinCommon::removeLowCardinalityInplace(right_table_keys); - JoinCommon::removeLowCardinalityInplace(right_sample_block, key_names_right); + + JoinCommon::convertToFullColumnsInplace(right_table_keys); + JoinCommon::convertToFullColumnsInplace(right_sample_block, key_names_right); const NameSet required_right_keys = table_join->requiredRightKeys(); for (const auto & column : right_table_keys) @@ -664,7 +665,7 @@ bool MergeJoin::saveRightBlock(Block && 
block) Block MergeJoin::modifyRightBlock(const Block & src_block) const { Block block = materializeBlock(src_block); - JoinCommon::removeLowCardinalityInplace(block, table_join->getOnlyClause().key_names_right); + JoinCommon::convertToFullColumnsInplace(block, table_join->getOnlyClause().key_names_right); return block; } @@ -706,7 +707,7 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) lowcard_keys.push_back(column_name); } - JoinCommon::removeLowCardinalityInplace(block, key_names_left, false); + JoinCommon::convertToFullColumnsInplace(block, key_names_left, false); sortBlock(block, left_sort_description); diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 8851a5eb6ec..9494c4133ff 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -20,10 +20,12 @@ #include #include #include +#include #include #include #include #include +#include #include namespace DB @@ -435,22 +437,6 @@ void InterpreterCreateImpl::validate(const InterpreterCreateImpl::TQuery & creat } } -static ASTPtr tryGetTableOverride(const String & mapped_database, const String & table) -{ - if (auto database_ptr = DatabaseCatalog::instance().tryGetDatabase(mapped_database)) - { - auto create_query = database_ptr->getCreateDatabaseQuery(); - if (auto * create_database_query = create_query->as()) - { - if (create_database_query->table_overrides) - { - return create_database_query->table_overrides->tryGetTableOverride(table); - } - } - } - return nullptr; -} - ASTs InterpreterCreateImpl::getRewrittenQueries( const TQuery & create_query, ContextPtr context, const String & mapped_to_database, const String & mysql_database) { @@ -535,10 +521,10 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( rewritten_query->set(rewritten_query->storage, storage); rewritten_query->set(rewritten_query->columns_list, columns); - if (auto table_override = tryGetTableOverride(mapped_to_database, create_query.table)) + if (auto override_ast = tryGetTableOverride(mapped_to_database, create_query.table)) { - auto * override_ast = table_override->as(); - override_ast->applyToCreateTableQuery(rewritten_query.get()); + const auto & override = override_ast->as(); + applyTableOverrideToCreateQuery(override, rewritten_query.get()); } return ASTs{rewritten_query}; diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 5e18b0de2e0..02af07bc00c 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -40,7 +40,7 @@ TEST(MySQLCreateRewritten, ColumnsDataType) {"TINYINT", "Int8"}, {"SMALLINT", "Int16"}, {"MEDIUMINT", "Int32"}, {"INT", "Int32"}, {"INTEGER", "Int32"}, {"BIGINT", "Int64"}, {"FLOAT", "Float32"}, {"DOUBLE", "Float64"}, {"VARCHAR(10)", "String"}, {"CHAR(10)", "String"}, {"Date", "Date"}, {"DateTime", "DateTime"}, - {"TIMESTAMP", "DateTime"}, {"BOOLEAN", "Bool"} + {"TIMESTAMP", "DateTime"}, {"BOOLEAN", "Bool"}, {"BIT", "UInt64"} }; for (const auto & [test_type, mapped_type] : test_types) diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index 1aec850e3dc..b2d18e4d40d 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -49,7 +49,6 @@ struct PartLogElement UInt16 error = 0; String exception; - static std::string name() { return "PartLog"; } static NamesAndTypesList getNamesAndTypes(); diff 
--git a/src/Interpreters/QueryViewsLog.cpp b/src/Interpreters/QueryViewsLog.cpp index 2c0f1ecd878..c0703d77691 100644 --- a/src/Interpreters/QueryViewsLog.cpp +++ b/src/Interpreters/QueryViewsLog.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 709ecdc239c..bc95a940c18 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -41,6 +41,9 @@ struct SelectQueryOptions /// It is needed because lazy normal projections require special planning in FetchColumns stage, such as adding WHERE transform. /// It is also used to avoid adding aggregating step when aggregate projection is chosen. bool is_projection_query = false; + /// This flag is needed for projection description. + /// Otherwise, keys for GROUP BY may be removed as constants. + bool ignore_ast_optimizations = false; bool ignore_alias = false; bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select @@ -120,6 +123,12 @@ struct SelectQueryOptions return *this; } + SelectQueryOptions & ignoreASTOptimizationsAlias(bool value = true) + { + ignore_ast_optimizations = value; + return *this; + } + SelectQueryOptions & setInternal(bool value = false) { is_internal = value; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index c26d8b52049..2af9a2b6bbc 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -311,7 +311,7 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So try { - user_id = global_context->getAccessControl().login(credentials_, address.host()); + user_id = global_context->getAccessControl().authenticate(credentials_, address.host()); LOG_DEBUG(log, "{} Authenticated with global context as user {}", toString(auth_id), user_id ? 
toString(*user_id) : ""); } diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 90fc0f6c0d0..0ccaae9a795 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -177,7 +177,7 @@ bool Set::insertFromBlock(const ColumnsWithTypeAndName & columns) /// Remember the columns we will work with for (size_t i = 0; i < keys_size; ++i) { - materialized_columns.emplace_back(columns.at(i).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality()); + materialized_columns.emplace_back(columns.at(i).column->convertToFullIfNeeded()); key_columns.emplace_back(materialized_columns.back().get()); } diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 86fd9ae5ddd..64b25ca9777 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -744,8 +744,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, && result.storage->supportsSubcolumns() && result.metadata_snapshot) optimizeFunctionsToSubcolumns(query, result.metadata_snapshot); - optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif); - /// Move arithmetic operations out of aggregation functions if (settings.optimize_arithmetic_operations_in_aggregate_functions) optimizeAggregationFunctions(query); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 6b3a50d88e2..0285bdf333c 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -957,7 +957,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select unknown_required_source_columns.erase(column_name); if (!required.count(column_name)) - source_columns.erase(it++); + it = source_columns.erase(it); else ++it; } @@ -973,7 +973,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (column) { source_columns.push_back(*column); - unknown_required_source_columns.erase(it++); + it = unknown_required_source_columns.erase(it); } else ++it; @@ -1120,8 +1120,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( /// Push the predicate expression down to subqueries. The optimization should be applied to both initial and secondary queries. result.rewrite_subqueries = PredicateExpressionsOptimizer(getContext(), tables_with_columns, settings).optimize(*select_query); + TreeOptimizer::optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif); + /// Only apply AST optimization for initial queries. - if (getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && !select_options.ignore_ast_optimizations) TreeOptimizer::apply(query, result, tables_with_columns, getContext()); /// array_join_alias_to_name, array_join_result_to_source. 
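
The TreeRewriter.cpp hunk above (and the similar getTableExpressions.cpp hunk later in this diff) switches from the container.erase(it++) pattern to it = container.erase(it). A minimal standalone sketch of that erase-while-iterating idiom, using only standard-library types; the function and names below are illustrative, not taken from the ClickHouse sources:

#include <list>
#include <set>
#include <string>

/// Drop every column name that is not required, while iterating the list.
/// erase() returns an iterator to the element after the erased one, so
/// assigning it back keeps the loop valid; this form also stays correct for
/// containers such as std::vector, where the older erase(it++) pattern would
/// leave a dangling iterator.
void dropUnrequiredColumns(std::list<std::string> & source_columns,
                           const std::set<std::string> & required)
{
    for (auto it = source_columns.begin(); it != source_columns.end();)
    {
        if (required.count(*it) == 0)
            it = source_columns.erase(it);
        else
            ++it;
    }
}

Assuming source_columns is a list-like container, both forms behave the same here; the returned-iterator form is simply the conventional, container-agnostic way to write the loop.
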
diff --git a/src/Interpreters/UserDefinedExecutableFunction.cpp b/src/Interpreters/UserDefinedExecutableFunction.cpp index d57978d0fd6..e5a852b0e75 100644 --- a/src/Interpreters/UserDefinedExecutableFunction.cpp +++ b/src/Interpreters/UserDefinedExecutableFunction.cpp @@ -13,14 +13,12 @@ namespace DB UserDefinedExecutableFunction::UserDefinedExecutableFunction( const UserDefinedExecutableFunctionConfiguration & configuration_, - const ExternalLoadableLifetime & lifetime_, - std::shared_ptr process_pool_) + std::shared_ptr coordinator_, + const ExternalLoadableLifetime & lifetime_) : configuration(configuration_) + , coordinator(std::move(coordinator_)) , lifetime(lifetime_) - , process_pool(process_pool_) { - if (!process_pool && configuration.type == UserDefinedExecutableFunctionType::executable_pool) - process_pool = std::make_shared(configuration.pool_size == 0 ? std::numeric_limits::max() : configuration.pool_size); } }; diff --git a/src/Interpreters/UserDefinedExecutableFunction.h b/src/Interpreters/UserDefinedExecutableFunction.h index 1cb1de47578..a4fad8ceb7b 100644 --- a/src/Interpreters/UserDefinedExecutableFunction.h +++ b/src/Interpreters/UserDefinedExecutableFunction.h @@ -10,26 +10,13 @@ namespace DB { -enum class UserDefinedExecutableFunctionType -{ - executable, - executable_pool -}; - struct UserDefinedExecutableFunctionConfiguration { - UserDefinedExecutableFunctionType type = UserDefinedExecutableFunctionType::executable; std::string name; - std::string script_path; - std::string format; + std::string command; + std::vector command_arguments; std::vector argument_types; DataTypePtr result_type; - /// Pool settings - size_t pool_size = 0; - size_t command_termination_timeout = 0; - size_t max_command_execution_time = 0; - /// Send number_of_rows\n before sending chunk to process - bool send_chunk_header = false; }; class UserDefinedExecutableFunction final : public IExternalLoadable @@ -38,8 +25,8 @@ public: UserDefinedExecutableFunction( const UserDefinedExecutableFunctionConfiguration & configuration_, - const ExternalLoadableLifetime & lifetime_, - std::shared_ptr process_pool_ = nullptr); + std::shared_ptr coordinator_, + const ExternalLoadableLifetime & lifetime_); const ExternalLoadableLifetime & getLifetime() const override { @@ -63,7 +50,7 @@ public: std::shared_ptr clone() const override { - return std::make_shared(configuration, lifetime, process_pool); + return std::make_shared(configuration, coordinator, lifetime); } const UserDefinedExecutableFunctionConfiguration & getConfiguration() const @@ -71,9 +58,9 @@ public: return configuration; } - std::shared_ptr getProcessPool() const + std::shared_ptr getCoordinator() const { - return process_pool; + return coordinator; } std::shared_ptr shared_from_this() @@ -87,13 +74,9 @@ public: } private: - UserDefinedExecutableFunction(const UserDefinedExecutableFunctionConfiguration & configuration_, - std::shared_ptr process_pool_, - const ExternalLoadableLifetime & lifetime_); - UserDefinedExecutableFunctionConfiguration configuration; + std::shared_ptr coordinator; ExternalLoadableLifetime lifetime; - std::shared_ptr process_pool; }; } diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp index 4cb3e034b01..0cffd61eaf6 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp @@ -1,8 +1,13 @@ #include "UserDefinedExecutableFunctionFactory.h" +#include + +#include + 
#include #include +#include #include #include @@ -19,7 +24,6 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int TIMEOUT_EXCEEDED; } class UserDefinedFunction final : public IFunction @@ -52,10 +56,36 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { + auto coordinator = executable_function->getCoordinator(); + const auto & coordinator_configuration = coordinator->getConfiguration(); const auto & configuration = executable_function->getConfiguration(); + + String command = configuration.command; + + if (coordinator_configuration.execute_direct) + { + auto user_scripts_path = context->getUserScriptsPath(); + auto script_path = user_scripts_path + '/' + command; + + if (!fileOrSymlinkPathStartsWith(script_path, user_scripts_path)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} must be inside user scripts folder {}", + command, + user_scripts_path); + + if (!std::filesystem::exists(std::filesystem::path(script_path))) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} does not exist inside user scripts folder {}", + command, + user_scripts_path); + + command = std::move(script_path); + } + + size_t argument_size = arguments.size(); auto arguments_copy = arguments; - for (size_t i = 0; i < arguments.size(); ++i) + for (size_t i = 0; i < argument_size; ++i) { auto & column_with_type = arguments_copy[i]; column_with_type.column = column_with_type.column->convertToFullColumnIfConst(); @@ -71,53 +101,33 @@ public: column_with_type = column_to_cast; } - std::unique_ptr process = getProcess(); - ColumnWithTypeAndName result(result_type, "result"); Block result_block({result}); Block arguments_block(arguments_copy); - auto * process_in = &process->in; - - auto process_pool = executable_function->getProcessPool(); - bool is_executable_pool_function = (process_pool != nullptr); + auto source = std::make_shared(std::move(arguments_block)); + auto shell_input_pipe = Pipe(std::move(source)); ShellCommandSourceConfiguration shell_command_source_configuration; - if (is_executable_pool_function) + if (coordinator->getConfiguration().is_executable_pool) { shell_command_source_configuration.read_fixed_number_of_rows = true; shell_command_source_configuration.number_of_rows_to_read = input_rows_count; } - ShellCommandSource::SendDataTask task = {[process_in, arguments_block, &configuration, is_executable_pool_function, this]() - { - auto & out = *process_in; + Pipes shell_input_pipes; + shell_input_pipes.emplace_back(std::move(shell_input_pipe)); - if (configuration.send_chunk_header) - { - writeText(arguments_block.rows(), out); - writeChar('\n', out); - } - - auto output_format = context->getOutputFormat(configuration.format, out, arguments_block.cloneEmpty()); - formatBlock(output_format, arguments_block); - if (!is_executable_pool_function) - out.close(); - }}; - std::vector tasks = {std::move(task)}; - - Pipe pipe(std::make_unique( + Pipe pipe = coordinator->createPipe( + command, + configuration.command_arguments, + std::move(shell_input_pipes), + result_block, context, - configuration.format, - result_block.cloneEmpty(), - std::move(process), - std::move(tasks), - shell_command_source_configuration, - process_pool)); + shell_command_source_configuration); QueryPipeline pipeline(std::move(pipe)); - PullingPipelineExecutor executor(pipeline); auto result_column = result_type->createColumn(); @@ -143,36 +153,6 @@ public: 
private: - std::unique_ptr getProcess() const - { - auto process_pool = executable_function->getProcessPool(); - auto executable_function_configuration = executable_function->getConfiguration(); - - std::unique_ptr process; - bool is_executable_pool_function = (process_pool != nullptr); - if (is_executable_pool_function) - { - bool result = process_pool->tryBorrowObject(process, [&]() - { - ShellCommand::Config process_config(executable_function_configuration.script_path); - process_config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, executable_function_configuration.command_termination_timeout }; - auto shell_command = ShellCommand::execute(process_config); - return shell_command; - }, executable_function_configuration.max_command_execution_time * 1000); - - if (!result) - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, - "Could not get process from pool, max command execution timeout exceeded {} seconds", - executable_function_configuration.max_command_execution_time); - } - else - { - process = ShellCommand::execute(executable_function_configuration.script_path); - } - - return process; - } - ExternalUserDefinedExecutableFunctionsLoader::UserDefinedExecutableFunctionPtr executable_function; ContextPtr context; }; diff --git a/src/Interpreters/applyTableOverride.cpp b/src/Interpreters/applyTableOverride.cpp new file mode 100644 index 00000000000..e614e58b06b --- /dev/null +++ b/src/Interpreters/applyTableOverride.cpp @@ -0,0 +1,125 @@ +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreateQuery * create_query) +{ + if (auto * columns = override.columns) + { + if (!create_query->columns_list) + create_query->set(create_query->columns_list, std::make_shared()); + if (columns->columns) + { + for (const auto & override_column_ast : columns->columns->children) + { + auto * override_column = override_column_ast->as(); + if (!override_column) + continue; + if (!create_query->columns_list->columns) + create_query->columns_list->set(create_query->columns_list->columns, std::make_shared()); + auto & dest_children = create_query->columns_list->columns->children; + auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool + { + return node->as()->name == override_column->name; + }); + /// For columns, only allow adding ALIAS (non-physical) for now. + /// TODO: This logic should instead be handled by validation that is + /// executed from InterpreterCreateQuery / InterpreterAlterQuery. 
+ if (exists == dest_children.end()) + { + if (override_column->default_specifier == "ALIAS") + dest_children.emplace_back(override_column_ast); + } + else + dest_children[exists - dest_children.begin()] = override_column_ast; + } + } + if (columns->indices) + { + for (const auto & override_index_ast : columns->indices->children) + { + auto * override_index = override_index_ast->as(); + if (!override_index) + continue; + if (!create_query->columns_list->indices) + create_query->columns_list->set(create_query->columns_list->indices, std::make_shared()); + auto & dest_children = create_query->columns_list->indices->children; + auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool + { + return node->as()->name == override_index->name; + }); + if (exists == dest_children.end()) + dest_children.emplace_back(override_index_ast); + else + dest_children[exists - dest_children.begin()] = override_index_ast; + } + } + if (columns->constraints) + { + for (const auto & override_constraint_ast : columns->constraints->children) + { + auto * override_constraint = override_constraint_ast->as(); + if (!override_constraint) + continue; + if (!create_query->columns_list->constraints) + create_query->columns_list->set(create_query->columns_list->constraints, std::make_shared()); + auto & dest_children = create_query->columns_list->constraints->children; + auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool + { + return node->as()->name == override_constraint->name; + }); + if (exists == dest_children.end()) + dest_children.emplace_back(override_constraint_ast); + else + dest_children[exists - dest_children.begin()] = override_constraint_ast; + } + } + if (columns->projections) + { + for (const auto & override_projection_ast : columns->projections->children) + { + auto * override_projection = override_projection_ast->as(); + if (!override_projection) + continue; + if (!create_query->columns_list->projections) + create_query->columns_list->set(create_query->columns_list->projections, std::make_shared()); + auto & dest_children = create_query->columns_list->projections->children; + auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool + { + return node->as()->name == override_projection->name; + }); + if (exists == dest_children.end()) + dest_children.emplace_back(override_projection_ast); + else + dest_children[exists - dest_children.begin()] = override_projection_ast; + } + } + } + if (auto * storage = override.storage) + { + if (!create_query->storage) + create_query->set(create_query->storage, std::make_shared()); + if (storage->partition_by) + create_query->storage->set(create_query->storage->partition_by, storage->partition_by->clone()); + if (storage->primary_key) + create_query->storage->set(create_query->storage->primary_key, storage->primary_key->clone()); + if (storage->order_by) + create_query->storage->set(create_query->storage->order_by, storage->order_by->clone()); + if (storage->sample_by) + create_query->storage->set(create_query->storage->sample_by, storage->sample_by->clone()); + if (storage->ttl_table) + create_query->storage->set(create_query->storage->ttl_table, storage->ttl_table->clone()); + // No support for overriding ENGINE and SETTINGS + } + +} + +} diff --git a/src/Interpreters/applyTableOverride.h b/src/Interpreters/applyTableOverride.h new file mode 100644 index 00000000000..1c51c3b8506 --- /dev/null +++ b/src/Interpreters/applyTableOverride.h @@ -0,0 
+1,16 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class ASTTableOverride; +class ASTCreateQuery; +class ASTIndentifier; + +void applyTableOverrideToCreateQuery(const ASTTableOverride & override, ASTCreateQuery * create_query); + +} diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index fa8e77e4f4c..d0f15a4c595 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -22,7 +22,7 @@ #include #include -#include +#include #include diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index e177fd8e6b3..bd3c35c12f6 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -556,9 +556,14 @@ static std::tuple executeQueryImpl( auto * insert_query = ast->as(); - if (insert_query && insert_query->table_id) - /// Resolve database before trying to use async insert feature - to properly hash the query. - insert_query->table_id = context->resolveStorageID(insert_query->table_id); + /// Resolve database before trying to use async insert feature - to properly hash the query. + if (insert_query) + { + if (insert_query->table_id) + insert_query->table_id = context->resolveStorageID(insert_query->table_id); + else if (auto table = insert_query->getTable(); !table.empty()) + insert_query->table_id = context->resolveStorageID(StorageID{insert_query->getDatabase(), table}); + } if (insert_query && insert_query->select) { @@ -579,8 +584,14 @@ static std::tuple executeQueryImpl( } } else + { /// reset Input callbacks if query is not INSERT SELECT context->resetInputCallbacks(); + } + + StreamLocalLimits limits; + std::shared_ptr quota; + std::unique_ptr interpreter; auto * queue = context->getAsynchronousInsertQueue(); const bool async_insert = queue @@ -591,65 +602,71 @@ static std::tuple executeQueryImpl( { queue->push(ast, context); - BlockIO io; if (settings.wait_for_async_insert) { auto timeout = settings.wait_for_async_insert_timeout.totalMilliseconds(); auto query_id = context->getCurrentQueryId(); auto source = std::make_shared(query_id, timeout, *queue); - io.pipeline = QueryPipeline(Pipe(std::move(source))); + res.pipeline = QueryPipeline(Pipe(std::move(source))); } - return std::make_tuple(ast, std::move(io)); - } - - auto interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); - - std::shared_ptr quota; - if (!interpreter->ignoreQuota()) - { quota = context->getQuota(); if (quota) { - if (ast->as() || ast->as()) - { - quota->used(QuotaType::QUERY_SELECTS, 1); - } - else if (ast->as()) - { - quota->used(QuotaType::QUERY_INSERTS, 1); - } + quota->used(QuotaType::QUERY_INSERTS, 1); quota->used(QuotaType::QUERIES, 1); - quota->checkExceeded(QuotaType::ERRORS); } - } - StreamLocalLimits limits; - if (!interpreter->ignoreLimits()) - { - limits.mode = LimitsMode::LIMITS_CURRENT; //-V1048 - limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode); - } - - { - std::unique_ptr span; - if (context->query_trace_context.trace_id != UUID()) - { - auto * raw_interpreter_ptr = interpreter.get(); - std::string class_name(abi::__cxa_demangle(typeid(*raw_interpreter_ptr).name(), nullptr, nullptr, nullptr)); - span = std::make_unique(class_name + "::execute()"); - } - res = interpreter->execute(); - } - - QueryPipeline & pipeline = res.pipeline; - - if (const auto * insert_interpreter = typeid_cast(&*interpreter)) - { - /// Save insertion 
table (not table function). TODO: support remote() table function. - auto table_id = insert_interpreter->getDatabaseTable(); + const auto & table_id = insert_query->table_id; if (!table_id.empty()) - context->setInsertionTable(std::move(table_id)); + context->setInsertionTable(table_id); + } + else + { + interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); + + if (!interpreter->ignoreQuota()) + { + quota = context->getQuota(); + if (quota) + { + if (ast->as() || ast->as()) + { + quota->used(QuotaType::QUERY_SELECTS, 1); + } + else if (ast->as()) + { + quota->used(QuotaType::QUERY_INSERTS, 1); + } + quota->used(QuotaType::QUERIES, 1); + quota->checkExceeded(QuotaType::ERRORS); + } + } + + if (!interpreter->ignoreLimits()) + { + limits.mode = LimitsMode::LIMITS_CURRENT; //-V1048 + limits.size_limits = SizeLimits(settings.max_result_rows, settings.max_result_bytes, settings.result_overflow_mode); + } + + { + std::unique_ptr span; + if (context->query_trace_context.trace_id != UUID()) + { + auto * raw_interpreter_ptr = interpreter.get(); + std::string class_name(abi::__cxa_demangle(typeid(*raw_interpreter_ptr).name(), nullptr, nullptr, nullptr)); + span = std::make_unique(class_name + "::execute()"); + } + res = interpreter->execute(); + } + + if (const auto * insert_interpreter = typeid_cast(&*interpreter)) + { + /// Save insertion table (not table function). TODO: support remote() table function. + auto table_id = insert_interpreter->getDatabaseTable(); + if (!table_id.empty()) + context->setInsertionTable(std::move(table_id)); + } } if (process_list_entry) @@ -663,6 +680,8 @@ static std::tuple executeQueryImpl( /// Hold element of process list till end of query execution. res.process_list_entry = process_list_entry; + auto & pipeline = res.pipeline; + if (pipeline.pulling() || pipeline.completed()) { /// Limits on the result, the quota on the result, and also callback for progress. 
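
The executeQueryImpl() changes above split the flow into two branches: queries accepted by the asynchronous insert queue never construct an interpreter (the INSERT quota is charged and the insertion table recorded directly), while all other queries follow the usual interpreter path. A rough, self-contained sketch of that control-flow shape, using toy stand-in types rather than the real ClickHouse classes:

#include <functional>
#include <string>

/// Toy stand-ins; none of these are the real ClickHouse types.
struct ToyQuery
{
    bool is_insert = false;
    bool has_inline_data = false;   /// e.g. INSERT ... VALUES with data attached
};

struct ToyResult { std::string description; };

/// Shape of the new executeQueryImpl() branching: async inserts are queued and
/// return a lightweight result, everything else goes through an interpreter.
ToyResult executeToyQuery(
    const ToyQuery & query,
    bool async_insert_enabled,
    const std::function<void(const ToyQuery &)> & push_to_async_queue,
    const std::function<ToyResult(const ToyQuery &)> & run_interpreter)
{
    const bool async_insert = async_insert_enabled && query.is_insert && query.has_inline_data;

    if (async_insert)
    {
        push_to_async_queue(query);       /// buffered and flushed later
        return {"async insert queued, no interpreter built"};
    }

    return run_interpreter(query);        /// ordinary path: interpreter->execute()
}

As the diff shows, the real code additionally waits for the flush when wait_for_async_insert is set, charges QUERY_INSERTS/QUERIES quota for the queued query, and records the insertion table for the query log.
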
@@ -712,7 +731,10 @@ static std::tuple executeQueryImpl( elem.query_views = info.views; } - interpreter->extendQueryLogElem(elem, ast, context, query_database, query_table); + if (async_insert) + InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); + else if (interpreter) + interpreter->extendQueryLogElem(elem, ast, context, query_database, query_table); if (settings.log_query_settings) elem.query_settings = std::make_shared(context->getSettingsRef()); diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index d82c7fc1332..830f0ea4411 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -16,7 +16,7 @@ NameSet removeDuplicateColumns(NamesAndTypesList & columns) if (names.emplace(it->name).second) ++it; else - columns.erase(it++); + it = columns.erase(it); } return names; } diff --git a/src/Interpreters/getTableOverride.cpp b/src/Interpreters/getTableOverride.cpp new file mode 100644 index 00000000000..903d9e80836 --- /dev/null +++ b/src/Interpreters/getTableOverride.cpp @@ -0,0 +1,27 @@ +#include "getTableOverride.h" + +#include +#include +#include +#include + +namespace DB +{ + +ASTPtr tryGetTableOverride(const String & mapped_database, const String & table) +{ + if (auto database_ptr = DatabaseCatalog::instance().tryGetDatabase(mapped_database)) + { + auto create_query = database_ptr->getCreateDatabaseQuery(); + if (auto * create_database_query = create_query->as()) + { + if (create_database_query->table_overrides) + { + return create_database_query->table_overrides->tryGetTableOverride(table); + } + } + } + return nullptr; +} + +} diff --git a/src/Interpreters/getTableOverride.h b/src/Interpreters/getTableOverride.h new file mode 100644 index 00000000000..1a0a15e6fe2 --- /dev/null +++ b/src/Interpreters/getTableOverride.h @@ -0,0 +1,8 @@ +#pragma once +#include +#include + +namespace DB +{ +ASTPtr tryGetTableOverride(const String & mapped_database, const String & table); +} diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index b571a8e8e10..ca55fde0740 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -133,8 +134,11 @@ DataTypePtr convertTypeToNullable(const DataTypePtr & type) /// Convert column to nullable. If column LowCardinality or Const, convert nested column. /// Returns nullptr if conversion cannot be performed. 
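The removeDuplicateColumns change in getTableExpressions.cpp above replaces columns.erase(it++) with it = columns.erase(it). A small standalone illustration of that erase-while-iterating idiom (my own example, assuming a std::list of column names, which is what NamesAndTypesList behaves like):

// erase() returns the iterator to the next element, so assigning its result
// keeps the loop valid regardless of the container's invalidation rules.
#include <cassert>
#include <list>
#include <set>
#include <string>

int main()
{
    std::list<std::string> columns = {"id", "name", "id", "value", "name"};
    std::set<std::string> seen;

    for (auto it = columns.begin(); it != columns.end();)
    {
        if (seen.insert(*it).second)
            ++it;                     // first occurrence: keep it
        else
            it = columns.erase(it);   // duplicate: erase and advance safely
    }

    assert((columns == std::list<std::string>{"id", "name", "value"}));
}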
-static ColumnPtr tryConvertColumnToNullable(const ColumnPtr & col) +static ColumnPtr tryConvertColumnToNullable(ColumnPtr col) { + if (col->isSparse()) + col = recursiveRemoveSparse(col); + if (isColumnNullable(*col) || col->canBeInsideNullable()) return makeNullable(col); @@ -297,7 +301,7 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names) for (const auto & column_name : names) { auto & column = block.getByName(column_name).column; - column = recursiveRemoveLowCardinality(column->convertToFullColumnIfConst()); + column = recursiveRemoveLowCardinality(recursiveRemoveSparse(column->convertToFullColumnIfConst())); ptrs.push_back(column.get()); } @@ -322,7 +326,8 @@ ColumnRawPtrMap materializeColumnsInplaceMap(Block & block, const Names & names) ColumnPtr materializeColumn(const Block & block, const String & column_name) { const auto & src_column = block.getByName(column_name).column; - return recursiveRemoveLowCardinality(src_column->convertToFullColumnIfConst()); + return recursiveRemoveLowCardinality( + recursiveRemoveSparse(src_column->convertToFullColumnIfConst())); } Columns materializeColumns(const Block & block, const Names & names) @@ -349,22 +354,22 @@ ColumnRawPtrs getRawPointers(const Columns & columns) return ptrs; } -void removeLowCardinalityInplace(Block & block) +void convertToFullColumnsInplace(Block & block) { for (size_t i = 0; i < block.columns(); ++i) { auto & col = block.getByPosition(i); - col.column = recursiveRemoveLowCardinality(col.column); + col.column = recursiveRemoveLowCardinality(recursiveRemoveSparse(col.column)); col.type = recursiveRemoveLowCardinality(col.type); } } -void removeLowCardinalityInplace(Block & block, const Names & names, bool change_type) +void convertToFullColumnsInplace(Block & block, const Names & names, bool change_type) { for (const String & column_name : names) { auto & col = block.getByName(column_name); - col.column = recursiveRemoveLowCardinality(col.column); + col.column = recursiveRemoveLowCardinality(recursiveRemoveSparse(col.column)); if (change_type) col.type = recursiveRemoveLowCardinality(col.type); } @@ -401,6 +406,9 @@ ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_nam /// We will join only keys, where all components are not NULL. 
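The join_common.cpp changes above expand sparse columns to their full representation (recursiveRemoveSparse) alongside the existing LowCardinality removal before the columns are used as join inputs. The toy example below is not ClickHouse code; it only illustrates why that normalization is needed: row-wise operations such as key comparison require both sides to share the same physical layout, so a sparse encoding (default value plus offsets of explicit values) has to be expanded first.

// Toy sketch of "convert to full before comparing", with made-up types.
#include <cassert>
#include <cstddef>
#include <vector>

struct SparseColumn
{
    size_t size = 0;
    std::vector<size_t> offsets;   // positions of non-default values
    std::vector<int> values;       // the non-default values themselves
};

std::vector<int> toFull(const SparseColumn & sparse)
{
    std::vector<int> full(sparse.size, 0);     // default value everywhere
    for (size_t i = 0; i < sparse.offsets.size(); ++i)
        full[sparse.offsets[i]] = sparse.values[i];
    return full;
}

int main()
{
    SparseColumn left{5, {1, 4}, {10, 20}};    // logically [0, 10, 0, 0, 20]
    std::vector<int> right = {0, 10, 0, 0, 20};

    auto left_full = toFull(left);             // same layout as `right` now
    for (size_t row = 0; row < right.size(); ++row)
        assert(left_full[row] == right[row]);  // row-wise comparison is now trivial
}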
if (const auto * nullable = checkAndGetColumn(*key_columns[i])) key_columns[i] = &nullable->getNestedColumn(); + + if (const auto * sparse = checkAndGetColumn(*key_columns[i])) + key_columns[i] = &sparse->getValuesColumn(); } return key_columns; diff --git a/src/Interpreters/join_common.h b/src/Interpreters/join_common.h index d3d2a442e41..3e5a22f33bf 100644 --- a/src/Interpreters/join_common.h +++ b/src/Interpreters/join_common.h @@ -72,8 +72,8 @@ Columns materializeColumns(const Block & block, const Names & names); ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); ColumnRawPtrMap materializeColumnsInplaceMap(Block & block, const Names & names); ColumnRawPtrs getRawPointers(const Columns & columns); -void removeLowCardinalityInplace(Block & block); -void removeLowCardinalityInplace(Block & block, const Names & names, bool change_type = true); +void convertToFullColumnsInplace(Block & block); +void convertToFullColumnsInplace(Block & block, const Names & names, bool change_type = true); void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys); ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right); diff --git a/src/Interpreters/tests/gtest_table_overrides.cpp b/src/Interpreters/tests/gtest_table_overrides.cpp new file mode 100644 index 00000000000..779bc7a53a4 --- /dev/null +++ b/src/Interpreters/tests/gtest_table_overrides.cpp @@ -0,0 +1,88 @@ +#include +#include +#include +#include + +#include +#include + +namespace +{ +using namespace DB; +using namespace std::literals; +} + + +struct TableOverrideTestCase +{ + String create_database_query; + String create_table_query; + String expected_create_table_query; +}; + +std::ostream & operator<<(std::ostream & ostr, const TableOverrideTestCase & test_case) +{ + return ostr << "database: " << test_case.create_database_query << ", table: " << test_case.create_table_query + << ", expected: " << test_case.expected_create_table_query; +} + +class TableOverrideTest : public ::testing::TestWithParam +{}; + +TEST_P(TableOverrideTest, applyOverrides) +{ + const auto & [database_query, table_query, expected_query] = GetParam(); + ParserCreateQuery parser; + ASTPtr database_ast; + ASSERT_NO_THROW(database_ast = parseQuery(parser, database_query, 0, 0)); + auto * database = database_ast->as(); + ASSERT_NE(nullptr, database); + ASTPtr table_ast; + ASSERT_NO_THROW(table_ast = parseQuery(parser, table_query, 0, 0)); + auto * table = table_ast->as(); + ASSERT_NE(nullptr, table); + auto table_name = table->table->as()->name(); + if (database->table_overrides) + { + auto override_ast = database->table_overrides->tryGetTableOverride(table_name); + ASSERT_NE(nullptr, override_ast); + auto * override_table_ast = override_ast->as(); + ASSERT_NE(nullptr, override_table_ast); + applyTableOverrideToCreateQuery(*override_table_ast, table); + } + EXPECT_EQ(expected_query, serializeAST(*table)); +} + +INSTANTIATE_TEST_SUITE_P(ApplyTableOverrides, TableOverrideTest, + ::testing::ValuesIn(std::initializer_list{ + { + "CREATE DATABASE db", + "CREATE TABLE db.t (id Int64) ENGINE=Log", + "CREATE TABLE db.t (`id` Int64) ENGINE = Log" + }, + { + "CREATE DATABASE db TABLE OVERRIDE t (PARTITION BY tuple())", + "CREATE TABLE db.t (id Int64) ENGINE=MergeTree", + "CREATE TABLE db.t (`id` Int64) ENGINE = MergeTree PARTITION BY tuple()" + }, + { + "CREATE DATABASE db TABLE OVERRIDE t (COLUMNS (id UInt64 CODEC(Delta), shard UInt8 ALIAS modulo(id, 16)) PARTITION BY shard)", + "CREATE TABLE db.t (id Int64) 
ENGINE=MergeTree", + "CREATE TABLE db.t (`id` UInt64 CODEC(Delta), `shard` UInt8 ALIAS id % 16) ENGINE = MergeTree PARTITION BY shard" + }, + { + "CREATE DATABASE db TABLE OVERRIDE a (PARTITION BY modulo(id, 3)), TABLE OVERRIDE b (PARTITION BY modulo(id, 5))", + "CREATE TABLE db.a (id Int64) ENGINE=MergeTree", + "CREATE TABLE db.a (`id` Int64) ENGINE = MergeTree PARTITION BY id % 3" + }, + { + "CREATE DATABASE db TABLE OVERRIDE a (PARTITION BY modulo(id, 3)), TABLE OVERRIDE b (PARTITION BY modulo(id, 5))", + "CREATE TABLE db.b (id Int64) ENGINE=MergeTree", + "CREATE TABLE db.b (`id` Int64) ENGINE = MergeTree PARTITION BY id % 5" + }, + { + "CREATE DATABASE db TABLE OVERRIDE `tbl` (PARTITION BY toYYYYMM(created))", + "CREATE TABLE db.tbl (id Int64, created DateTime) ENGINE=Foo", + "CREATE TABLE db.tbl (`id` Int64, `created` DateTime) ENGINE = Foo PARTITION BY toYYYYMM(created)", + } +})); diff --git a/src/Parsers/ASTTableOverrides.cpp b/src/Parsers/ASTTableOverrides.cpp index 0270c2cad82..d2625bf19b4 100644 --- a/src/Parsers/ASTTableOverrides.cpp +++ b/src/Parsers/ASTTableOverrides.cpp @@ -71,111 +71,6 @@ void ASTTableOverride::formatImpl(const FormatSettings & settings_, FormatState settings.ostr << nl_or_nothing << ')'; } -void ASTTableOverride::applyToCreateTableQuery(ASTCreateQuery * create_query) const -{ - if (columns) - { - if (!create_query->columns_list) - create_query->set(create_query->columns_list, std::make_shared()); - if (columns->columns) - { - for (const auto & override_column_ast : columns->columns->children) - { - auto * override_column = override_column_ast->as(); - if (!override_column) - continue; - if (!create_query->columns_list->columns) - create_query->columns_list->set(create_query->columns_list->columns, std::make_shared()); - auto & dest_children = create_query->columns_list->columns->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_column->name; - }); - if (exists == dest_children.end()) - dest_children.emplace_back(override_column_ast); - else - dest_children[exists - dest_children.begin()] = override_column_ast; - } - } - if (columns->indices) - { - for (const auto & override_index_ast : columns->indices->children) - { - auto * override_index = override_index_ast->as(); - if (!override_index) - continue; - if (!create_query->columns_list->indices) - create_query->columns_list->set(create_query->columns_list->indices, std::make_shared()); - auto & dest_children = create_query->columns_list->indices->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_index->name; - }); - if (exists == dest_children.end()) - dest_children.emplace_back(override_index_ast); - else - dest_children[exists - dest_children.begin()] = override_index_ast; - } - } - if (columns->constraints) - { - for (const auto & override_constraint_ast : columns->constraints->children) - { - auto * override_constraint = override_constraint_ast->as(); - if (!override_constraint) - continue; - if (!create_query->columns_list->constraints) - create_query->columns_list->set(create_query->columns_list->constraints, std::make_shared()); - auto & dest_children = create_query->columns_list->constraints->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_constraint->name; - }); - if (exists == dest_children.end()) - 
dest_children.emplace_back(override_constraint_ast); - else - dest_children[exists - dest_children.begin()] = override_constraint_ast; - } - } - if (columns->projections) - { - for (const auto & override_projection_ast : columns->projections->children) - { - auto * override_projection = override_projection_ast->as(); - if (!override_projection) - continue; - if (!create_query->columns_list->projections) - create_query->columns_list->set(create_query->columns_list->projections, std::make_shared()); - auto & dest_children = create_query->columns_list->projections->children; - auto exists = std::find_if(dest_children.begin(), dest_children.end(), [&](ASTPtr node) -> bool - { - return node->as()->name == override_projection->name; - }); - if (exists == dest_children.end()) - dest_children.emplace_back(override_projection_ast); - else - dest_children[exists - dest_children.begin()] = override_projection_ast; - } - } - } - if (storage) - { - if (!create_query->storage) - create_query->set(create_query->storage, std::make_shared()); - if (storage->partition_by) - create_query->storage->set(create_query->storage->partition_by, storage->partition_by->clone()); - if (storage->primary_key) - create_query->storage->set(create_query->storage->primary_key, storage->primary_key->clone()); - if (storage->order_by) - create_query->storage->set(create_query->storage->order_by, storage->order_by->clone()); - if (storage->sample_by) - create_query->storage->set(create_query->storage->sample_by, storage->sample_by->clone()); - if (storage->ttl_table) - create_query->storage->set(create_query->storage->ttl_table, storage->ttl_table->clone()); - // not supporting overriding ENGINE - } -} - ASTPtr ASTTableOverrideList::clone() const { auto res = std::make_shared(*this); diff --git a/src/Parsers/ASTTableOverrides.h b/src/Parsers/ASTTableOverrides.h index 7a84ed25121..62e96b16b01 100644 --- a/src/Parsers/ASTTableOverrides.h +++ b/src/Parsers/ASTTableOverrides.h @@ -26,7 +26,6 @@ public: String getID(char) const override { return "TableOverride " + table_name; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - void applyToCreateTableQuery(ASTCreateQuery * create_query) const; }; /// List of table overrides, for example: diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 485df2d3662..680d3f6031b 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -402,7 +402,7 @@ bool ParserVariableArityOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expect bool ParserBetweenExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// For the expression (subject [NOT] BETWEEN left AND right) - /// create an AST the same as for (subject> = left AND subject <= right). + /// create an AST the same as for (subject >= left AND subject <= right). 
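The corrected comment above describes the rewrite ParserBetweenExpression performs. As a plain-C++ reminder of the semantics (my own illustration, not parser code), subject [NOT] BETWEEN left AND right is equivalent to the conjunction below:

// `x BETWEEN lo AND hi` <=> `lo <= x AND x <= hi`; NOT BETWEEN is its negation.
#include <cassert>

template <typename T>
bool between(const T & x, const T & lo, const T & hi)
{
    return lo <= x && x <= hi;
}

int main()
{
    assert(between(5, 1, 10));     // 5 BETWEEN 1 AND 10
    assert(!between(11, 1, 10));   // 11 NOT BETWEEN 1 AND 10
    assert(between(1, 1, 10));     // bounds are inclusive
}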
ParserKeyword s_not("NOT"); ParserKeyword s_between("BETWEEN"); diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index e7abcee6b43..fac79de4c5b 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -57,46 +58,6 @@ TEST_P(ParserTest, parseQuery) } } -struct TableOverrideTestCase -{ - DB::String create_database_query; - DB::String create_table_query; - DB::String expected_create_table_query; -}; - -std::ostream & operator<<(std::ostream & ostr, const TableOverrideTestCase & test_case) -{ - return ostr << "database: " << test_case.create_database_query << ", table: " << test_case.create_table_query - << ", expected: " << test_case.expected_create_table_query; -} - -class TableOverrideTest : public ::testing::TestWithParam -{}; - -TEST_P(TableOverrideTest, applyOverrides) -{ - const auto & [database_query, table_query, expected_query] = GetParam(); - ParserCreateQuery parser; - ASTPtr database_ast; - ASSERT_NO_THROW(database_ast = parseQuery(parser, database_query, 0, 0)); - auto * database = database_ast->as(); - ASSERT_NE(nullptr, database); - ASTPtr table_ast; - ASSERT_NO_THROW(table_ast = parseQuery(parser, table_query, 0, 0)); - auto * table = table_ast->as(); - ASSERT_NE(nullptr, table); - auto table_name = table->table->as()->name(); - if (database->table_overrides) - { - auto override_ast = database->table_overrides->tryGetTableOverride(table_name); - ASSERT_NE(nullptr, override_ast); - auto * override_table_ast = override_ast->as(); - ASSERT_NE(nullptr, override_table_ast); - override_table_ast->applyToCreateTableQuery(table); - } - EXPECT_EQ(expected_query, serializeAST(*table)); -} - INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, ::testing::Combine( ::testing::Values(std::make_shared()), @@ -265,37 +226,3 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateDatabaseQuery, ParserTest, "CREATE DATABASE db\nENGINE = Foo\nSETTINGS a = 1, b = 2\nTABLE OVERRIDE `a`\n(\n ORDER BY (`id`, `version`)\n)\nCOMMENT 'db comment'" } }))); - -INSTANTIATE_TEST_SUITE_P(ApplyTableOverrides, TableOverrideTest, - ::testing::ValuesIn(std::initializer_list{ - { - "CREATE DATABASE db", - "CREATE TABLE db.t (id Int64) ENGINE=Log", - "CREATE TABLE db.t (`id` Int64) ENGINE = Log" - }, - { - "CREATE DATABASE db TABLE OVERRIDE t (PARTITION BY tuple())", - "CREATE TABLE db.t (id Int64) ENGINE=MergeTree", - "CREATE TABLE db.t (`id` Int64) ENGINE = MergeTree PARTITION BY tuple()" - }, - { - "CREATE DATABASE db TABLE OVERRIDE t (COLUMNS (id UInt64, shard UInt8 MATERIALIZED modulo(id, 16)) PARTITION BY shard)", - "CREATE TABLE db.t (id Int64) ENGINE=MergeTree", - "CREATE TABLE db.t (`id` UInt64, `shard` UInt8 MATERIALIZED id % 16) ENGINE = MergeTree PARTITION BY shard" - }, - { - "CREATE DATABASE db TABLE OVERRIDE a (PARTITION BY modulo(id, 3)), TABLE OVERRIDE b (PARTITION BY modulo(id, 5))", - "CREATE TABLE db.a (id Int64) ENGINE=MergeTree", - "CREATE TABLE db.a (`id` Int64) ENGINE = MergeTree PARTITION BY id % 3" - }, - { - "CREATE DATABASE db TABLE OVERRIDE a (PARTITION BY modulo(id, 3)), TABLE OVERRIDE b (PARTITION BY modulo(id, 5))", - "CREATE TABLE db.b (id Int64) ENGINE=MergeTree", - "CREATE TABLE db.b (`id` Int64) ENGINE = MergeTree PARTITION BY id % 5" - }, - { - "CREATE DATABASE db ENGINE=MaterializeMySQL('addr:port', 'db', 'user', 'pw') TABLE OVERRIDE `tbl` (PARTITION BY toYYYYMM(created))", - "CREATE TABLE db.tbl (id Int64, created DateTime) ENGINE=Foo", - "CREATE TABLE 
db.tbl (`id` Int64, `created` DateTime) ENGINE = Foo PARTITION BY toYYYYMM(created)", - } -})); diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index c1c98e2931c..8c7c09abf01 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -185,4 +186,14 @@ const ChunkMissingValues::RowsBitMask & ChunkMissingValues::getDefaultsBitmask(s return none; } +void convertToFullIfSparse(Chunk & chunk) +{ + size_t num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = recursiveRemoveSparse(column); + + chunk.setColumns(std::move(columns), num_rows); +} + } diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index c26180453c7..e70ba57a267 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -131,4 +131,10 @@ private: RowsMaskByColumnId rows_mask_by_column_id; }; +/// Converts all columns to full serialization in chunk. +/// It's needed, when you have to access to the internals of the column, +/// or when you need to perform operation with two columns +/// and their structure must be equal (e.g. compareAt). +void convertToFullIfSparse(Chunk & chunk); + } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index b1f9eaa59a1..1f6b530d72f 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -37,6 +37,9 @@ Chunk ArrowBlockInputFormat::generate() if (!stream_reader) prepareReader(); + if (is_stopped) + return {}; + batch_result = stream_reader->Next(); if (batch_result.ok() && !(*batch_result)) return res; @@ -46,6 +49,9 @@ Chunk ArrowBlockInputFormat::generate() if (!file_reader) prepareReader(); + if (is_stopped) + return {}; + if (record_batch_current >= record_batch_total) return res; @@ -94,7 +100,11 @@ void ArrowBlockInputFormat::prepareReader() } else { - auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(asArrowFile(*in, format_settings)); + auto arrow_file = asArrowFile(*in, format_settings, is_stopped); + if (is_stopped) + return; + + auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(std::move(arrow_file)); if (!file_reader_status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", file_reader_status.status().ToString()); diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index 44e18e3f852..bb8a000477c 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -27,6 +27,11 @@ public: private: Chunk generate() override; + void onCancel() override + { + is_stopped = 1; + } + // Whether to use ArrowStream format bool stream; // This field is only used for ArrowStream format @@ -42,6 +47,8 @@ private: const FormatSettings format_settings; void prepareReader(); + + std::atomic is_stopped{0}; }; } diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 148faabf352..86d278397c2 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -140,7 +140,7 @@ arrow::Status ArrowInputStreamFromReadBuffer::Close() return arrow::Status(); } -std::shared_ptr asArrowFile(ReadBuffer & in, const FormatSettings & settings) +std::shared_ptr asArrowFile(ReadBuffer & 
in, const FormatSettings & settings, std::atomic & is_cancelled) { if (auto * fd_in = dynamic_cast(&in)) { @@ -160,7 +160,7 @@ std::shared_ptr asArrowFile(ReadBuffer & in, const std::string file_data; { WriteBufferFromString file_buffer(file_data); - copyData(in, file_buffer); + copyData(in, file_buffer, is_cancelled); } return std::make_shared(arrow::Buffer::FromString(std::move(file_data))); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index 29c869e4152..d649c52557f 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -86,7 +86,7 @@ private: ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowInputStreamFromReadBuffer); }; -std::shared_ptr asArrowFile(ReadBuffer & in, const FormatSettings & settings); +std::shared_ptr asArrowFile(ReadBuffer & in, const FormatSettings & settings, std::atomic & is_cancelled); } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index fee5c77f9ec..272907022a1 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 1f806d47c45..11e56ecbe0c 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -68,6 +68,7 @@ namespace DB { + namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -280,7 +281,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node for (size_t n = decoder.arrayStart(); n != 0; n = decoder.arrayNext()) { total += n; - for (size_t i = 0; i < n; i++) + for (size_t i = 0; i < n; ++i) { nested_deserialize(nested_column, decoder); } @@ -344,7 +345,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (target.isString()) { std::vector symbols; - for (size_t i = 0; i < root_node->names(); i++) + for (size_t i = 0; i < root_node->names(); ++i) { symbols.push_back(root_node->nameAt(i)); } @@ -359,7 +360,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { const auto & enum_type = dynamic_cast(*target_type); Row symbol_mapping; - for (size_t i = 0; i < root_node->names(); i++) + for (size_t i = 0; i < root_node->names(); ++i) { symbol_mapping.push_back(enum_type.castToValue(root_node->nameAt(i))); } @@ -443,11 +444,19 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_UNION: { std::vector union_skip_fns; - for (size_t i = 0; i < root_node->leaves(); i++) + for (size_t i = 0; i < root_node->leaves(); ++i) { union_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); } - return [union_skip_fns](avro::Decoder & decoder) { union_skip_fns[decoder.decodeUnionIndex()](decoder); }; + return [union_skip_fns](avro::Decoder & decoder) + { + auto index = decoder.decodeUnionIndex(); + if (index >= union_skip_fns.size()) + { + throw Exception("Union index out of boundary", ErrorCodes::INCORRECT_DATA); + } + union_skip_fns[index](decoder); + }; } case avro::AVRO_NULL: return [](avro::Decoder & decoder) { decoder.decodeNull(); }; @@ -476,7 +485,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_RECORD: { std::vector 
field_skip_fns; - for (size_t i = 0; i < root_node->leaves(); i++) + for (size_t i = 0; i < root_node->leaves(); ++i) { field_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 2de11178e96..73237369e56 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -22,6 +22,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + class AvroDeserializer { public: @@ -81,7 +87,12 @@ private: action.execute(columns, decoder, ext); break; case Union: - actions[decoder.decodeUnionIndex()].execute(columns, decoder, ext); + auto index = decoder.decodeUnionIndex(); + if (index >= actions.size()) + { + throw Exception("Union index out of boundary", ErrorCodes::INCORRECT_DATA); + } + actions[index].execute(columns, decoder, ext); break; } } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 5033176ca4b..74070252ebb 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -48,7 +48,7 @@ void MySQLOutputFormat::writePrefix() { packet_endpoint->sendPacket(LengthEncodedNumber(header.columns())); - for (size_t i = 0; i < header.columns(); i++) + for (size_t i = 0; i < header.columns(); ++i) { const auto & column_name = header.getColumnsWithTypeAndName()[i].name; packet_endpoint->sendPacket(getColumnDefinition(column_name, data_types[i]->getTypeId())); @@ -63,7 +63,7 @@ void MySQLOutputFormat::writePrefix() void MySQLOutputFormat::consume(Chunk chunk) { - for (size_t i = 0; i < chunk.getNumRows(); i++) + for (size_t i = 0; i < chunk.getNumRows(); ++i) { ProtocolText::ResultSetRow row_packet(serializations, chunk.getColumns(), i); packet_endpoint->sendPacket(row_packet); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 8768e2f5f14..87eec459aa3 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include "ArrowBufferedStreams.h" #include "ArrowColumnToCHColumn.h" #include @@ -19,13 +18,6 @@ namespace ErrorCodes extern const int CANNOT_READ_ALL_DATA; } -#define THROW_ARROW_NOT_OK(status) \ - do \ - { \ - if (::arrow::Status _s = (status); !_s.ok()) \ - throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ - } while (false) - ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_) : IInputFormat(std::move(header_), in_), format_settings(format_settings_) { @@ -38,37 +30,28 @@ Chunk ORCBlockInputFormat::generate() if (!file_reader) prepareReader(); + if (is_stopped) + return {}; + + std::shared_ptr batch_reader; + auto result = file_reader->NextStripeReader(format_settings.orc.row_batch_size, include_indices); + if (!result.ok()) + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Failed to create batch reader: {}", result.status().ToString()); + batch_reader = std::move(result).ValueOrDie(); if (!batch_reader) { - arrow::Status reader_status = file_reader->NextStripeReader( - DBMS_DEFAULT_BUFFER_SIZE, include_indices, &batch_reader); - if (!reader_status.ok()) - throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, - "Failed to create batch reader: {}", - reader_status.ToString()); - if 
(!batch_reader) - return res; + return res; } - std::shared_ptr batch_result; - arrow::Status batch_status = batch_reader->ReadNext(&batch_result); - if (!batch_status.ok()) - throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, - "Error while reading batch of ORC data: {}", - batch_status.ToString()); + std::shared_ptr table; + arrow::Status table_status = batch_reader->ReadAll(&table); + if (!table_status.ok()) + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of ORC data: {}", table_status.ToString()); - if (!batch_result || !batch_result->num_rows()) + if (!table || !table->num_rows()) return res; - ArrowColumnToCHColumn::NameToColumnPtr name_to_column_ptr; - for (const auto & column_name : column_names) - { - arrow::ArrayVector vec = {batch_result->GetColumnByName(column_name)}; - std::shared_ptr arrow_column = std::make_shared(vec); - name_to_column_ptr[column_name] = arrow_column; - } - arrow_column_to_ch_column->arrowColumnsToCHChunk(res, name_to_column_ptr); - batch_reader.reset(); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table); return res; } @@ -79,7 +62,6 @@ void ORCBlockInputFormat::resetParser() file_reader.reset(); include_indices.clear(); - stripe_current = 0; } static size_t countIndicesForType(std::shared_ptr type) @@ -107,12 +89,19 @@ static size_t countIndicesForType(std::shared_ptr type) void ORCBlockInputFormat::prepareReader() { - THROW_ARROW_NOT_OK(arrow::adapters::orc::ORCFileReader::Open(asArrowFile(*in, format_settings), arrow::default_memory_pool(), &file_reader)); - stripe_total = file_reader->NumberOfStripes(); - stripe_current = 0; + auto arrow_file = asArrowFile(*in, format_settings, is_stopped); + if (is_stopped) + return; - std::shared_ptr schema; - THROW_ARROW_NOT_OK(file_reader->ReadSchema(&schema)); + auto result = arrow::adapters::orc::ORCFileReader::Open(std::move(arrow_file), arrow::default_memory_pool()); + if (!result.ok()) + throw Exception(result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); + file_reader = std::move(result).ValueOrDie(); + + auto read_schema_result = file_reader->ReadSchema(); + if (!read_schema_result.ok()) + throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); + std::shared_ptr schema = std::move(read_schema_result).ValueOrDie(); arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "ORC", format_settings.orc.import_nested); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 857ec7937b7..c7dc1c4a710 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -29,28 +29,29 @@ public: protected: Chunk generate() override; + void onCancel() override + { + is_stopped = 1; + } + private: // TODO: check that this class implements every part of its parent std::unique_ptr file_reader; - std::shared_ptr batch_reader; - std::unique_ptr arrow_column_to_ch_column; std::vector column_names; - int stripe_total = 0; - - int stripe_current = 0; - // indices of columns to read from ORC file std::vector include_indices; const FormatSettings format_settings; void prepareReader(); + + std::atomic is_stopped{0}; }; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 901531d81cf..0b6cd006300 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -44,6 +44,9 @@ 
Chunk ParquetBlockInputFormat::generate() if (!file_reader) prepareReader(); + if (is_stopped) + return {}; + if (row_group_current >= row_group_total) return res; @@ -93,7 +96,11 @@ static size_t countIndicesForType(std::shared_ptr type) void ParquetBlockInputFormat::prepareReader() { - THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(asArrowFile(*in, format_settings), arrow::default_memory_pool(), &file_reader)); + auto arrow_file = asArrowFile(*in, format_settings, is_stopped); + if (is_stopped) + return; + + THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader)); row_group_total = file_reader->num_row_groups(); row_group_current = 0; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index 472aec66da3..3e04c523442 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -28,6 +28,11 @@ private: void prepareReader(); + void onCancel() override + { + is_stopped = 1; + } + std::unique_ptr file_reader; int row_group_total = 0; // indices of columns to read from Parquet file @@ -35,6 +40,8 @@ private: std::unique_ptr arrow_column_to_ch_column; int row_group_current = 0; const FormatSettings format_settings; + + std::atomic is_stopped{0}; }; } diff --git a/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp index f46488fd0a8..0450051daf8 100644 --- a/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp @@ -21,7 +21,7 @@ void PostgreSQLOutputFormat::writePrefix() std::vector columns; columns.reserve(header.columns()); - for (size_t i = 0; i < header.columns(); i++) + for (size_t i = 0; i < header.columns(); ++i) { const auto & column_name = header.getColumnsWithTypeAndName()[i].name; columns.emplace_back(column_name, data_types[i]->getTypeId()); diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 57413759e7b..ad65a5f707d 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -160,7 +160,7 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) Serializations serializations(num_columns); for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getDefaultSerialization(); + serializations[i] = header.getByPosition(i).type->getSerialization(*columns[i]->getSerializationInfo()); WidthsPerColumn widths; Widths max_widths; diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp index cf47a26efc7..85b27a6fb57 100644 --- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp @@ -26,7 +26,7 @@ void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind Serializations serializations(num_columns); for (size_t i = 0; i < num_columns; ++i) - serializations[i] = header.getByPosition(i).type->getDefaultSerialization(); + serializations[i] = header.getByPosition(i).type->getSerialization(*columns[i]->getSerializationInfo()); WidthsPerColumn widths; Widths max_widths; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp 
b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index 328c34823a0..d5a35fef7bd 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -1,7 +1,7 @@ #include #include -#include -#include +#include +#include #include diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h index 5c8d18875e7..c56324c7d9a 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h @@ -39,6 +39,7 @@ public: void set(Chunk chunk_) { + convertToFullIfSparse(chunk_); chunk = std::move(chunk_); skip_last_row = false; } diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 9ec0939f3a8..5f9f9f9b1a1 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -6,7 +6,7 @@ namespace DB SinkToStorage::SinkToStorage(const Block & header) : ExceptionKeepingTransform(header, header, false) {} -void SinkToStorage::transform(Chunk & chunk) +void SinkToStorage::onConsume(Chunk chunk) { /** Throw an exception if the sizes of arrays - elements of nested data structures doesn't match. * We have to make this assertion before writing to table, because storage engine may assume that they have equal sizes. @@ -16,8 +16,16 @@ void SinkToStorage::transform(Chunk & chunk) Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); consume(chunk.clone()); - if (lastBlockIsDuplicate()) - chunk.clear(); + if (!lastBlockIsDuplicate()) + cur_chunk = std::move(chunk); +} + +SinkToStorage::GenerateResult SinkToStorage::onGenerate() +{ + GenerateResult res; + res.chunk = std::move(cur_chunk); + res.is_done = true; + return res; } } diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 01d51940d64..023bbd8b094 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -24,7 +24,10 @@ protected: private: std::vector table_locks; - void transform(Chunk & chunk) override; + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override; + + Chunk cur_chunk; }; using SinkToStoragePtr = std::shared_ptr; diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 8e9cdcfda48..b0cb62340e9 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -2,6 +2,7 @@ #if USE_MYSQL #include +#include #include #include #include @@ -126,7 +127,7 @@ namespace { using ValueType = ExternalResultDescription::ValueType; - void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value, size_t & read_bytes_size) + void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value, size_t & read_bytes_size, enum enum_field_types mysql_type) { switch (type) { @@ -143,9 +144,24 @@ namespace read_bytes_size += 4; break; case ValueType::vtUInt64: - assert_cast(column).insertValue(value.getUInt()); - read_bytes_size += 8; + { + //we don't have enum enum_field_types definition in mysqlxx/Types.h, so we use literal values directly here. 
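The MySQLSource change below handles MySQL BIT columns (field type 16) by reading the value's raw bytes big-endian into a UInt64 instead of calling getUInt(). The following self-contained sketch shows only the decoding idea; decodeBitField is an illustrative helper of mine, not the readBigEndianStrict call used in the actual change.

// BIT(N) arrives as up to 8 raw bytes, most significant byte first,
// so the bytes are folded into a UInt64 from left to right.
#include <cassert>
#include <cstddef>
#include <cstdint>

uint64_t decodeBitField(const unsigned char * data, size_t n)
{
    uint64_t value = 0;
    for (size_t i = 0; i < n && i < sizeof(uint64_t); ++i)
        value = (value << 8) | data[i];    // big-endian: first byte is most significant
    return value;
}

int main()
{
    // A BIT(24) value 0x0102A3 is sent as three bytes: 0x01 0x02 0xA3.
    const unsigned char payload[] = {0x01, 0x02, 0xA3};
    assert(decodeBitField(payload, sizeof(payload)) == 0x0102A3ULL);
}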
+ if (static_cast(mysql_type) == 16) + { + size_t n = value.size(); + UInt64 val = 0UL; + ReadBufferFromMemory payload(const_cast(value.data()), n); + MySQLReplication::readBigEndianStrict(payload, reinterpret_cast(&val), n); + assert_cast(column).insertValue(val); + read_bytes_size += n; + } + else + { + assert_cast(column).insertValue(value.getUInt()); + read_bytes_size += 8; + } break; + } case ValueType::vtInt8: assert_cast(column).insertValue(value.getInt()); read_bytes_size += 1; @@ -258,12 +274,12 @@ Chunk MySQLSource::generate() { ColumnNullable & column_nullable = assert_cast(*columns[index]); const auto & data_type = assert_cast(*sample.type); - insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value, read_bytes_size); + insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value, read_bytes_size, row.getFieldType(position_mapping[index])); column_nullable.getNullMapData().emplace_back(false); } else { - insertValue(*sample.type, *columns[index], description.types[index].first, value, read_bytes_size); + insertValue(*sample.type, *columns[index], description.types[index].first, value, read_bytes_size, row.getFieldType(position_mapping[index])); } } else diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp new file mode 100644 index 00000000000..dc272ace01e --- /dev/null +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -0,0 +1,586 @@ +#include + +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNSUPPORTED_METHOD; + extern const int TIMEOUT_EXCEEDED; + extern const int CANNOT_FCNTL; + extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int CANNOT_POLL; + extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; +} + +static bool tryMakeFdNonBlocking(int fd) +{ + int flags = fcntl(fd, F_GETFL, 0); + if (-1 == flags) + return false; + if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) + return false; + + return true; +} + +static void makeFdNonBlocking(int fd) +{ + bool result = tryMakeFdNonBlocking(fd); + if (!result) + throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); +} + +static bool tryMakeFdBlocking(int fd) +{ + int flags = fcntl(fd, F_GETFL, 0); + if (-1 == flags) + return false; + + if (-1 == fcntl(fd, F_SETFL, flags & (~O_NONBLOCK))) + return false; + + return true; +} + +static void makeFdBlocking(int fd) +{ + bool result = tryMakeFdBlocking(fd); + if (!result) + throwFromErrno("Cannot set blocking mode of pipe", ErrorCodes::CANNOT_FCNTL); +} + +static bool pollFd(int fd, size_t timeout_milliseconds, int events) +{ + pollfd pfd; + pfd.fd = fd; + pfd.events = events; + pfd.revents = 0; + + Stopwatch watch; + + int res; + + while (true) + { + res = poll(&pfd, 1, timeout_milliseconds); + + if (res < 0) + { + if (errno == EINTR) + { + watch.stop(); + timeout_milliseconds -= watch.elapsedMilliseconds(); + watch.start(); + + continue; + } + else + { + throwFromErrno("Cannot poll", ErrorCodes::CANNOT_POLL); + } + } + else + { + break; + } + } + + return res > 0; +} + +class TimeoutReadBufferFromFileDescriptor : public BufferWithOwnMemory +{ +public: + explicit TimeoutReadBufferFromFileDescriptor(int fd_, size_t timeout_milliseconds_) + : fd(fd_) + , timeout_milliseconds(timeout_milliseconds_) + { + makeFdNonBlocking(fd); + } + + bool nextImpl() override + { + size_t 
bytes_read = 0; + + while (!bytes_read) + { + if (!pollFd(fd, timeout_milliseconds, POLLIN)) + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Pipe read timeout exceeded {} milliseconds", timeout_milliseconds); + + ssize_t res = ::read(fd, internal_buffer.begin(), internal_buffer.size()); + + if (-1 == res && errno != EINTR) + throwFromErrno("Cannot read from pipe ", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); + + if (res == 0) + break; + + if (res > 0) + bytes_read += res; + } + + if (bytes_read > 0) + { + working_buffer = internal_buffer; + working_buffer.resize(bytes_read); + } + else + { + return false; + } + + return true; + } + + void reset() const + { + makeFdBlocking(fd); + } + + ~TimeoutReadBufferFromFileDescriptor() override + { + tryMakeFdBlocking(fd); + } + +private: + int fd; + size_t timeout_milliseconds; +}; + +class TimeoutWriteBufferFromFileDescriptor : public BufferWithOwnMemory +{ +public: + explicit TimeoutWriteBufferFromFileDescriptor(int fd_, size_t timeout_milliseconds_) + : fd(fd_) + , timeout_milliseconds(timeout_milliseconds_) + { + makeFdNonBlocking(fd); + } + + void nextImpl() override + { + if (!offset()) + return; + + size_t bytes_written = 0; + + while (bytes_written != offset()) + { + if (!pollFd(fd, timeout_milliseconds, POLLOUT)) + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Pipe write timeout exceeded {} milliseconds", timeout_milliseconds); + + ssize_t res = ::write(fd, working_buffer.begin() + bytes_written, offset() - bytes_written); + + if ((-1 == res || 0 == res) && errno != EINTR) + throwFromErrno("Cannot write into pipe ", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); + + if (res > 0) + bytes_written += res; + } + } + + void reset() const + { + makeFdBlocking(fd); + } + + ~TimeoutWriteBufferFromFileDescriptor() override + { + tryMakeFdBlocking(fd); + } + +private: + int fd; + size_t timeout_milliseconds; +}; + +class ShellCommandHolder +{ +public: + using ShellCommandBuilderFunc = std::function()>; + + explicit ShellCommandHolder(ShellCommandBuilderFunc && func_) + : func(std::move(func_)) + {} + + std::unique_ptr buildCommand() + { + if (returned_command) + return std::move(returned_command); + + return func(); + } + + void returnCommand(std::unique_ptr command) + { + returned_command = std::move(command); + } + +private: + std::unique_ptr returned_command; + ShellCommandBuilderFunc func; +}; + +namespace +{ + /** A stream, that get child process and sends data using tasks in background threads. + * For each send data task background thread is created. Send data task must send data to process input pipes. + * ShellCommandPoolSource receives data from process stdout. + * + * If process_pool is passed in constructor then after source is destroyed process is returned to pool. 
+ */ + class ShellCommandSource final : public SourceWithProgress + { + public: + + using SendDataTask = std::function; + + ShellCommandSource( + ContextPtr context_, + const std::string & format_, + size_t command_read_timeout_milliseconds, + const Block & sample_block_, + std::unique_ptr && command_, + std::vector && send_data_tasks = {}, + const ShellCommandSourceConfiguration & configuration_ = {}, + std::unique_ptr && command_holder_ = nullptr, + std::shared_ptr process_pool_ = nullptr) + : SourceWithProgress(sample_block_) + , context(context_) + , format(format_) + , sample_block(sample_block_) + , command(std::move(command_)) + , configuration(configuration_) + , timeout_command_out(command->out.getFD(), command_read_timeout_milliseconds) + , command_holder(std::move(command_holder_)) + , process_pool(process_pool_) + { + for (auto && send_data_task : send_data_tasks) + { + send_data_threads.emplace_back([task = std::move(send_data_task), this]() + { + try + { + task(); + } + catch (...) + { + std::lock_guard lock(send_data_lock); + exception_during_send_data = std::current_exception(); + } + }); + } + + size_t max_block_size = configuration.max_block_size; + + if (configuration.read_fixed_number_of_rows) + { + /** Currently parallel parsing input format cannot read exactly max_block_size rows from input, + * so it will be blocked on ReadBufferFromFileDescriptor because this file descriptor represent pipe that does not have eof. + */ + auto context_for_reading = Context::createCopy(context); + context_for_reading->setSetting("input_format_parallel_parsing", false); + context = context_for_reading; + + if (configuration.read_number_of_rows_from_process_output) + { + /// Initialize executor in generate + return; + } + + max_block_size = configuration.number_of_rows_to_read; + } + + pipeline = QueryPipeline(Pipe(context->getInputFormat(format, timeout_command_out, sample_block, max_block_size))); + executor = std::make_unique(pipeline); + } + + ~ShellCommandSource() override + { + for (auto & thread : send_data_threads) + if (thread.joinable()) + thread.join(); + + if (command_is_invalid) + command = nullptr; + + if (command_holder && process_pool) + { + bool valid_command = configuration.read_fixed_number_of_rows && current_read_rows >= configuration.number_of_rows_to_read; + + if (command && valid_command) + command_holder->returnCommand(std::move(command)); + + process_pool->returnObject(std::move(command_holder)); + } + } + + protected: + + Chunk generate() override + { + rethrowExceptionDuringSendDataIfNeeded(); + + Chunk chunk; + + try + { + if (configuration.read_fixed_number_of_rows) + { + if (!executor && configuration.read_number_of_rows_from_process_output) + { + readText(configuration.number_of_rows_to_read, timeout_command_out); + char dummy; + readChar(dummy, timeout_command_out); + + size_t max_block_size = configuration.number_of_rows_to_read; + pipeline = QueryPipeline(Pipe(context->getInputFormat(format, timeout_command_out, sample_block, max_block_size))); + executor = std::make_unique(pipeline); + } + + if (current_read_rows >= configuration.number_of_rows_to_read) + return {}; + } + + if (!executor->pull(chunk)) + return {}; + + current_read_rows += chunk.getNumRows(); + } + catch (...) 
+ { + command_is_invalid = true; + throw; + } + + return chunk; + } + + Status prepare() override + { + auto status = SourceWithProgress::prepare(); + + if (status == Status::Finished) + { + for (auto & thread : send_data_threads) + if (thread.joinable()) + thread.join(); + + rethrowExceptionDuringSendDataIfNeeded(); + } + + return status; + } + + String getName() const override { return "ShellCommandSource"; } + + private: + + void rethrowExceptionDuringSendDataIfNeeded() + { + std::lock_guard lock(send_data_lock); + if (exception_during_send_data) + { + command_is_invalid = true; + std::rethrow_exception(exception_during_send_data); + } + } + + ContextPtr context; + std::string format; + Block sample_block; + + std::unique_ptr command; + ShellCommandSourceConfiguration configuration; + + TimeoutReadBufferFromFileDescriptor timeout_command_out; + + size_t current_read_rows = 0; + + ShellCommandHolderPtr command_holder; + std::shared_ptr process_pool; + + QueryPipeline pipeline; + std::unique_ptr executor; + + std::vector send_data_threads; + + std::mutex send_data_lock; + std::exception_ptr exception_during_send_data; + + std::atomic command_is_invalid {false}; + }; + + class SendingChunkHeaderTransform final : public ISimpleTransform + { + public: + SendingChunkHeaderTransform(const Block & header, std::shared_ptr buffer_) + : ISimpleTransform(header, header, false) + , buffer(buffer_) + { + } + + String getName() const override { return "SendingChunkHeaderTransform"; } + + protected: + + void transform(Chunk & chunk) override + { + writeText(chunk.getNumRows(), *buffer); + writeChar('\n', *buffer); + } + + private: + std::shared_ptr buffer; + }; + +} + +ShellCommandSourceCoordinator::ShellCommandSourceCoordinator(const Configuration & configuration_) + : configuration(configuration_) +{ + if (configuration.is_executable_pool) + process_pool = std::make_shared(configuration.pool_size ? 
configuration.pool_size : std::numeric_limits::max()); +} + +Pipe ShellCommandSourceCoordinator::createPipe( + const std::string & command, + const std::vector & arguments, + std::vector && input_pipes, + Block sample_block, + ContextPtr context, + const ShellCommandSourceConfiguration & source_configuration) +{ + ShellCommand::Config command_config(command); + command_config.arguments = arguments; + for (size_t i = 1; i < input_pipes.size(); ++i) + command_config.write_fds.emplace_back(i + 2); + + std::unique_ptr process; + std::unique_ptr process_holder; + + auto destructor_strategy = ShellCommand::DestructorStrategy{true /*terminate_in_destructor*/, configuration.command_termination_timeout_seconds}; + command_config.terminate_in_destructor_strategy = destructor_strategy; + + bool is_executable_pool = (process_pool != nullptr); + if (is_executable_pool) + { + bool execute_direct = configuration.execute_direct; + + bool result = process_pool->tryBorrowObject( + process_holder, + [command_config, execute_direct]() + { + ShellCommandHolder::ShellCommandBuilderFunc func = [command_config, execute_direct]() mutable + { + if (execute_direct) + return ShellCommand::executeDirect(command_config); + else + return ShellCommand::execute(command_config); + }; + + return std::make_unique(std::move(func)); + }, + configuration.max_command_execution_time_seconds * 10000); + + if (!result) + throw Exception( + ErrorCodes::TIMEOUT_EXCEEDED, + "Could not get process from pool, max command execution timeout exceeded {} seconds", + configuration.max_command_execution_time_seconds); + + process = process_holder->buildCommand(); + } + else + { + if (configuration.execute_direct) + process = ShellCommand::executeDirect(command_config); + else + process = ShellCommand::execute(command_config); + } + + std::vector tasks; + tasks.reserve(input_pipes.size()); + + for (size_t i = 0; i < input_pipes.size(); ++i) + { + WriteBufferFromFile * write_buffer = nullptr; + + if (i == 0) + { + write_buffer = &process->in; + } + else + { + auto descriptor = i + 2; + auto it = process->write_fds.find(descriptor); + if (it == process->write_fds.end()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Process does not contain descriptor to write {}", descriptor); + + write_buffer = &it->second; + } + + int write_buffer_fd = write_buffer->getFD(); + auto timeout_write_buffer = std::make_shared(write_buffer_fd, configuration.command_write_timeout_milliseconds); + + input_pipes[i].resize(1); + + if (configuration.send_chunk_header) + { + auto transform = std::make_shared(input_pipes[i].getHeader(), timeout_write_buffer); + input_pipes[i].addTransform(std::move(transform)); + } + + auto pipeline = std::make_shared(std::move(input_pipes[i])); + auto out = context->getOutputFormat(configuration.format, *timeout_write_buffer, materializeBlock(pipeline->getHeader())); + out->setAutoFlush(); + pipeline->complete(std::move(out)); + + ShellCommandSource::SendDataTask task = [pipeline, timeout_write_buffer, write_buffer, is_executable_pool]() + { + CompletedPipelineExecutor executor(*pipeline); + executor.execute(); + + if (!is_executable_pool) + { + timeout_write_buffer->next(); + timeout_write_buffer->reset(); + + write_buffer->close(); + } + }; + + tasks.emplace_back(std::move(task)); + } + + auto source = std::make_unique( + context, + configuration.format, + configuration.command_read_timeout_milliseconds, + std::move(sample_block), + std::move(process), + std::move(tasks), + source_configuration, + std::move(process_holder), + 
process_pool); + auto pipe = Pipe(std::move(source)); + + return pipe; +} + +} diff --git a/src/Processors/Sources/ShellCommandSource.h b/src/Processors/Sources/ShellCommandSource.h index 4974c33f290..649c713afcb 100644 --- a/src/Processors/Sources/ShellCommandSource.h +++ b/src/Processors/Sources/ShellCommandSource.h @@ -19,14 +19,10 @@ namespace DB { -/** A stream, that get child process and sends data using tasks in background threads. - * For each send data task background thread is created. Send data task must send data to process input pipes. - * ShellCommandPoolSource receives data from process stdout. - * - * If process_pool is passed in constructor then after source is destroyed process is returned to pool. - */ +class ShellCommandHolder; +using ShellCommandHolderPtr = std::unique_ptr; -using ProcessPool = BorrowedObjectPool>; +using ProcessPool = BorrowedObjectPool; struct ShellCommandSourceConfiguration { @@ -37,148 +33,92 @@ struct ShellCommandSourceConfiguration /// Valid only if read_fixed_number_of_rows = true size_t number_of_rows_to_read = 0; /// Max block size - size_t max_block_size = DBMS_DEFAULT_BUFFER_SIZE; + size_t max_block_size = DEFAULT_BLOCK_SIZE; }; -class ShellCommandSource final : public SourceWithProgress +class ShellCommandSourceCoordinator { public: - using SendDataTask = std::function; + struct Configuration + { - ShellCommandSource( + /// Script output format + std::string format; + + /// Command termination timeout in seconds + size_t command_termination_timeout_seconds = 10; + + /// Timeout for reading data from command stdout + size_t command_read_timeout_milliseconds = 10000; + + /// Timeout for writing data to command stdin + size_t command_write_timeout_milliseconds = 10000; + + /// Pool size valid only if executable_pool = true + size_t pool_size = 16; + + /// Max command execution time in milliseconds. Valid only if executable_pool = true + size_t max_command_execution_time_seconds = 10; + + /// Should pool of processes be created. + bool is_executable_pool = false; + + /// Send number_of_rows\n before sending chunk to process. + bool send_chunk_header = false; + + /// Execute script direct or with /bin/bash. + bool execute_direct = true; + + }; + + explicit ShellCommandSourceCoordinator(const Configuration & configuration_); + + const Configuration & getConfiguration() const + { + return configuration; + } + + Pipe createPipe( + const std::string & command, + const std::vector & arguments, + std::vector && input_pipes, + Block sample_block, ContextPtr context, - const std::string & format, - const Block & sample_block, - std::unique_ptr && command_, - std::vector && send_data_tasks = {}, - const ShellCommandSourceConfiguration & configuration_ = {}, - std::shared_ptr process_pool_ = nullptr) - : SourceWithProgress(sample_block) - , command(std::move(command_)) - , configuration(configuration_) - , process_pool(process_pool_) + const ShellCommandSourceConfiguration & source_configuration = {}); + + Pipe createPipe( + const std::string & command, + std::vector && input_pipes, + Block sample_block, + ContextPtr context, + const ShellCommandSourceConfiguration & source_configuration = {}) { - for (auto && send_data_task : send_data_tasks) - { - send_data_threads.emplace_back([task = std::move(send_data_task), this]() - { - try - { - task(); - } - catch (...) 
- { - std::lock_guard lock(send_data_lock); - exception_during_send_data = std::current_exception(); - } - }); - } - - size_t max_block_size = configuration.max_block_size; - - if (configuration.read_fixed_number_of_rows) - { - /** Currently parallel parsing input format cannot read exactly max_block_size rows from input, - * so it will be blocked on ReadBufferFromFileDescriptor because this file descriptor represent pipe that does not have eof. - */ - auto context_for_reading = Context::createCopy(context); - context_for_reading->setSetting("input_format_parallel_parsing", false); - context = context_for_reading; - - if (configuration.read_number_of_rows_from_process_output) - { - readText(configuration.number_of_rows_to_read, command->out); - char dummy; - readChar(dummy, command->out); - } - - max_block_size = configuration.number_of_rows_to_read; - } - - pipeline = QueryPipeline(Pipe(context->getInputFormat(format, command->out, sample_block, max_block_size))); - executor = std::make_unique(pipeline); + return createPipe(command, {}, std::move(input_pipes), std::move(sample_block), std::move(context), source_configuration); } - ~ShellCommandSource() override + Pipe createPipe( + const std::string & command, + const std::vector & arguments, + Block sample_block, + ContextPtr context) { - for (auto & thread : send_data_threads) - if (thread.joinable()) - thread.join(); - - if (command && process_pool) - process_pool->returnObject(std::move(command)); + return createPipe(command, arguments, {}, std::move(sample_block), std::move(context), {}); } -protected: - - Chunk generate() override + Pipe createPipe( + const std::string & command, + Block sample_block, + ContextPtr context) { - rethrowExceptionDuringSendDataIfNeeded(); - - if (configuration.read_fixed_number_of_rows && configuration.number_of_rows_to_read == current_read_rows) - return {}; - - Chunk chunk; - - try - { - if (!executor->pull(chunk)) - return {}; - - current_read_rows += chunk.getNumRows(); - } - catch (...) 
- { - command = nullptr; - throw; - } - - return chunk; + return createPipe(command, {}, {}, std::move(sample_block), std::move(context), {}); } - Status prepare() override - { - auto status = SourceWithProgress::prepare(); - - if (status == Status::Finished) - { - for (auto & thread : send_data_threads) - if (thread.joinable()) - thread.join(); - - rethrowExceptionDuringSendDataIfNeeded(); - } - - return status; - } - - String getName() const override { return "ShellCommandSource"; } - private: - void rethrowExceptionDuringSendDataIfNeeded() - { - std::lock_guard lock(send_data_lock); - if (exception_during_send_data) - { - command = nullptr; - std::rethrow_exception(exception_during_send_data); - } - } + Configuration configuration; - std::unique_ptr command; - ShellCommandSourceConfiguration configuration; - - size_t current_read_rows = 0; - - std::shared_ptr process_pool; - - QueryPipeline pipeline; - std::unique_ptr executor; - - std::vector send_data_threads; - std::mutex send_data_lock; - std::exception_ptr exception_during_send_data; + std::shared_ptr process_pool = nullptr; }; + } diff --git a/src/Processors/TTL/ITTLAlgorithm.h b/src/Processors/TTL/ITTLAlgorithm.h index d219f9f7ad3..49cd2c46d9d 100644 --- a/src/Processors/TTL/ITTLAlgorithm.h +++ b/src/Processors/TTL/ITTLAlgorithm.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 3602b7f7d03..fae1ede1f9c 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -231,8 +231,10 @@ IProcessor::Status AggregatingInOrderTransform::prepare() input.setNeeded(); return Status::NeedData; } + assert(!is_consume_finished); current_chunk = input.pull(true /* set_not_needed */); + convertToFullIfSparse(current_chunk); return Status::Ready; } diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 8357a997960..5b58530f3d5 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -329,7 +329,7 @@ private: if (num_threads > first->aggregates_pools.size()) { Arenas & first_pool = first->aggregates_pools; - for (size_t j = first_pool.size(); j < num_threads; j++) + for (size_t j = first_pool.size(); j < num_threads; ++j) first_pool.emplace_back(std::make_shared()); } diff --git a/src/Processors/Transforms/CheckConstraintsTransform.cpp b/src/Processors/Transforms/CheckConstraintsTransform.cpp index b7849b8a627..50ec86f33b6 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.cpp +++ b/src/Processors/Transforms/CheckConstraintsTransform.cpp @@ -35,7 +35,7 @@ CheckConstraintsTransform::CheckConstraintsTransform( } -void CheckConstraintsTransform::transform(Chunk & chunk) +void CheckConstraintsTransform::onConsume(Chunk chunk) { if (chunk.getNumRows() > 0) { @@ -123,6 +123,7 @@ void CheckConstraintsTransform::transform(Chunk & chunk) } rows_written += chunk.getNumRows(); + cur_chunk = std::move(chunk); } } diff --git a/src/Processors/Transforms/CheckConstraintsTransform.h b/src/Processors/Transforms/CheckConstraintsTransform.h index 3198ec84198..09833ff396b 100644 --- a/src/Processors/Transforms/CheckConstraintsTransform.h +++ b/src/Processors/Transforms/CheckConstraintsTransform.h @@ -23,12 +23,19 @@ public: String getName() const override { return 
"CheckConstraintsTransform"; } - void transform(Chunk & chunk) override; + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override + { + GenerateResult res; + res.chunk = std::move(cur_chunk); + return res; + } private: StorageID table_id; const ASTs constraints_to_check; const ConstraintsExpressions expressions; size_t rows_written = 0; + Chunk cur_chunk; }; } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 88ecbe6adc3..79b6360f22e 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -16,7 +16,7 @@ namespace ProfileEvents namespace DB { -void CountingTransform::transform(Chunk & chunk) +void CountingTransform::onConsume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); progress.incrementPiecewiseAtomically(local_progress); @@ -39,6 +39,8 @@ void CountingTransform::transform(Chunk & chunk) if (progress_callback) progress_callback(local_progress); + + cur_chunk = std::move(chunk); } } diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index e7100e8510b..877f6a0a543 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -34,13 +34,20 @@ public: return progress; } - void transform(Chunk & chunk) override; + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override + { + GenerateResult res; + res.chunk = std::move(cur_chunk); + return res; + } protected: Progress progress; ProgressCallback progress_callback; QueryStatus * process_elem = nullptr; ThreadStatus * thread_status = nullptr; + Chunk cur_chunk; }; } diff --git a/src/Processors/Transforms/DistinctTransform.cpp b/src/Processors/Transforms/DistinctTransform.cpp index fddfe663af5..cf6a128aa40 100644 --- a/src/Processors/Transforms/DistinctTransform.cpp +++ b/src/Processors/Transforms/DistinctTransform.cpp @@ -54,6 +54,9 @@ void DistinctTransform::buildFilter( void DistinctTransform::transform(Chunk & chunk) { + /// Convert to full column, because SetVariant for sparse column is not implemented. + convertToFullIfSparse(chunk); + auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.cpp b/src/Processors/Transforms/ExceptionKeepingTransform.cpp index 5c968471e1b..f2b29a45f84 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.cpp +++ b/src/Processors/Transforms/ExceptionKeepingTransform.cpp @@ -21,8 +21,13 @@ ExceptionKeepingTransform::ExceptionKeepingTransform(const Block & in_header, co IProcessor::Status ExceptionKeepingTransform::prepare() { - if (!ignore_on_start_and_finish && !was_on_start_called) - return Status::Ready; + if (stage == Stage::Start) + { + if (ignore_on_start_and_finish) + stage = Stage::Consume; + else + return Status::Ready; + } /// Check can output. 
@@ -43,12 +48,19 @@ IProcessor::Status ExceptionKeepingTransform::prepare() return Status::PortFull; } - if (!ready_input) + if (stage == Stage::Generate) + return Status::Ready; + + while (!ready_input) { if (input.isFinished()) { - if (!ignore_on_start_and_finish && !was_on_finish_called && !has_exception) - return Status::Ready; + if (stage != Stage::Exception && stage != Stage::Finish) + { + stage = Stage::Finish; + if (!ignore_on_start_and_finish) + return Status::Ready; + } output.finish(); return Status::Finished; @@ -63,12 +75,13 @@ IProcessor::Status ExceptionKeepingTransform::prepare() if (data.exception) { - has_exception = true; + stage = Stage::Exception; + onException(); output.pushData(std::move(data)); return Status::PortFull; } - if (has_exception) + if (stage == Stage::Exception) /// In case of exception, just drop all other data. /// If transform is stateful, it's state may be broken after exception from transform() data.chunk.clear(); @@ -117,40 +130,66 @@ static std::exception_ptr runStep(std::function step, ThreadStatus * thr void ExceptionKeepingTransform::work() { - if (!ignore_on_start_and_finish && !was_on_start_called) + if (stage == Stage::Start) { - was_on_start_called = true; + stage = Stage::Consume; if (auto exception = runStep([this] { onStart(); }, thread_status, elapsed_counter_ms)) { - has_exception = true; + stage = Stage::Exception; ready_output = true; data.exception = std::move(exception); + onException(); } } - else if (ready_input) + else if (stage == Stage::Consume || stage == Stage::Generate) { - ready_input = false; - - if (auto exception = runStep([this] { transform(data.chunk); }, thread_status, elapsed_counter_ms)) + if (stage == Stage::Consume) { - has_exception = true; - data.chunk.clear(); - data.exception = std::move(exception); + ready_input = false; + + if (auto exception = runStep([this] { onConsume(std::move(data.chunk)); }, thread_status, elapsed_counter_ms)) + { + stage = Stage::Exception; + ready_output = true; + data.exception = std::move(exception); + onException(); + } + else + stage = Stage::Generate; } - if (data.chunk || data.exception) - ready_output = true; - } - else if (!ignore_on_start_and_finish && !was_on_finish_called) - { - was_on_finish_called = true; + if (stage == Stage::Generate) + { + GenerateResult res; + if (auto exception = runStep([this, &res] { res = onGenerate(); }, thread_status, elapsed_counter_ms)) + { + stage = Stage::Exception; + ready_output = true; + data.exception = std::move(exception); + onException(); + } + else + { + if (res.chunk) + { + data.chunk = std::move(res.chunk); + ready_output = true; + } + if (res.is_done) + stage = Stage::Consume; + } + } + } + else if (stage == Stage::Finish) + { if (auto exception = runStep([this] { onFinish(); }, thread_status, elapsed_counter_ms)) { - has_exception = true; + stage = Stage::Exception; ready_output = true; data.exception = std::move(exception); + onException(); } } } diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.h b/src/Processors/Transforms/ExceptionKeepingTransform.h index 867f13bf53a..e2bc161971e 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.h +++ b/src/Processors/Transforms/ExceptionKeepingTransform.h @@ -28,18 +28,31 @@ protected: OutputPort & output; Port::Data data; + enum class Stage + { + Start, + Consume, + Generate, + Finish, + Exception, + }; + + Stage stage = Stage::Start; bool ready_input = false; bool ready_output = false; - bool has_exception = false; - const bool ignore_on_start_and_finish = 
true; - bool was_on_start_called = false; - bool was_on_finish_called = false; -//protected: - virtual void transform(Chunk & chunk) = 0; + struct GenerateResult + { + Chunk chunk; + bool is_done = true; + }; + virtual void onStart() {} + virtual void onConsume(Chunk chunk) = 0; + virtual GenerateResult onGenerate() = 0; virtual void onFinish() {} + virtual void onException() {} public: ExceptionKeepingTransform(const Block & in_header, const Block & out_header, bool ignore_on_start_and_finish_ = true); diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index ca788f1dd9f..0d3341b000c 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -31,7 +31,7 @@ ConvertingTransform::ConvertingTransform(const Block & header_, ExpressionAction { } -void ConvertingTransform::transform(Chunk & chunk) +void ConvertingTransform::onConsume(Chunk chunk) { size_t num_rows = chunk.getNumRows(); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); @@ -39,6 +39,7 @@ void ConvertingTransform::transform(Chunk & chunk) expression->execute(block, num_rows); chunk.setColumns(block.getColumns(), num_rows); + cur_chunk = std::move(chunk); } } diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index a76dc733e14..ea73c8fb1da 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -43,10 +43,17 @@ public: String getName() const override { return "ConvertingTransform"; } protected: - void transform(Chunk & chunk) override; + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override + { + GenerateResult res; + res.chunk = std::move(cur_chunk); + return res; + } private: ExpressionActionsPtr expression; + Chunk cur_chunk; }; } diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index abf416e8047..1eaa5458d37 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -12,7 +13,7 @@ void MaterializingTransform::transform(Chunk & chunk) auto columns = chunk.detachColumns(); for (auto & col : columns) - col = col->convertToFullColumnIfConst(); + col = recursiveRemoveSparse(col->convertToFullColumnIfConst()); chunk.setColumns(std::move(columns), num_rows); } diff --git a/src/Processors/Transforms/PostgreSQLSource.cpp b/src/Processors/Transforms/PostgreSQLSource.cpp index ac8408d8338..88f092a2533 100644 --- a/src/Processors/Transforms/PostgreSQLSource.cpp +++ b/src/Processors/Transforms/PostgreSQLSource.cpp @@ -74,7 +74,17 @@ template void PostgreSQLSource::onStart() { if (!tx) - tx = std::make_shared(connection_holder->get()); + { + try + { + tx = std::make_shared(connection_holder->get()); + } + catch (const pqxx::broken_connection &) + { + connection_holder->update(); + tx = std::make_shared(connection_holder->get()); + } + } stream = std::make_unique(*tx, pqxx::from_query, std::string_view(query_str)); } diff --git a/src/Processors/Transforms/SortingTransform.cpp b/src/Processors/Transforms/SortingTransform.cpp index 7bdc927d0d8..8fa9d7adb84 100644 --- a/src/Processors/Transforms/SortingTransform.cpp +++ b/src/Processors/Transforms/SortingTransform.cpp @@ -31,6 +31,11 @@ MergeSorter::MergeSorter(Chunks chunks_, SortDescription & description_, 
size_t if (chunk.getNumRows() == 0) continue; + /// Convert to full column, because sparse column has + /// access to element in O(log(K)), where K is number of non-default rows, + /// which can be inefficient. + convertToFullIfSparse(chunk); + cursors.emplace_back(chunk.getColumns(), description); has_collation |= cursors.back().has_collation; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index f5aef01463a..908f6c0ff34 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -11,14 +11,22 @@ SquashingChunksTransform::SquashingChunksTransform( { } -void SquashingChunksTransform::transform(Chunk & chunk) +void SquashingChunksTransform::onConsume(Chunk chunk) { if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) { - chunk.setColumns(block.getColumns(), block.rows()); + cur_chunk.setColumns(block.getColumns(), block.rows()); } } +SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() +{ + GenerateResult res; + res.chunk = std::move(cur_chunk); + res.is_done = true; + return res; +} + void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); @@ -27,7 +35,7 @@ void SquashingChunksTransform::onFinish() void SquashingChunksTransform::work() { - if (has_exception) + if (stage == Stage::Exception) { data.chunk.clear(); ready_input = false; diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index bf4a051891b..531efe0d6a2 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -17,12 +17,14 @@ public: void work() override; protected: - void transform(Chunk & chunk) override; + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override; void onFinish() override; private: SquashingTransform squashing; + Chunk cur_chunk; Chunk finish_chunk; }; diff --git a/src/Processors/Transforms/TTLCalcTransform.h b/src/Processors/Transforms/TTLCalcTransform.h index 14592c07155..495879400dc 100644 --- a/src/Processors/Transforms/TTLCalcTransform.h +++ b/src/Processors/Transforms/TTLCalcTransform.h @@ -6,7 +6,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index a515a50fafb..7d0da3dca91 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -105,6 +105,7 @@ void TTLTransform::consume(Chunk chunk) return; } + convertToFullIfSparse(chunk); auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); for (const auto & algorithm : algorithms) diff --git a/src/Processors/Transforms/TTLTransform.h b/src/Processors/Transforms/TTLTransform.h index 9207c68448b..3f0dffd1998 100644 --- a/src/Processors/Transforms/TTLTransform.h +++ b/src/Processors/Transforms/TTLTransform.h @@ -7,7 +7,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 7a3bb25d2c6..0da7541556b 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -385,7 +385,7 @@ void WindowTransform::advancePartitionEnd() // prev_frame_start, partition_end); size_t i = 0; - for (; i < partition_by_columns; i++) + for (; i < 
partition_by_columns; ++i) { const auto * reference_column = inputAt(prev_frame_start)[partition_by_indices[i]].get(); @@ -667,7 +667,7 @@ bool WindowTransform::arePeers(const RowNumber & x, const RowNumber & y) const } size_t i = 0; - for (; i < n; i++) + for (; i < n; ++i) { const auto * column_x = inputAt(x)[order_by_indices[i]].get(); const auto * column_y = inputAt(y)[order_by_indices[i]].get(); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 503f34593c7..82c2a337a45 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -83,11 +83,26 @@ public: String getName() const override { return "ExecutingInnerQueryFromView"; } protected: - void transform(Chunk & chunk) override; + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override; private: ViewsDataPtr views_data; ViewRuntimeData & view; + + struct State + { + QueryPipeline pipeline; + PullingPipelineExecutor executor; + + explicit State(QueryPipeline pipeline_) + : pipeline(std::move(pipeline_)) + , executor(pipeline) + { + } + }; + + std::optional state; }; /// Insert into LiveView. @@ -389,7 +404,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static void process(Block & block, ViewRuntimeData & view, const ViewsData & views_data) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { const auto & context = views_data.context; @@ -400,7 +415,7 @@ static void process(Block & block, ViewRuntimeData & view, const ViewsData & vie local_context->addViewSource(StorageValues::create( views_data.source_storage_id, views_data.source_metadata_snapshot->getColumns(), - block, + std::move(block), views_data.source_storage->getVirtuals())); /// We need keep InterpreterSelectQuery, until the processing will be finished, since: @@ -443,16 +458,7 @@ static void process(Block & block, ViewRuntimeData & view, const ViewsData & vie callback(progress); }); - auto query_pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); - PullingPipelineExecutor executor(query_pipeline); - if (!executor.pull(block)) - { - block.clear(); - return; - } - - if (executor.pull(block)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Single chunk is expected from view inner query {}", view.query); + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } static void logQueryViews(std::list & views, ContextPtr context) @@ -550,14 +556,33 @@ ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( { } -void ExecutingInnerQueryFromViewTransform::transform(Chunk & chunk) +void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - process(block, view, *views_data); - chunk.setColumns(block.getColumns(), block.rows()); + state.emplace(process(block, view, *views_data)); } +ExecutingInnerQueryFromViewTransform::GenerateResult ExecutingInnerQueryFromViewTransform::onGenerate() +{ + GenerateResult res; + if (!state.has_value()) + return res; + + res.is_done = false; + while (!res.is_done) + { + res.is_done = !state->executor.pull(res.chunk); + if (res.chunk) + break; + } + + if (res.is_done) + state.reset(); + + return res; +} + PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_) : SinkToStorage(header) , live_view(live_view_) diff 
--git a/src/QueryPipeline/ConnectionCollector.cpp b/src/QueryPipeline/ConnectionCollector.cpp index a6a0afb68d3..c2cdd1a1133 100644 --- a/src/QueryPipeline/ConnectionCollector.cpp +++ b/src/QueryPipeline/ConnectionCollector.cpp @@ -46,7 +46,7 @@ struct AsyncDrainTask std::shared_ptr shared_connections; void operator()() const { - ConnectionCollector::drainConnections(*shared_connections); + ConnectionCollector::drainConnections(*shared_connections, /* throw_error= */ false); } // We don't have std::unique_function yet. Wrap it in shared_ptr to make the functor copyable. @@ -71,7 +71,7 @@ std::shared_ptr ConnectionCollector::enqueueConnectionCleanup( return connections; } -void ConnectionCollector::drainConnections(IConnections & connections) noexcept +void ConnectionCollector::drainConnections(IConnections & connections, bool throw_error) { bool is_drained = false; try @@ -90,6 +90,9 @@ void ConnectionCollector::drainConnections(IConnections & connections) noexcept break; default: + /// Connection should be closed in case of unknown packet, + /// since this means that the connection in some bad state. + is_drained = false; throw Exception( ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}", @@ -111,6 +114,9 @@ void ConnectionCollector::drainConnections(IConnections & connections) noexcept tryLogCurrentException(&Poco::Logger::get("ConnectionCollector"), __PRETTY_FUNCTION__); } } + + if (throw_error) + throw; } } diff --git a/src/QueryPipeline/ConnectionCollector.h b/src/QueryPipeline/ConnectionCollector.h index 5b6e82d000e..44482607277 100644 --- a/src/QueryPipeline/ConnectionCollector.h +++ b/src/QueryPipeline/ConnectionCollector.h @@ -17,7 +17,7 @@ public: static ConnectionCollector & init(ContextMutablePtr global_context_, size_t max_threads); static std::shared_ptr enqueueConnectionCleanup(const ConnectionPoolWithFailoverPtr & pool, std::shared_ptr connections) noexcept; - static void drainConnections(IConnections & connections) noexcept; + static void drainConnections(IConnections & connections, bool throw_error); private: explicit ConnectionCollector(ContextMutablePtr global_context_, size_t max_threads); diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 653d9a2bbf8..142e56ceb25 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -495,14 +495,26 @@ void RemoteQueryExecutor::finish(std::unique_ptr * read_context) /// Send the request to abort the execution of the request, if not already sent. tryCancel("Cancelling query because enough data has been read", read_context); - /// Try to drain connections asynchronously. - if (auto conn = ConnectionCollector::enqueueConnectionCleanup(pool, connections)) + + if (context->getSettingsRef().drain_timeout != Poco::Timespan(-1000000)) { - /// Drain connections synchronously. + auto connections_left = ConnectionCollector::enqueueConnectionCleanup(pool, connections); + if (connections_left) + { + /// Drain connections synchronously and suppress errors. + CurrentMetrics::Increment metric_increment(CurrentMetrics::ActiveSyncDrainedConnections); + ConnectionCollector::drainConnections(*connections_left, /* throw_error= */ false); + CurrentMetrics::add(CurrentMetrics::SyncDrainedConnections, 1); + } + } + else + { + /// Drain connections synchronously w/o suppressing errors. 
CurrentMetrics::Increment metric_increment(CurrentMetrics::ActiveSyncDrainedConnections); - ConnectionCollector::drainConnections(*conn); + ConnectionCollector::drainConnections(*connections, /* throw_error= */ true); CurrentMetrics::add(CurrentMetrics::SyncDrainedConnections, 1); } + finished = true; } diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index d5603fd2281..655bd5603de 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -227,7 +227,7 @@ private: void processMergeTreeReadTaskRequest(PartitionReadRequest request); - /// Cancell query and restart it with info about duplicated UUIDs + /// Cancel query and restart it with info about duplicate UUIDs /// only for `allow_experimental_query_deduplication`. std::variant restartQueryWithoutDuplicatedUUIDs(std::unique_ptr * read_context = nullptr); diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index fb10601216e..e19d2c7114b 100644 --- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -16,7 +16,7 @@ static Block getBlockWithSize(const std::vector & columns, size_t r ColumnsWithTypeAndName cols; size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64); - for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++) + for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; ++i) { auto column = ColumnUInt64::create(rows, 0); for (size_t j = 0; j < rows; ++j) diff --git a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp index 751f7ef8635..7b30958f0c4 100644 --- a/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp +++ b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp @@ -29,7 +29,7 @@ static Block getSortedBlockWithSize( { ColumnsWithTypeAndName cols; size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64); - for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++) + for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; ++i) { auto column = ColumnUInt64::create(rows, 0); for (size_t j = 0; j < rows; ++j) @@ -47,7 +47,7 @@ static Block getUnSortedBlockWithSize(const std::vector & columns, { ColumnsWithTypeAndName cols; size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64); - for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++) + for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; ++i) { auto column = ColumnUInt64::create(rows, 0); for (size_t j = 0; j < rows; ++j) @@ -71,7 +71,7 @@ static Block getEqualValuesBlockWithSize( { ColumnsWithTypeAndName cols; size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64); - for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++) + for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; ++i) { auto column = ColumnUInt64::create(rows, 0); for (size_t j = 0; j < rows; ++j) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 096194455b1..589bdd63f41 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -618,7 +619,11 @@ namespace ASTInsertQuery * insert_query = nullptr; String input_format; String input_data_delimiter; + PODArray output; String output_format; + CompressionMethod compression_method = CompressionMethod::None; + int compression_level = 0; + 
uint64_t interactive_delay = 100000; bool send_exception_with_stacktrace = true; bool input_function_is_used = false; @@ -635,8 +640,10 @@ namespace bool responder_finished = false; bool cancelled = false; - std::optional read_buffer; - std::optional write_buffer; + std::unique_ptr read_buffer; + std::unique_ptr write_buffer; + WriteBufferFromVector> * nested_write_buffer = nullptr; + WriteBuffer * compressing_write_buffer = nullptr; std::unique_ptr pipeline; std::unique_ptr pipeline_executor; std::shared_ptr output_format_processor; @@ -818,6 +825,10 @@ namespace if (output_format.empty()) output_format = query_context->getDefaultFormat(); + /// Choose compression. + compression_method = chooseCompressionMethod("", query_info.compression_type()); + compression_level = query_info.compression_level(); + /// Set callback to create and fill external tables query_context->setExternalTablesInitializer([this] (ContextPtr context) { @@ -891,7 +902,7 @@ namespace void Call::initializeBlockInputStream(const Block & header) { assert(!read_buffer); - read_buffer.emplace([this]() -> std::pair + read_buffer = std::make_unique([this]() -> std::pair { if (need_input_data_from_insert_query) { @@ -947,6 +958,8 @@ namespace return {nullptr, 0}; /// no more input data }); + read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), compression_method); + assert(!pipeline); auto source = query_context->getInputFormat( input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size); @@ -1030,7 +1043,10 @@ namespace /// The data will be written directly to the table. auto metadata_snapshot = storage->getInMemoryMetadataPtr(); auto sink = storage->write(ASTPtr(), metadata_snapshot, query_context); - ReadBufferFromMemory data(external_table.data().data(), external_table.data().size()); + + std::unique_ptr buf = std::make_unique(external_table.data().data(), external_table.data().size()); + buf = wrapReadBufferWithCompressionMethod(std::move(buf), chooseCompressionMethod("", external_table.compression_type())); + String format = external_table.format(); if (format.empty()) format = "TabSeparated"; @@ -1047,7 +1063,7 @@ namespace external_table_context->applySettingsChanges(settings_changes); } auto in = external_table_context->getInputFormat( - format, data, metadata_snapshot->getSampleBlock(), + format, *buf, metadata_snapshot->getSampleBlock(), external_table_context->getSettings().max_insert_block_size); QueryPipelineBuilder cur_pipeline; @@ -1101,7 +1117,18 @@ namespace if (io.pipeline.pulling()) header = io.pipeline.getHeader(); - write_buffer.emplace(*result.mutable_output()); + if (compression_method != CompressionMethod::None) + output.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. 
+ write_buffer = std::make_unique>>(output); + nested_write_buffer = static_cast> *>(write_buffer.get()); + if (compression_method != CompressionMethod::None) + { + write_buffer = wrapWriteBufferWithCompressionMethod(std::move(write_buffer), compression_method, compression_level); + compressing_write_buffer = write_buffer.get(); + } + + auto has_output = [&] { return (nested_write_buffer->position() != output.data()) || (compressing_write_buffer && compressing_write_buffer->offset()); }; + output_format_processor = query_context->getOutputFormat(output_format, *write_buffer, header); Stopwatch after_send_progress; @@ -1143,8 +1170,7 @@ namespace addLogsToResult(); - bool has_output = write_buffer->offset(); - if (has_output || result.has_progress() || result.logs_size()) + if (has_output() || result.has_progress() || result.logs_size()) sendResult(); throwIfFailedToSendResult(); @@ -1164,13 +1190,11 @@ namespace auto executor = std::make_shared(io.pipeline); auto callback = [&]() -> bool { - throwIfFailedToSendResult(); addProgressToResult(); addLogsToResult(); - bool has_output = write_buffer->offset(); - if (has_output || result.has_progress() || result.logs_size()) + if (has_output() || result.has_progress() || result.logs_size()) sendResult(); throwIfFailedToSendResult(); @@ -1260,6 +1284,8 @@ namespace /// immediately after it receives our final result, and it's prohibited to have /// two queries executed at the same time with the same query ID or session ID. io.process_list_entry.reset(); + if (query_context) + query_context->setProcessListElement(nullptr); if (session) session->releaseSessionID(); } @@ -1272,6 +1298,8 @@ namespace output_format_processor.reset(); read_buffer.reset(); write_buffer.reset(); + nested_write_buffer = nullptr; + compressing_write_buffer = nullptr; io = {}; query_scope.reset(); query_context.reset(); @@ -1390,10 +1418,17 @@ namespace if (!totals) return; - WriteBufferFromString buf{*result.mutable_totals()}; - auto format = query_context->getOutputFormat(output_format, buf, totals); + PODArray memory; + if (compression_method != CompressionMethod::None) + memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. + std::unique_ptr buf = std::make_unique>>(memory); + buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level); + auto format = query_context->getOutputFormat(output_format, *buf, totals); format->write(materializeBlock(totals)); format->finalize(); + buf->finalize(); + + result.mutable_totals()->assign(memory.data(), memory.size()); } void Call::addExtremesToResult(const Block & extremes) @@ -1401,10 +1436,17 @@ namespace if (!extremes) return; - WriteBufferFromString buf{*result.mutable_extremes()}; - auto format = query_context->getOutputFormat(output_format, buf, extremes); + PODArray memory; + if (compression_method != CompressionMethod::None) + memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. 
+ std::unique_ptr buf = std::make_unique>>(memory); + buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level); + auto format = query_context->getOutputFormat(output_format, *buf, extremes); format->write(materializeBlock(extremes)); format->finalize(); + buf->finalize(); + + result.mutable_extremes()->assign(memory.data(), memory.size()); } void Call::addProfileInfoToResult(const ProfileInfo & info) @@ -1475,6 +1517,38 @@ namespace if (!send_final_message && !isOutputStreaming(call_type)) return; + /// Copy output to `result.output`, with optional compressing. + if (write_buffer) + { + size_t output_size; + if (send_final_message) + { + if (compressing_write_buffer) + LOG_DEBUG(log, "Compressing final {} bytes", compressing_write_buffer->offset()); + write_buffer->finalize(); + output_size = output.size(); + } + else + { + if (compressing_write_buffer && compressing_write_buffer->offset()) + { + LOG_DEBUG(log, "Compressing {} bytes", compressing_write_buffer->offset()); + compressing_write_buffer->sync(); + } + output_size = nested_write_buffer->position() - output.data(); + } + + if (output_size) + { + result.mutable_output()->assign(output.data(), output_size); + nested_write_buffer->restart(); /// We're going to reuse the same buffer again for next block of data. + } + } + + if (!send_final_message && result.output().empty() && result.totals().empty() && result.extremes().empty() && !result.logs_size() + && !result.has_progress() && !result.has_stats() && !result.has_exception() && !result.cancelled()) + return; /// Nothing to send. + /// Wait for previous write to finish. /// (gRPC doesn't allow to start sending another result while the previous is still being sending.) if (sending_result.get()) @@ -1488,9 +1562,6 @@ namespace /// Start sending the result. LOG_DEBUG(log, "Sending {} result to the client: {}", (send_final_message ? "final" : "intermediate"), getResultDescription(result)); - if (write_buffer) - write_buffer->finalize(); - sending_result.set(true); auto callback = [this](bool ok) { @@ -1511,8 +1582,6 @@ namespace /// gRPC has already retrieved all data from `result`, so we don't have to keep it. result.Clear(); - if (write_buffer) - write_buffer->restart(); if (send_final_message) { diff --git a/src/Server/GRPCServer.h b/src/Server/GRPCServer.h index 25c3813c11d..e2b48f1c16b 100644 --- a/src/Server/GRPCServer.h +++ b/src/Server/GRPCServer.h @@ -4,6 +4,7 @@ #if USE_GRPC #include +#include #include "clickhouse_grpc.grpc.pb.h" namespace Poco { class Logger; } @@ -30,6 +31,9 @@ public: /// Stops the server. No new connections will be accepted. void stop(); + /// Returns the port this server is listening to. + UInt16 portNumber() const { return address_to_listen.port(); } + /// Returns the number of currently handled connections. 
size_t currentConnections() const; diff --git a/src/Server/HTTP/HTMLForm.cpp b/src/Server/HTTP/HTMLForm.cpp index 86e08f3c8e7..d9d897d20c4 100644 --- a/src/Server/HTTP/HTMLForm.cpp +++ b/src/Server/HTTP/HTMLForm.cpp @@ -183,8 +183,8 @@ void HTMLForm::readMultipart(ReadBuffer & in_, PartHandler & handler) size_t fields = 0; MultipartReadBuffer in(in_, boundary); - /// Assume there is at least one part - in.skipToNextBoundary(); + if (!in.skipToNextBoundary()) + throw Poco::Net::HTMLFormException("No boundary line found"); /// Read each part until next boundary (or last boundary) while (!in.eof()) @@ -241,7 +241,9 @@ HTMLForm::MultipartReadBuffer::MultipartReadBuffer(ReadBuffer & in_, const std:: bool HTMLForm::MultipartReadBuffer::skipToNextBoundary() { - assert(working_buffer.empty() || eof()); + if (in.eof()) + return false; + assert(boundary_hit); boundary_hit = false; @@ -257,7 +259,7 @@ bool HTMLForm::MultipartReadBuffer::skipToNextBoundary() } } - throw Poco::Net::HTMLFormException("No boundary line found"); + return false; } std::string HTMLForm::MultipartReadBuffer::readLine(bool append_crlf) diff --git a/src/Server/HTTP/HTTPServer.cpp b/src/Server/HTTP/HTTPServer.cpp index 42e6467d0af..2e91fad1c0f 100644 --- a/src/Server/HTTP/HTTPServer.cpp +++ b/src/Server/HTTP/HTTPServer.cpp @@ -5,31 +5,13 @@ namespace DB { -HTTPServer::HTTPServer( - ContextPtr context, - HTTPRequestHandlerFactoryPtr factory_, - UInt16 port_number, - Poco::Net::HTTPServerParams::Ptr params) - : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), port_number, params), factory(factory_) -{ -} - -HTTPServer::HTTPServer( - ContextPtr context, - HTTPRequestHandlerFactoryPtr factory_, - const Poco::Net::ServerSocket & socket, - Poco::Net::HTTPServerParams::Ptr params) - : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), socket, params), factory(factory_) -{ -} - HTTPServer::HTTPServer( ContextPtr context, HTTPRequestHandlerFactoryPtr factory_, Poco::ThreadPool & thread_pool, - const Poco::Net::ServerSocket & socket, + Poco::Net::ServerSocket & socket_, Poco::Net::HTTPServerParams::Ptr params) - : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), thread_pool, socket, params), factory(factory_) + : TCPServer(new HTTPServerConnectionFactory(context, params, factory_), thread_pool, socket_, params), factory(factory_) { } diff --git a/src/Server/HTTP/HTTPServer.h b/src/Server/HTTP/HTTPServer.h index 3518fd66d20..07ad54d267f 100644 --- a/src/Server/HTTP/HTTPServer.h +++ b/src/Server/HTTP/HTTPServer.h @@ -1,9 +1,9 @@ #pragma once #include +#include #include -#include #include @@ -13,26 +13,14 @@ namespace DB class Context; -class HTTPServer : public Poco::Net::TCPServer +class HTTPServer : public TCPServer { public: explicit HTTPServer( - ContextPtr context, - HTTPRequestHandlerFactoryPtr factory, - UInt16 port_number = 80, - Poco::Net::HTTPServerParams::Ptr params = new Poco::Net::HTTPServerParams); - - HTTPServer( - ContextPtr context, - HTTPRequestHandlerFactoryPtr factory, - const Poco::Net::ServerSocket & socket, - Poco::Net::HTTPServerParams::Ptr params); - - HTTPServer( ContextPtr context, HTTPRequestHandlerFactoryPtr factory, Poco::ThreadPool & thread_pool, - const Poco::Net::ServerSocket & socket, + Poco::Net::ServerSocket & socket, Poco::Net::HTTPServerParams::Ptr params); ~HTTPServer() override; diff --git a/src/Server/HTTP/HTTPServerConnection.cpp b/src/Server/HTTP/HTTPServerConnection.cpp index de81da20ead..7020b8e9a23 100644 --- 
a/src/Server/HTTP/HTTPServerConnection.cpp +++ b/src/Server/HTTP/HTTPServerConnection.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -7,10 +8,11 @@ namespace DB HTTPServerConnection::HTTPServerConnection( ContextPtr context_, + TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket, Poco::Net::HTTPServerParams::Ptr params_, HTTPRequestHandlerFactoryPtr factory_) - : TCPServerConnection(socket), context(Context::createCopy(context_)), params(params_), factory(factory_), stopped(false) + : TCPServerConnection(socket), context(Context::createCopy(context_)), tcp_server(tcp_server_), params(params_), factory(factory_), stopped(false) { poco_check_ptr(factory); } @@ -20,12 +22,12 @@ void HTTPServerConnection::run() std::string server = params->getSoftwareVersion(); Poco::Net::HTTPServerSession session(socket(), params); - while (!stopped && session.hasMoreRequests()) + while (!stopped && tcp_server.isOpen() && session.hasMoreRequests()) { try { std::unique_lock lock(mutex); - if (!stopped) + if (!stopped && tcp_server.isOpen()) { HTTPServerResponse response(session); HTTPServerRequest request(context, response, session); @@ -48,6 +50,11 @@ void HTTPServerConnection::run() response.set("Server", server); try { + if (!tcp_server.isOpen()) + { + sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_SERVICE_UNAVAILABLE); + break; + } std::unique_ptr handler(factory->createRequestHandler(request)); if (handler) diff --git a/src/Server/HTTP/HTTPServerConnection.h b/src/Server/HTTP/HTTPServerConnection.h index 1c7ae6cd2b7..db3969f6ffb 100644 --- a/src/Server/HTTP/HTTPServerConnection.h +++ b/src/Server/HTTP/HTTPServerConnection.h @@ -9,12 +9,14 @@ namespace DB { +class TCPServer; class HTTPServerConnection : public Poco::Net::TCPServerConnection { public: HTTPServerConnection( ContextPtr context, + TCPServer & tcp_server, const Poco::Net::StreamSocket & socket, Poco::Net::HTTPServerParams::Ptr params, HTTPRequestHandlerFactoryPtr factory); @@ -26,6 +28,7 @@ protected: private: ContextPtr context; + TCPServer & tcp_server; Poco::Net::HTTPServerParams::Ptr params; HTTPRequestHandlerFactoryPtr factory; bool stopped; diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.cpp b/src/Server/HTTP/HTTPServerConnectionFactory.cpp index 0e4fb6cfcec..008da222c79 100644 --- a/src/Server/HTTP/HTTPServerConnectionFactory.cpp +++ b/src/Server/HTTP/HTTPServerConnectionFactory.cpp @@ -11,9 +11,9 @@ HTTPServerConnectionFactory::HTTPServerConnectionFactory( poco_check_ptr(factory); } -Poco::Net::TCPServerConnection * HTTPServerConnectionFactory::createConnection(const Poco::Net::StreamSocket & socket) +Poco::Net::TCPServerConnection * HTTPServerConnectionFactory::createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) { - return new HTTPServerConnection(context, socket, params, factory); + return new HTTPServerConnection(context, tcp_server, socket, params, factory); } } diff --git a/src/Server/HTTP/HTTPServerConnectionFactory.h b/src/Server/HTTP/HTTPServerConnectionFactory.h index 3f11eca0f69..a19dc6d4d5c 100644 --- a/src/Server/HTTP/HTTPServerConnectionFactory.h +++ b/src/Server/HTTP/HTTPServerConnectionFactory.h @@ -2,19 +2,19 @@ #include #include +#include #include -#include namespace DB { -class HTTPServerConnectionFactory : public Poco::Net::TCPServerConnectionFactory +class HTTPServerConnectionFactory : public TCPServerConnectionFactory { public: HTTPServerConnectionFactory(ContextPtr context, Poco::Net::HTTPServerParams::Ptr params, HTTPRequestHandlerFactoryPtr factory); 
- Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override; + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override; private: ContextPtr context; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 384799c4687..673edfb6719 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -719,9 +719,16 @@ void HTTPHandler::processQuery( context->checkSettingsConstraints(settings_changes); context->applySettingsChanges(settings_changes); - // Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. + /// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields. context->setCurrentQueryId(params.get("query_id", request.get("X-ClickHouse-Query-Id", ""))); + /// Initialize query scope, once query_id is initialized. + /// (To track as much allocations as possible) + query_scope.emplace(context); + + /// NOTE: this may create pretty huge allocations that will not be accounted in trace_log, + /// because memory_profiler_sample_probability/memory_profiler_step are not applied yet, + /// they will be applied in ProcessList::insert() from executeQuery() itself. const auto & query = getQuery(request, params, context); std::unique_ptr in_param = std::make_unique(query); in = has_external_data ? std::move(in_param) : std::make_unique(*in_param, *in_post_maybe_compressed); @@ -769,7 +776,7 @@ void HTTPHandler::processQuery( if (settings.readonly > 0 && settings.cancel_http_readonly_queries_on_client_close) { - append_callback([context = context, &request](const Progress &) + append_callback([&context, &request](const Progress &) { /// Assume that at the point this method is called no one is reading data from the socket any more: /// should be true for read-only queries. 
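The factory hunks in this diff (HTTPServerConnectionFactory above, and the Keeper/MySQL/PostgreSQL factories below) move from Poco::Net::TCPServerConnectionFactory to the project's own TCPServerConnectionFactory, whose createConnection() also receives the owning TCPServer so a handler can notice shutdown via isOpen(). A rough sketch of the pattern, with hypothetical class names and assumed include paths (only the createConnection signature and TCPServer::isOpen() are taken from the diff):

#include <Server/TCPServer.h>                    /// assumed header locations
#include <Server/TCPServerConnectionFactory.h>
#include <Poco/Net/TCPServerConnection.h>

namespace DB
{

/// Hypothetical handler: stops serving as soon as the owning TCPServer is closed.
class ExampleHandler : public Poco::Net::TCPServerConnection
{
public:
    ExampleHandler(TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_)
        : Poco::Net::TCPServerConnection(socket_), tcp_server(tcp_server_) {}

    void run() override
    {
        while (tcp_server.isOpen())
        {
            /// ... handle one request; return early once the server starts shutting down ...
        }
    }

private:
    TCPServer & tcp_server;
};

class ExampleHandlerFactory : public TCPServerConnectionFactory
{
public:
    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
    {
        return new ExampleHandler(tcp_server, socket);
    }
};

}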
@@ -780,8 +787,6 @@ void HTTPHandler::processQuery( customizeContext(request, context); - query_scope.emplace(context); - executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, [&response] (const String & current_query_id, const String & content_type, const String & format, const String & timezone) { diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 67abd6db13a..0c5d7d93689 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -557,6 +557,8 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response std::lock_guard lock(conn_stats_mutex); conn_stats.updateLatency(elapsed); } + + operations.erase(response->xid); keeper_dispatcher->updateKeeperStatLatency(elapsed); last_op.set(std::make_unique(LastOp{ diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index fb6541d1f53..f98b269b8be 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -93,7 +93,7 @@ private: Poco::Timestamp established; - using Operations = std::map; + using Operations = std::unordered_map; Operations operations; LastOpMultiVersion last_op; diff --git a/src/Server/KeeperTCPHandlerFactory.h b/src/Server/KeeperTCPHandlerFactory.h index 67bb3dab268..58dc73d7c27 100644 --- a/src/Server/KeeperTCPHandlerFactory.h +++ b/src/Server/KeeperTCPHandlerFactory.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -10,7 +10,7 @@ namespace DB { -class KeeperTCPHandlerFactory : public Poco::Net::TCPServerConnectionFactory +class KeeperTCPHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; @@ -29,7 +29,7 @@ public: { } - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &) override { try { diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index deebc073ad5..2836ee05c30 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -62,10 +63,11 @@ static String showTableStatusReplacementQuery(const String & query); static String killConnectionIdReplacementQuery(const String & query); static String selectLimitReplacementQuery(const String & query); -MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, +MySQLHandler::MySQLHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_) : Poco::Net::TCPServerConnection(socket_) , server(server_) + , tcp_server(tcp_server_) , log(&Poco::Logger::get("MySQLHandler")) , connection_id(connection_id_) , auth_plugin(new MySQLProtocol::Authentication::Native41()) @@ -138,11 +140,14 @@ void MySQLHandler::run() OKPacket ok_packet(0, handshake_response.capability_flags, 0, 0, 0); packet_endpoint->sendPacket(ok_packet, true); - while (true) + while (tcp_server.isOpen()) { packet_endpoint->resetSequenceId(); MySQLPacketPayloadReadBuffer payload = packet_endpoint->getPayload(); + while (!in->poll(1000000)) + if (!tcp_server.isOpen()) + return; char command = 0; payload.readStrict(command); @@ -152,6 +157,8 @@ void MySQLHandler::run() LOG_DEBUG(log, "Received command: {}. 
Connection id: {}.", static_cast(static_cast(command)), connection_id); + if (!tcp_server.isOpen()) + return; try { switch (command) @@ -369,8 +376,8 @@ void MySQLHandler::finishHandshakeSSL( } #if USE_SSL -MySQLHandlerSSL::MySQLHandlerSSL(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_) - : MySQLHandler(server_, socket_, ssl_enabled, connection_id_) +MySQLHandlerSSL::MySQLHandlerSSL(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_) + : MySQLHandler(server_, tcp_server_, socket_, ssl_enabled, connection_id_) , public_key(public_key_) , private_key(private_key_) {} diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 7ef212bf36e..3af5f7a0eb2 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -24,11 +24,14 @@ namespace CurrentMetrics namespace DB { +class ReadBufferFromPocoSocket; +class TCPServer; + /// Handler for MySQL wire protocol connections. Allows to connect to ClickHouse using MySQL client. class MySQLHandler : public Poco::Net::TCPServerConnection { public: - MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_); + MySQLHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_); void run() final; @@ -52,6 +55,7 @@ protected: virtual void finishHandshakeSSL(size_t packet_size, char * buf, size_t pos, std::function read_bytes, MySQLProtocol::ConnectionPhase::HandshakeResponse & packet); IServer & server; + TCPServer & tcp_server; Poco::Logger * log; UInt64 connection_id = 0; @@ -68,7 +72,7 @@ protected: Replacements replacements; std::unique_ptr auth_plugin; - std::shared_ptr in; + std::shared_ptr in; std::shared_ptr out; bool secure_connection = false; }; @@ -77,7 +81,7 @@ protected: class MySQLHandlerSSL : public MySQLHandler { public: - MySQLHandlerSSL(IServer & server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_); + MySQLHandlerSSL(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_); private: void authPluginSSL() override; diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index 7a0bfd8ab09..f7bb073e275 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -118,14 +118,14 @@ void MySQLHandlerFactory::generateRSAKeys() } #endif -Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket) +Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) { size_t connection_id = last_connection_id++; LOG_TRACE(log, "MySQL connection. Id: {}. 
Address: {}", connection_id, socket.peerAddress().toString()); #if USE_SSL - return new MySQLHandlerSSL(server, socket, ssl_enabled, connection_id, *public_key, *private_key); + return new MySQLHandlerSSL(server, tcp_server, socket, ssl_enabled, connection_id, *public_key, *private_key); #else - return new MySQLHandler(server, socket, ssl_enabled, connection_id); + return new MySQLHandler(server, tcp_server, socket, ssl_enabled, connection_id); #endif } diff --git a/src/Server/MySQLHandlerFactory.h b/src/Server/MySQLHandlerFactory.h index 106fdfdf341..25f1af85273 100644 --- a/src/Server/MySQLHandlerFactory.h +++ b/src/Server/MySQLHandlerFactory.h @@ -1,9 +1,9 @@ #pragma once -#include #include #include #include +#include #include @@ -13,8 +13,9 @@ namespace DB { +class TCPServer; -class MySQLHandlerFactory : public Poco::Net::TCPServerConnectionFactory +class MySQLHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; @@ -43,7 +44,7 @@ public: void generateRSAKeys(); - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override; + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override; }; } diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index fee4ace3452..9808b538280 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -6,6 +6,7 @@ #include #include "PostgreSQLHandler.h" #include +#include #include #include #include @@ -28,11 +29,13 @@ namespace ErrorCodes PostgreSQLHandler::PostgreSQLHandler( const Poco::Net::StreamSocket & socket_, IServer & server_, + TCPServer & tcp_server_, bool ssl_enabled_, Int32 connection_id_, std::vector> & auth_methods_) : Poco::Net::TCPServerConnection(socket_) , server(server_) + , tcp_server(tcp_server_) , ssl_enabled(ssl_enabled_) , connection_id(connection_id_) , authentication_manager(auth_methods_) @@ -60,11 +63,18 @@ void PostgreSQLHandler::run() if (!startup()) return; - while (true) + while (tcp_server.isOpen()) { message_transport->send(PostgreSQLProtocol::Messaging::ReadyForQuery(), true); + + constexpr size_t connection_check_timeout = 1; // 1 second + while (!in->poll(1000000 * connection_check_timeout)) + if (!tcp_server.isOpen()) + return; PostgreSQLProtocol::Messaging::FrontMessageType message_type = message_transport->receiveMessageType(); + if (!tcp_server.isOpen()) + return; switch (message_type) { case PostgreSQLProtocol::Messaging::FrontMessageType::QUERY: diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h index 1d33f41f255..4fd08cc2606 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -18,8 +18,9 @@ namespace CurrentMetrics namespace DB { - +class ReadBufferFromPocoSocket; class Session; +class TCPServer; /** PostgreSQL wire protocol implementation. 
* For more info see https://www.postgresql.org/docs/current/protocol.html @@ -30,6 +31,7 @@ public: PostgreSQLHandler( const Poco::Net::StreamSocket & socket_, IServer & server_, + TCPServer & tcp_server_, bool ssl_enabled_, Int32 connection_id_, std::vector> & auth_methods_); @@ -40,12 +42,13 @@ private: Poco::Logger * log = &Poco::Logger::get("PostgreSQLHandler"); IServer & server; + TCPServer & tcp_server; std::unique_ptr session; bool ssl_enabled = false; Int32 connection_id = 0; Int32 secret_key = 0; - std::shared_ptr in; + std::shared_ptr in; std::shared_ptr out; std::shared_ptr message_transport; diff --git a/src/Server/PostgreSQLHandlerFactory.cpp b/src/Server/PostgreSQLHandlerFactory.cpp index 1158cf5835e..6f2124861e7 100644 --- a/src/Server/PostgreSQLHandlerFactory.cpp +++ b/src/Server/PostgreSQLHandlerFactory.cpp @@ -1,5 +1,4 @@ #include "PostgreSQLHandlerFactory.h" -#include #include #include @@ -17,11 +16,11 @@ PostgreSQLHandlerFactory::PostgreSQLHandlerFactory(IServer & server_) }; } -Poco::Net::TCPServerConnection * PostgreSQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket) +Poco::Net::TCPServerConnection * PostgreSQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) { Int32 connection_id = last_connection_id++; LOG_TRACE(log, "PostgreSQL connection. Id: {}. Address: {}", connection_id, socket.peerAddress().toString()); - return new PostgreSQLHandler(socket, server, ssl_enabled, connection_id, auth_methods); + return new PostgreSQLHandler(socket, server, tcp_server, ssl_enabled, connection_id, auth_methods); } } diff --git a/src/Server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h index dc3d4047d2a..e9241da6f0e 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -1,16 +1,16 @@ #pragma once -#include #include #include #include +#include #include #include namespace DB { -class PostgreSQLHandlerFactory : public Poco::Net::TCPServerConnectionFactory +class PostgreSQLHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; @@ -28,6 +28,6 @@ private: public: explicit PostgreSQLHandlerFactory(IServer & server_); - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override; + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & server) override; }; } diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index 6ec1ec572f7..b41ad2376f1 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,5 +1,5 @@ #include -#include +#include #if USE_GRPC #include @@ -11,20 +11,29 @@ namespace DB class ProtocolServerAdapter::TCPServerAdapterImpl : public Impl { public: - explicit TCPServerAdapterImpl(std::unique_ptr tcp_server_) : tcp_server(std::move(tcp_server_)) {} + explicit TCPServerAdapterImpl(std::unique_ptr tcp_server_) : tcp_server(std::move(tcp_server_)) {} ~TCPServerAdapterImpl() override = default; void start() override { tcp_server->start(); } void stop() override { tcp_server->stop(); } + bool isStopping() const override { return !tcp_server->isOpen(); } + UInt16 portNumber() const override { return tcp_server->portNumber(); } size_t currentConnections() const override { return tcp_server->currentConnections(); } size_t currentThreads() const override { return tcp_server->currentThreads(); } private: - std::unique_ptr tcp_server; + std::unique_ptr tcp_server; }; 
-ProtocolServerAdapter::ProtocolServerAdapter(const char * port_name_, std::unique_ptr tcp_server_) - : port_name(port_name_), impl(std::make_unique(std::move(tcp_server_))) +ProtocolServerAdapter::ProtocolServerAdapter( + const std::string & listen_host_, + const char * port_name_, + const std::string & description_, + std::unique_ptr tcp_server_) + : listen_host(listen_host_) + , port_name(port_name_) + , description(description_) + , impl(std::make_unique(std::move(tcp_server_))) { } @@ -36,16 +45,30 @@ public: ~GRPCServerAdapterImpl() override = default; void start() override { grpc_server->start(); } - void stop() override { grpc_server->stop(); } + void stop() override + { + is_stopping = true; + grpc_server->stop(); + } + bool isStopping() const override { return is_stopping; } + UInt16 portNumber() const override { return grpc_server->portNumber(); } size_t currentConnections() const override { return grpc_server->currentConnections(); } size_t currentThreads() const override { return grpc_server->currentThreads(); } private: std::unique_ptr grpc_server; + bool is_stopping = false; }; -ProtocolServerAdapter::ProtocolServerAdapter(const char * port_name_, std::unique_ptr grpc_server_) - : port_name(port_name_), impl(std::make_unique(std::move(grpc_server_))) +ProtocolServerAdapter::ProtocolServerAdapter( + const std::string & listen_host_, + const char * port_name_, + const std::string & description_, + std::unique_ptr grpc_server_) + : listen_host(listen_host_) + , port_name(port_name_) + , description(description_) + , impl(std::make_unique(std::move(grpc_server_))) { } #endif diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index 04c46b53356..9b3b1af0301 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -2,14 +2,14 @@ #include +#include #include #include -namespace Poco::Net { class TCPServer; } - namespace DB { class GRPCServer; +class TCPServer; /// Provides an unified interface to access a protocol implementing server /// no matter what type it has (HTTPServer, TCPServer, MySQLServer, GRPCServer, ...). @@ -19,10 +19,10 @@ class ProtocolServerAdapter public: ProtocolServerAdapter(ProtocolServerAdapter && src) = default; ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; - ProtocolServerAdapter(const char * port_name_, std::unique_ptr tcp_server_); + ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); #if USE_GRPC - ProtocolServerAdapter(const char * port_name_, std::unique_ptr grpc_server_); + ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif /// Starts the server. A new thread will be created that waits for and accepts incoming connections. @@ -31,14 +31,23 @@ public: /// Stops the server. No new connections will be accepted. void stop() { impl->stop(); } + bool isStopping() const { return impl->isStopping(); } + /// Returns the number of currently handled connections. size_t currentConnections() const { return impl->currentConnections(); } /// Returns the number of current threads. size_t currentThreads() const { return impl->currentThreads(); } + /// Returns the port this server is listening to. 
+ UInt16 portNumber() const { return impl->portNumber(); } + + const std::string & getListenHost() const { return listen_host; } + const std::string & getPortName() const { return port_name; } + const std::string & getDescription() const { return description; } + private: class Impl { @@ -46,13 +55,17 @@ private: virtual ~Impl() {} virtual void start() = 0; virtual void stop() = 0; + virtual bool isStopping() const = 0; + virtual UInt16 portNumber() const = 0; virtual size_t currentConnections() const = 0; virtual size_t currentThreads() const = 0; }; class TCPServerAdapterImpl; class GRPCServerAdapterImpl; + std::string listen_host; std::string port_name; + std::string description; std::unique_ptr impl; }; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index cdf1838c06b..6b4f77dd7d0 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -81,9 +82,10 @@ namespace ErrorCodes extern const int UNKNOWN_PROTOCOL; } -TCPHandler::TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_) +TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_) : Poco::Net::TCPServerConnection(socket_) , server(server_) + , tcp_server(tcp_server_) , parse_proxy_protocol(parse_proxy_protocol_) , log(&Poco::Logger::get("TCPHandler")) , server_display_name(std::move(server_display_name_)) @@ -172,13 +174,13 @@ void TCPHandler::runImpl() throw; } - while (true) + while (tcp_server.isOpen()) { /// We are waiting for a packet from the client. Thus, every `poll_interval` seconds check whether we need to shut down. { Stopwatch idle_time; UInt64 timeout_ms = std::min(poll_interval, idle_connection_timeout) * 1000000; - while (!server.isCancelled() && !static_cast(*in).poll(timeout_ms)) + while (tcp_server.isOpen() && !server.isCancelled() && !static_cast(*in).poll(timeout_ms)) { if (idle_time.elapsedSeconds() > idle_connection_timeout) { @@ -189,7 +191,7 @@ void TCPHandler::runImpl() } /// If we need to shut down, or client disconnects. - if (server.isCancelled() || in->eof()) + if (!tcp_server.isOpen() || server.isCancelled() || in->eof()) break; Stopwatch watch; @@ -233,8 +235,6 @@ void TCPHandler::runImpl() /// NOTE: these settings are applied only for current connection (not for distributed tables' connections) state.timeout_setter = std::make_unique(socket(), receive_timeout, send_timeout); - std::mutex fatal_error_mutex; - /// Should we send internal logs to client? const auto client_logs_level = query_context->getSettingsRef().send_logs_level; if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_LOGS @@ -243,7 +243,7 @@ void TCPHandler::runImpl() state.logs_queue = std::make_shared(); state.logs_queue->max_priority = Poco::Logger::parseLevel(client_logs_level.toString()); CurrentThread::attachInternalTextLogsQueue(state.logs_queue, client_logs_level); - CurrentThread::setFatalErrorCallback([this, &fatal_error_mutex] + CurrentThread::setFatalErrorCallback([this] { std::lock_guard lock(fatal_error_mutex); sendLogs(); @@ -351,7 +351,7 @@ void TCPHandler::runImpl() /// Should not check for cancel in case of input. 
if (!state.need_receive_data_for_input) { - auto callback = [this, &fatal_error_mutex]() + auto callback = [this]() { std::lock_guard lock(fatal_error_mutex); @@ -949,28 +949,27 @@ void TCPHandler::sendProfileEvents() ThreadIdToCountersSnapshot new_snapshots; ProfileEventsSnapshot group_snapshot; { - std::lock_guard guard(thread_group->mutex); - snapshots.reserve(thread_group->threads.size()); - for (auto * thread : thread_group->threads) + auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); + snapshots.reserve(stats.size()); + + for (auto & stat : stats) { - auto const thread_id = thread->thread_id; + auto const thread_id = stat.thread_id; if (thread_id == current_thread_id) continue; auto current_time = time(nullptr); - auto counters = thread->performance_counters.getPartiallyAtomicSnapshot(); - auto memory_usage = thread->memory_tracker.get(); auto previous_snapshot = last_sent_snapshots.find(thread_id); auto increment = previous_snapshot != last_sent_snapshots.end() - ? CountersIncrement(counters, previous_snapshot->second) - : CountersIncrement(counters); + ? CountersIncrement(stat.counters, previous_snapshot->second) + : CountersIncrement(stat.counters); snapshots.push_back(ProfileEventsSnapshot{ thread_id, std::move(increment), - memory_usage, + stat.memory_usage, current_time }); - new_snapshots[thread_id] = std::move(counters); + new_snapshots[thread_id] = std::move(stat.counters); } group_snapshot.thread_id = 0; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 4a340e328ed..4c4aeb0d913 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -35,6 +35,7 @@ class Session; struct Settings; class ColumnsDescription; struct ProfileInfo; +class TCPServer; /// State of query processing. struct QueryState @@ -127,7 +128,7 @@ public: * because it allows to check the IP ranges of the trusted proxy. * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP. */ - TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_); + TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_); ~TCPHandler() override; void run() override; @@ -137,6 +138,7 @@ public: private: IServer & server; + TCPServer & tcp_server; bool parse_proxy_protocol = false; Poco::Logger * log; @@ -177,6 +179,7 @@ private: String cluster_secret; std::mutex task_callback_mutex; + std::mutex fatal_error_mutex; /// At the moment, only one ongoing query in the connection is supported at a time. 
QueryState state; diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index e610bea330c..03b2592198d 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -1,17 +1,17 @@ #pragma once -#include #include #include #include #include +#include namespace Poco { class Logger; } namespace DB { -class TCPHandlerFactory : public Poco::Net::TCPServerConnectionFactory +class TCPHandlerFactory : public TCPServerConnectionFactory { private: IServer & server; @@ -38,13 +38,13 @@ public: server_display_name = server.config().getString("display_name", getFQDNOrHostName()); } - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override { try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TCPHandler(server, socket, parse_proxy_protocol, server_display_name); + return new TCPHandler(server, tcp_server, socket, parse_proxy_protocol, server_display_name); } catch (const Poco::Net::NetException &) { diff --git a/src/Server/TCPServer.cpp b/src/Server/TCPServer.cpp new file mode 100644 index 00000000000..380c4ef9924 --- /dev/null +++ b/src/Server/TCPServer.cpp @@ -0,0 +1,36 @@ +#include +#include + +namespace DB +{ + +class TCPServerConnectionFactoryImpl : public Poco::Net::TCPServerConnectionFactory +{ +public: + TCPServerConnectionFactoryImpl(TCPServer & tcp_server_, DB::TCPServerConnectionFactory::Ptr factory_) + : tcp_server(tcp_server_) + , factory(factory_) + {} + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override + { + return factory->createConnection(socket, tcp_server); + } +private: + TCPServer & tcp_server; + DB::TCPServerConnectionFactory::Ptr factory; +}; + +TCPServer::TCPServer( + TCPServerConnectionFactory::Ptr factory_, + Poco::ThreadPool & thread_pool, + Poco::Net::ServerSocket & socket_, + Poco::Net::TCPServerParams::Ptr params) + : Poco::Net::TCPServer(new TCPServerConnectionFactoryImpl(*this, factory_), thread_pool, socket_, params) + , factory(factory_) + , socket(socket_) + , is_open(true) + , port_number(socket.address().port()) +{} + +} diff --git a/src/Server/TCPServer.h b/src/Server/TCPServer.h new file mode 100644 index 00000000000..219fed5342b --- /dev/null +++ b/src/Server/TCPServer.h @@ -0,0 +1,47 @@ +#pragma once + +#include + +#include +#include + + +namespace DB +{ +class Context; + +class TCPServer : public Poco::Net::TCPServer +{ +public: + explicit TCPServer( + TCPServerConnectionFactory::Ptr factory, + Poco::ThreadPool & thread_pool, + Poco::Net::ServerSocket & socket, + Poco::Net::TCPServerParams::Ptr params = new Poco::Net::TCPServerParams); + + /// Close the socket and ask existing connections to stop serving queries + void stop() + { + Poco::Net::TCPServer::stop(); + // This notifies already established connections that they should stop serving + // queries and close their socket as soon as they can. + is_open = false; + // Poco's stop() stops listening on the socket but leaves it open. + // To be able to hand over control of the listening port to a new server, and + // to get fast connection refusal instead of timeouts, we also need to close + // the listening socket. 
+ socket.close(); + } + + bool isOpen() const { return is_open; } + + UInt16 portNumber() const { return port_number; } + +private: + TCPServerConnectionFactory::Ptr factory; + Poco::Net::ServerSocket socket; + std::atomic is_open; + UInt16 port_number; +}; + +} diff --git a/src/Server/TCPServerConnectionFactory.h b/src/Server/TCPServerConnectionFactory.h new file mode 100644 index 00000000000..613f98352bd --- /dev/null +++ b/src/Server/TCPServerConnectionFactory.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace Poco +{ +namespace Net +{ + class StreamSocket; + class TCPServerConnection; +} +} +namespace DB +{ +class TCPServer; + +class TCPServerConnectionFactory +{ +public: + using Ptr = Poco::SharedPtr; + + virtual ~TCPServerConnectionFactory() = default; + + /// Same as Poco::Net::TCPServerConnectionFactory except we can pass the TCPServer + virtual Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) = 0; +}; +} diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 1dcad049f49..50aa0be4778 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index c6cafaf6e40..c86c74535c5 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -37,6 +37,10 @@ message ExternalTable { // Format of the data to insert to the external table. string format = 4; + // Compression type used to compress `data`. + // Supported values: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. + string compression_type = 6; + // Settings for executing that insertion, applied after QueryInfo.settings. map settings = 5; } @@ -101,6 +105,25 @@ message QueryInfo { /// Controls how a ClickHouse server will compress query execution results before sending back to the client. /// If not set the compression settings from the configuration file will be used. Compression result_compression = 17; + + // Compression type for `input_data`, `output_data`, `totals` and `extremes`. + // Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. + // When used for `input_data` the client is responsible to compress data before putting it into `input_data`. + // When used for `output_data` or `totals` or `extremes` the client receives compressed data and should decompress it by itself. + // In the latter case consider to specify also `compression_level`. + string compression_type = 18; + + // Compression level. + // WARNING: If it's not specified the compression level is set to zero by default which might be not the best choice for some compression types (see below). 
+ // The compression level should be in the following range (the higher the number, the better the compression): + // none: compression level isn't used + // gzip: 0..9; 0 means no compression, 6 is recommended by default (compression level -1 also means 6) + // brotli: 0..11 + // lzma: 0..9; 6 is recommended by default + // zstd: 1..22; 3 is recommended by default (compression level 0 also means 3) + // lz4: 0..16; values < 0 mean fast acceleration + // bz2: 1..9 + int32 compression_level = 19; } enum LogsLevel { diff --git a/src/Storages/ExecutableSettings.h b/src/Storages/ExecutableSettings.h index 9c0cfc05fa5..c6c1f0b9eb2 100644 --- a/src/Storages/ExecutableSettings.h +++ b/src/Storages/ExecutableSettings.h @@ -9,16 +9,23 @@ namespace DB class ASTStorage; #define LIST_OF_EXECUTABLE_SETTINGS(M) \ - M(UInt64, send_chunk_header, false, "Send number_of_rows\n before sending chunk to process", 0) \ - M(UInt64, pool_size, 16, "Processes pool size. If size == 0, then there is no size restrictions", 0) \ + M(Bool, send_chunk_header, false, "Send number_of_rows\n before sending chunk to process.", 0) \ + M(UInt64, pool_size, 16, "Processes pool size. If size == 0, then there is no size restrictions.", 0) \ M(UInt64, max_command_execution_time, 10, "Max command execution time in seconds.", 0) \ M(UInt64, command_termination_timeout, 10, "Command termination timeout in seconds.", 0) \ + M(UInt64, command_read_timeout, 10000, "Timeout for reading data from command stdout in milliseconds.", 0) \ + M(UInt64, command_write_timeout, 10000, "Timeout for writing data to command stdin in milliseconds.", 0) DECLARE_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS) /// Settings for ExecutablePool engine. struct ExecutableSettings : public BaseSettings { + std::string script_name; + std::vector script_arguments; + + bool is_executable_pool = false; + void loadFromQuery(ASTStorage & storage_def); }; diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index ade89ea7228..42b3b148551 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -54,6 +54,7 @@ void ExternalDataSourceConfiguration::set(const ExternalDataSourceConfiguration database = conf.database; table = conf.table; schema = conf.schema; + addresses = conf.addresses; addresses_expr = conf.addresses_expr; } diff --git a/src/Storages/FileLog/DirectoryWatcherBase.cpp b/src/Storages/FileLog/DirectoryWatcherBase.cpp index f2737219fd8..005e1e5fd1b 100644 --- a/src/Storages/FileLog/DirectoryWatcherBase.cpp +++ b/src/Storages/FileLog/DirectoryWatcherBase.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 68d5f60a2aa..8aceed05b72 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes } const String HDFSBuilderWrapper::CONFIG_PREFIX = "hdfs"; -const String HDFS_URL_REGEXP = "^hdfs://[^:/]*:[0-9]*/.*"; +const String HDFS_URL_REGEXP = "^hdfs://[^/]*/.*"; void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const String & config_path, bool isUser) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 70aa3d28174..2105228abf6 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -10,7 +10,6 @@ #include #include #include -#include 
#include #include #include @@ -54,6 +53,9 @@ namespace ErrorCodes extern const int ACCESS_DENIED; } +static Strings listFilesWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match); + + StorageHDFS::StorageHDFS( const String & uri_, const StorageID & table_id_, @@ -62,13 +64,15 @@ StorageHDFS::StorageHDFS( const ConstraintsDescription & constraints_, const String & comment, ContextPtr context_, - const String & compression_method_ = "", + const String & compression_method_, + const bool distributed_processing_, ASTPtr partition_by_) : IStorage(table_id_) , WithContext(context_) , uri(uri_) , format_name(format_name_) , compression_method(compression_method_) + , distributed_processing(distributed_processing_) , partition_by(partition_by_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); @@ -81,154 +85,182 @@ StorageHDFS::StorageHDFS( setInMemoryMetadata(storage_metadata); } -using StorageHDFSPtr = std::shared_ptr; - -class HDFSSource : public SourceWithProgress, WithContext +class HDFSSource::DisclosedGlobIterator::Impl { public: - struct SourcesInfo + Impl(ContextPtr context_, const String & uri) { - std::vector uris; - std::atomic next_uri_to_read = 0; + const size_t begin_of_path = uri.find('/', uri.find("//") + 2); + const String path_from_uri = uri.substr(begin_of_path); + const String uri_without_path = uri.substr(0, begin_of_path); /// ends without '/' - bool need_path_column = false; - bool need_file_column = false; - }; + HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context_->getGlobalContext()->getConfigRef()); + HDFSFSPtr fs = createHDFSFS(builder.get()); - using SourcesInfoPtr = std::shared_ptr; - - static Block getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column) - { - auto header = metadata_snapshot->getSampleBlock(); - - /// Note: AddingDefaultsBlockInputStream doesn't change header. - - if (need_path_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); - if (need_file_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); - - return header; + uris = listFilesWithRegexpMatching("/", fs, path_from_uri); + for (auto & elem : uris) + elem = uri_without_path + elem; + uris_iter = uris.begin(); } - static Block getBlockForSource( - const StorageHDFSPtr & storage, - const StorageMetadataPtr & metadata_snapshot, - const ColumnsDescription & columns_description, - const SourcesInfoPtr & files_info) + String next() + { + std::lock_guard lock(mutex); + if (uris_iter != uris.end()) + { + auto answer = *uris_iter; + ++uris_iter; + return answer; + } + return {}; + } +private: + std::mutex mutex; + Strings uris; + Strings::iterator uris_iter; +}; + +Block HDFSSource::getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column) +{ + auto header = metadata_snapshot->getSampleBlock(); + /// Note: AddingDefaultsBlockInputStream doesn't change header. 
+ if (need_path_column) + header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); + if (need_file_column) + header.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); + return header; +} + +Block HDFSSource::getBlockForSource( + const StorageHDFSPtr & storage, + const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, + bool need_path_column, + bool need_file_column) +{ + if (storage->isColumnOriented()) + return metadata_snapshot->getSampleBlockForColumns( + columns_description.getNamesOfPhysical(), storage->getVirtuals(), storage->getStorageID()); + else + return getHeader(metadata_snapshot, need_path_column, need_file_column); +} + +HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri) + : pimpl(std::make_shared(context_, uri)) {} + +String HDFSSource::DisclosedGlobIterator::next() +{ + return pimpl->next(); +} + + +HDFSSource::HDFSSource( + StorageHDFSPtr storage_, + const StorageMetadataPtr & metadata_snapshot_, + ContextPtr context_, + UInt64 max_block_size_, + bool need_path_column_, + bool need_file_column_, + std::shared_ptr file_iterator_, + ColumnsDescription columns_description_) + : SourceWithProgress(getBlockForSource(storage_, metadata_snapshot_, columns_description_, need_path_column_, need_file_column_)) + , WithContext(context_) + , storage(std::move(storage_)) + , metadata_snapshot(metadata_snapshot_) + , max_block_size(max_block_size_) + , need_path_column(need_path_column_) + , need_file_column(need_file_column_) + , file_iterator(file_iterator_) + , columns_description(std::move(columns_description_)) +{ + initialize(); +} + +void HDFSSource::onCancel() +{ + if (reader) + reader->cancel(); +} + +bool HDFSSource::initialize() +{ + current_path = (*file_iterator)(); + if (current_path.empty()) + return false; + const size_t begin_of_path = current_path.find('/', current_path.find("//") + 2); + const String path_from_uri = current_path.substr(begin_of_path); + const String uri_without_path = current_path.substr(0, begin_of_path); + + auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); + read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef()), compression); + + auto get_block_for_format = [&]() -> Block { if (storage->isColumnOriented()) - return metadata_snapshot->getSampleBlockForColumns( - columns_description.getNamesOfPhysical(), storage->getVirtuals(), storage->getStorageID()); - else - return getHeader(metadata_snapshot, files_info->need_path_column, files_info->need_file_column); - } + return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + return metadata_snapshot->getSampleBlock(); + }; - HDFSSource( - StorageHDFSPtr storage_, - const StorageMetadataPtr & metadata_snapshot_, - ContextPtr context_, - UInt64 max_block_size_, - SourcesInfoPtr source_info_, - String uri_without_path_, - ColumnsDescription columns_description_) - : SourceWithProgress(getBlockForSource(storage_, metadata_snapshot_, columns_description_, source_info_)) - , WithContext(context_) - , storage(std::move(storage_)) - , metadata_snapshot(metadata_snapshot_) - , source_info(std::move(source_info_)) - , uri_without_path(std::move(uri_without_path_)) - , max_block_size(max_block_size_) - , columns_description(std::move(columns_description_)) - { - } + auto input_format = 
getContext()->getInputFormat(storage->format_name, *read_buf, get_block_for_format(), max_block_size); - String getName() const override + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); + if (columns_description.hasDefaults()) { - return "HDFS"; - } - - Chunk generate() override - { - while (true) + builder.addSimpleTransform([&](const Block & header) { - if (!reader) - { - auto pos = source_info->next_uri_to_read.fetch_add(1); - if (pos >= source_info->uris.size()) - return {}; + return std::make_shared(header, columns_description, *input_format, getContext()); + }); + } + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + reader = std::make_unique(*pipeline); + return true; +} - auto path = source_info->uris[pos]; - current_path = uri_without_path + path; +String HDFSSource::getName() const +{ + return "HDFSSource"; +} - auto compression = chooseCompressionMethod(path, storage->compression_method); - read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri_without_path, path, getContext()->getGlobalContext()->getConfigRef()), compression); +Chunk HDFSSource::generate() +{ + if (!reader) + return {}; - auto get_block_for_format = [&]() -> Block - { - if (storage->isColumnOriented()) - return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); - return metadata_snapshot->getSampleBlock(); - }; - auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, get_block_for_format(), max_block_size); + Chunk chunk; + if (reader->pull(chunk)) + { + Columns columns = chunk.getColumns(); + UInt64 num_rows = chunk.getNumRows(); - QueryPipelineBuilder builder; - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - } - - Block res; - if (reader->pull(res)) - { - Columns columns = res.getColumns(); - UInt64 num_rows = res.rows(); - - /// Enrich with virtual columns. - if (source_info->need_path_column) - { - auto column = DataTypeString().createColumnConst(num_rows, current_path); - columns.push_back(column->convertToFullColumnIfConst()); - } - - if (source_info->need_file_column) - { - size_t last_slash_pos = current_path.find_last_of('/'); - auto file_name = current_path.substr(last_slash_pos + 1); - - auto column = DataTypeString().createColumnConst(num_rows, std::move(file_name)); - columns.push_back(column->convertToFullColumnIfConst()); - } - - return Chunk(std::move(columns), num_rows); - } - - reader.reset(); - pipeline.reset(); - read_buf.reset(); + /// Enrich with virtual columns. 
+ if (need_path_column) + { + auto column = DataTypeString().createColumnConst(num_rows, current_path); + columns.push_back(column->convertToFullColumnIfConst()); } + + if (need_file_column) + { + size_t last_slash_pos = current_path.find_last_of('/'); + auto file_name = current_path.substr(last_slash_pos + 1); + + auto column = DataTypeString().createColumnConst(num_rows, std::move(file_name)); + columns.push_back(column->convertToFullColumnIfConst()); + } + + return Chunk(std::move(columns), num_rows); } -private: - StorageHDFSPtr storage; - StorageMetadataPtr metadata_snapshot; - SourcesInfoPtr source_info; - String uri_without_path; - UInt64 max_block_size; - ColumnsDescription columns_description; + reader.reset(); + pipeline.reset(); + read_buf.reset(); + + if (!initialize()) + return {}; + return generate(); +} - std::unique_ptr read_buf; - std::unique_ptr pipeline; - std::unique_ptr reader; - String current_path; -}; class HDFSSink : public SinkToStorage { @@ -300,7 +332,6 @@ public: private: const String uri; - const String format; const Block sample_block; ContextPtr context; @@ -311,7 +342,7 @@ private: /* Recursive directory listing with matched paths as a result. * Have the same method in StorageFile. */ -Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match) +Strings listFilesWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match) { const size_t first_glob = for_match.find_first_of("*?{"); @@ -344,7 +375,7 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c { if (re2::RE2::FullMatch(file_name, matcher)) { - Strings result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash)); + Strings result_part = listFilesWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash)); /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. 
std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); } @@ -367,29 +398,33 @@ Pipe StorageHDFS::read( size_t max_block_size, unsigned num_streams) { - const size_t begin_of_path = uri.find('/', uri.find("//") + 2); - const String path_from_uri = uri.substr(begin_of_path); - const String uri_without_path = uri.substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context_->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - auto sources_info = std::make_shared(); - sources_info->uris = LSWithRegexpMatching("/", fs, path_from_uri); - - if (sources_info->uris.empty()) - LOG_WARNING(log, "No file in HDFS matches the path: {}", uri); - + bool need_path_column = false; + bool need_file_column = false; for (const auto & column : column_names) { if (column == "_path") - sources_info->need_path_column = true; + need_path_column = true; if (column == "_file") - sources_info->need_file_column = true; + need_file_column = true; } - if (num_streams > sources_info->uris.size()) - num_streams = sources_info->uris.size(); + std::shared_ptr iterator_wrapper{nullptr}; + if (distributed_processing) + { + iterator_wrapper = std::make_shared( + [callback = context_->getReadTaskCallback()]() -> String { + return callback(); + }); + } + else + { + /// Iterate through disclosed globs and make a source for each file + auto glob_iterator = std::make_shared(context_, uri); + iterator_wrapper = std::make_shared([glob_iterator]() + { + return glob_iterator->next(); + }); + } Pipes pipes; auto this_ptr = std::static_pointer_cast(shared_from_this()); @@ -409,8 +444,9 @@ Pipe StorageHDFS::read( metadata_snapshot, context_, max_block_size, - sources_info, - uri_without_path, + need_path_column, + need_file_column, + iterator_wrapper, get_columns_for_format())); } return Pipe::unitePipes(std::move(pipes)); @@ -443,13 +479,13 @@ SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataP } } -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr context_, TableExclusiveLockHolder &) +void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) { const size_t begin_of_path = uri.find('/', uri.find("//") + 2); const String path = uri.substr(begin_of_path); const String url = uri.substr(0, begin_of_path); - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", context_->getGlobalContext()->getConfigRef()); + HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); int ret = hdfsDelete(fs.get(), path.data(), 0); @@ -488,7 +524,7 @@ void registerStorageHDFS(StorageFactory & factory) partition_by = args.storage_def->partition_by->clone(); return StorageHDFS::create( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, partition_by); + url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); }, { .supports_sort_order = true, // for partition by diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index db6b078265d..3e2f7a43127 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -4,6 +4,7 @@ #if USE_HDFS +#include #include #include #include @@ -32,7 +33,11 @@ public: SinkToStoragePtr 
write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, TableExclusiveLockHolder &) override; + void truncate( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + TableExclusiveLockHolder &) override; NamesAndTypesList getVirtuals() const override; @@ -54,17 +59,84 @@ protected: const ConstraintsDescription & constraints_, const String & comment, ContextPtr context_, - const String & compression_method_, + const String & compression_method_ = "", + bool distributed_processing_ = false, ASTPtr partition_by = nullptr); private: const String uri; String format_name; String compression_method; + const bool distributed_processing; ASTPtr partition_by; Poco::Logger * log = &Poco::Logger::get("StorageHDFS"); }; + +class PullingPipelineExecutor; + +class HDFSSource : public SourceWithProgress, WithContext +{ +public: + class DisclosedGlobIterator + { + public: + DisclosedGlobIterator(ContextPtr context_, const String & uri_); + String next(); + private: + class Impl; + /// shared_ptr to have copy constructor + std::shared_ptr pimpl; + }; + + using IteratorWrapper = std::function; + using StorageHDFSPtr = std::shared_ptr; + + static Block getHeader( + const StorageMetadataPtr & metadata_snapshot, + bool need_path_column, + bool need_file_column); + + static Block getBlockForSource( + const StorageHDFSPtr & storage, + const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, + bool need_path_column, + bool need_file_column); + + HDFSSource( + StorageHDFSPtr storage_, + const StorageMetadataPtr & metadata_snapshot_, + ContextPtr context_, + UInt64 max_block_size_, + bool need_path_column_, + bool need_file_column_, + std::shared_ptr file_iterator_, + ColumnsDescription columns_description_); + + String getName() const override; + + Chunk generate() override; + + void onCancel() override; + +private: + StorageHDFSPtr storage; + StorageMetadataPtr metadata_snapshot; + UInt64 max_block_size; + bool need_path_column; + bool need_file_column; + std::shared_ptr file_iterator; + ColumnsDescription columns_description; + + std::unique_ptr read_buf; + std::unique_ptr pipeline; + std::unique_ptr reader; + String current_path; + + /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
+ bool initialize(); +}; } #endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp new file mode 100644 index 00000000000..ba1cc045fbf --- /dev/null +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -0,0 +1,149 @@ +#include + +#if USE_HDFS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ +StorageHDFSCluster::StorageHDFSCluster( + String cluster_name_, + const String & uri_, + const StorageID & table_id_, + const String & format_name_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & compression_method_) + : IStorage(table_id_) + , cluster_name(cluster_name_) + , uri(uri_) + , format_name(format_name_) + , compression_method(compression_method_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); +} + +/// The code executes on initiator +Pipe StorageHDFSCluster::read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t /*max_block_size*/, + unsigned /*num_streams*/) +{ + auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings()); + + auto iterator = std::make_shared(context, uri); + auto callback = std::make_shared([iterator]() mutable -> String + { + return iterator->next(); + }); + + /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) + Block header = + InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + + const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{}; + + Pipes pipes; + + const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; + + for (const auto & replicas : cluster->getShardsAddresses()) + { + /// There will be only one replica, because we consider each replica as a shard + for (const auto & node : replicas) + { + auto connection = std::make_shared( + node.host_name, node.port, context->getGlobalContext()->getCurrentDatabase(), + node.user, node.password, node.cluster, node.cluster_secret, + "HDFSClusterInititiator", + node.compression, + node.secure + ); + + + /// For unknown reason global context is passed to IStorage::read() method + /// So, task_identifier is passed as constructor argument. It is more obvious. 
+ auto remote_query_executor = std::make_shared( + connection, + queryToString(query_info.query), + header, + context, + /*throttler=*/nullptr, + scalars, + Tables(), + processed_stage, + RemoteQueryExecutor::Extension{.task_iterator = callback}); + + pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false)); + } + } + + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); + return Pipe::unitePipes(std::move(pipes)); +} + +QueryProcessingStage::Enum StorageHDFSCluster::getQueryProcessingStage( + ContextPtr context, QueryProcessingStage::Enum to_stage, const StorageMetadataPtr &, SelectQueryInfo &) const +{ + /// Initiator executes query on remote node. + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (to_stage >= QueryProcessingStage::Enum::WithMergeableState) + return QueryProcessingStage::Enum::WithMergeableState; + + /// Follower just reads the data. + return QueryProcessingStage::Enum::FetchColumns; +} + + +NamesAndTypesList StorageHDFSCluster::getVirtuals() const +{ + return NamesAndTypesList{ + {"_path", std::make_shared()}, + {"_file", std::make_shared()} + }; +} + + +} + +#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h new file mode 100644 index 00000000000..0e568a9faf8 --- /dev/null +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -0,0 +1,55 @@ +#pragma once + +#include + +#if USE_HDFS + +#include +#include + +#include + +#include +#include +#include + +namespace DB +{ + +class Context; + +class StorageHDFSCluster : public shared_ptr_helper, public IStorage +{ + friend struct shared_ptr_helper; +public: + std::string getName() const override { return "HDFSCluster"; } + + Pipe read(const Names &, const StorageMetadataPtr &, SelectQueryInfo &, + ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, unsigned /*num_streams*/) override; + + QueryProcessingStage::Enum + getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageMetadataPtr &, SelectQueryInfo &) const override; + + NamesAndTypesList getVirtuals() const override; + +protected: + StorageHDFSCluster( + String cluster_name_, + const String & uri_, + const StorageID & table_id_, + const String & format_name_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & compression_method_); + +private: + String cluster_name; + String uri; + String format_name; + String compression_method; +}; + + +} + +#endif diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 021335fea1f..a923258b111 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -139,7 +139,6 @@ void IStorage::alter(const AlterCommands & params, ContextPtr context, AlterLock setInMemoryMetadata(new_metadata); } - void IStorage::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /* context */) const { for (const auto & command : commands) diff --git a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp index 7b736e95d25..748ea02ac6d 100644 --- a/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp +++ b/src/Storages/Kafka/WriteBufferToKafkaProducer.cpp @@ -103,7 +103,7 @@ void WriteBufferToKafkaProducer::countRow(const Columns & columns, size_t curren producer->poll(timeout); continue; } - throw e; + throw; } break; @@ -126,7 +126,7 @@ void WriteBufferToKafkaProducer::flush() { if (e.get_error() == RD_KAFKA_RESP_ERR__TIMED_OUT) continue; - throw e; + throw; } break; 
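
The WriteBufferToKafkaProducer hunks above replace "throw e;" with a bare "throw;". Rethrowing the named variable throws a copy with the static type of the catch parameter, which can slice a derived exception; a bare throw re-raises the original object unchanged. A small standalone illustration (not ClickHouse code, exception types are made up for the example):

#include <iostream>
#include <stdexcept>

struct BaseError : std::runtime_error { using std::runtime_error::runtime_error; };
struct DetailedError : BaseError { using BaseError::BaseError; };

void rethrowByName()
{
    try { throw DetailedError("kafka timeout"); }
    catch (BaseError & e) { throw e; }   // throws a copy with static type BaseError: the derived part is sliced off
}

void rethrowBare()
{
    try { throw DetailedError("kafka timeout"); }
    catch (BaseError &) { throw; }       // re-raises the original DetailedError object
}

int main()
{
    try { rethrowByName(); }
    catch (DetailedError &) { std::cout << "by-name: DetailedError\n"; }
    catch (BaseError &)     { std::cout << "by-name: sliced to BaseError\n"; }

    try { rethrowBare(); }
    catch (DetailedError &) { std::cout << "bare throw: DetailedError preserved\n"; }
    catch (BaseError &)     { std::cout << "bare throw: BaseError\n"; }
}

In the Kafka producer the caught object is a library exception, so the bare rethrow keeps its concrete type and error details intact for callers further up the stack.
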
diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index 0f6cd8050ca..b21910158ad 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -49,7 +49,7 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) if (out_replaced_parts) out_replaced_parts->push_back(it->second); - part_info_to_name.erase(it++); + it = part_info_to_name.erase(it); } if (out_replaced_parts) @@ -61,7 +61,7 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) assert(part_info != it->first); if (out_replaced_parts) out_replaced_parts->push_back(it->second); - part_info_to_name.erase(it++); + it = part_info_to_name.erase(it); } if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index e952deb5a87..2855e21356d 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -591,6 +591,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( block.getNamesAndTypesList(), {}, CompressionCodecFactory::instance().get("NONE", {})); + part_out.write(block); part_out.writeSuffixAndFinalizePart(new_projection_part); new_projection_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true); @@ -612,7 +613,9 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( new_data_part->partition.create(metadata_snapshot, block, 0, context); MergedBlockOutputStream part_out( - new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, CompressionCodecFactory::instance().get("NONE", {})); + new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, + CompressionCodecFactory::instance().get("NONE", {})); + part_out.write(block); part_out.writeSuffixAndFinalizePart(new_data_part); new_data_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 9da12a2dca2..83328594363 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -270,7 +270,7 @@ static void decrementTypeMetric(MergeTreeDataPartType type) IMergeTreeDataPart::IMergeTreeDataPart( - MergeTreeData & storage_, + const MergeTreeData & storage_, const String & name_, const VolumePtr & volume_, const std::optional & relative_path_, @@ -407,21 +407,40 @@ std::pair IMergeTreeDataPart::getMinMaxTime() const } -void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns) +void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns, const SerializationInfoByName & new_infos) { columns = new_columns; + column_name_to_position.clear(); column_name_to_position.reserve(new_columns.size()); size_t pos = 0; + for (const auto & column : columns) { - column_name_to_position.emplace(column.name, pos); - for (const auto & subcolumn : column.type->getSubcolumnNames()) - column_name_to_position.emplace(Nested::concatenateName(column.name, subcolumn), pos); - ++pos; + column_name_to_position.emplace(column.name, pos++); + + auto it = new_infos.find(column.name); + if (it != new_infos.end()) + { + auto & old_info = serialization_infos[column.name]; + const auto & new_info = it->second; + + if (old_info) + old_info->replaceData(*new_info); + else + old_info = new_info->clone(); + } } } +SerializationPtr 
IMergeTreeDataPart::getSerialization(const NameAndTypePair & column) const +{ + auto it = serialization_infos.find(column.getNameInStorage()); + return it == serialization_infos.end() + ? IDataType::getSerialization(column) + : IDataType::getSerialization(column, *it->second); +} + void IMergeTreeDataPart::removeIfNeeded() { if (!is_temp && state != State::DeleteOnDestroy) @@ -608,8 +627,8 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks if (check_consistency) checkConsistency(require_columns_checksums); - loadDefaultCompressionCodec(); + loadDefaultCompressionCodec(); } void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency) @@ -660,13 +679,13 @@ void IMergeTreeDataPart::loadIndex() size_t marks_count = index_granularity.getMarksCount(); - Serializations serializations(key_size); + Serializations key_serializations(key_size); for (size_t j = 0; j < key_size; ++j) - serializations[j] = primary_key.data_types[j]->getDefaultSerialization(); + key_serializations[j] = primary_key.data_types[j]->getDefaultSerialization(); for (size_t i = 0; i < marks_count; ++i) //-V756 for (size_t j = 0; j < key_size; ++j) - serializations[j]->deserializeBinary(*loaded_index[j], *index_file); + key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file); for (size_t i = 0; i < key_size; ++i) { @@ -757,14 +776,8 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const auto column_size = getColumnSize(part_column.name); if (column_size.data_compressed != 0 && !storage_columns.hasCompressionCodec(part_column.name)) { - auto serialization = IDataType::getSerialization(part_column, - [&](const String & stream_name) - { - return volume->getDisk()->exists(stream_name + IMergeTreeDataPart::DATA_FILE_EXTENSION); - }); - String path_to_data_file; - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + getSerialization(part_column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { if (path_to_data_file.empty()) { @@ -872,6 +885,14 @@ void IMergeTreeDataPart::loadChecksums(bool require) void IMergeTreeDataPart::loadRowsCount() { String path = fs::path(getFullRelativePath()) / "count.txt"; + + auto read_rows_count = [&]() + { + auto buf = openForReading(volume->getDisk(), path); + readIntText(rows_count, *buf); + assertEOF(*buf); + }; + if (index_granularity.empty()) { rows_count = 0; @@ -881,16 +902,16 @@ void IMergeTreeDataPart::loadRowsCount() if (!volume->getDisk()->exists(path)) throw Exception("No count.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); - auto buf = openForReading(volume->getDisk(), path); - readIntText(rows_count, *buf); - assertEOF(*buf); + read_rows_count(); #ifndef NDEBUG /// columns have to be loaded for (const auto & column : getColumns()) { /// Most trivial types - if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes()) + if (column.type->isValueRepresentedByNumber() + && !column.type->haveSubtypes() + && getSerialization(column)->getKind() == ISerialization::Kind::DEFAULT) { auto size = getColumnSize(column.name); @@ -934,9 +955,15 @@ void IMergeTreeDataPart::loadRowsCount() } else { + if (volume->getDisk()->exists(path)) + { + read_rows_count(); + return; + } + for (const NameAndTypePair & column : columns) { - ColumnPtr column_col = column.type->createColumn(); + ColumnPtr column_col = column.type->createColumn(*getSerialization(column)); if (!column_col->isFixedAndContiguous() || 
column_col->lowCardinality()) continue; @@ -1051,7 +1078,18 @@ void IMergeTreeDataPart::loadColumns(bool require) } } - setColumns(loaded_columns); + SerializationInfo::Settings settings = + { + .ratio_of_defaults_for_sparse = storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .choose_kind = false, + }; + + SerializationInfoByName infos(loaded_columns, settings); + path = getFullRelativePath() + SERIALIZATION_FILE_NAME; + if (volume->getDisk()->exists(path)) + infos.readJSON(*volume->getDisk()->readFile(path)); + + setColumns(loaded_columns, infos); } bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & storage_policy) const @@ -1319,7 +1357,7 @@ String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool else if (parent_part) full_relative_path /= parent_part->relative_path; - for (int try_no = 0; try_no < 10; try_no++) + for (int try_no = 0; try_no < 10; ++try_no) { res = (prefix.empty() ? "" : prefix + "_") + name + (try_no ? "_try" + DB::toString(try_no) : ""); @@ -1566,15 +1604,6 @@ bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metada return true; } -SerializationPtr IMergeTreeDataPart::getSerializationForColumn(const NameAndTypePair & column) const -{ - return IDataType::getSerialization(column, - [&](const String & stream_name) - { - return checksums.files.count(stream_name + DATA_FILE_EXTENSION) != 0; - }); -} - String IMergeTreeDataPart::getUniqueId() const { auto disk = volume->getDisk(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index a203d45aa25..ab08ca1c33a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -70,7 +71,7 @@ public: const IMergeTreeDataPart * parent_part_); IMergeTreeDataPart( - MergeTreeData & storage_, + const MergeTreeData & storage_, const String & name_, const VolumePtr & volume, const std::optional & relative_path, @@ -127,9 +128,12 @@ public: String getTypeName() const { return getType().toString(); } - void setColumns(const NamesAndTypesList & new_columns); + void setColumns(const NamesAndTypesList & new_columns, const SerializationInfoByName & new_infos = {}); const NamesAndTypesList & getColumns() const { return columns; } + const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; } + SerializationInfoByName & getSerializationInfos() { return serialization_infos; } + SerializationPtr getSerialization(const NameAndTypePair & column) const; /// Throws an exception if part is not stored in on-disk format. void assertOnDisk() const; @@ -192,7 +196,6 @@ public: size_t rows_count = 0; - time_t modification_time = 0; /// When the part is removed from the working set. Changes once. mutable std::atomic remove_time { std::numeric_limits::max() }; @@ -391,14 +394,16 @@ public: static inline constexpr auto UUID_FILE_NAME = "uuid.txt"; + /// File that contains information about kinds of serialization of columns + /// and information that helps to choose kind of serialization later during merging + /// (number of rows, number of rows with default values, etc). + static inline constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; + /// Checks that all TTLs (table min/max, column ttls, so on) for part /// calculated. Part without calculated TTL may exist if TTL was added after /// part creation (using alter query with materialize_ttl setting). 
bool checkAllTTLCalculated(const StorageMetadataPtr & metadata_snapshot) const; - /// Returns serialization for column according to files in which column is written in part. - SerializationPtr getSerializationForColumn(const NameAndTypePair & column) const; - /// Return some uniq string for file /// Required for distinguish different copies of the same part on S3 String getUniqueId() const; @@ -421,6 +426,7 @@ protected: /// Columns description. Cannot be changed, after part initialization. NamesAndTypesList columns; + const Type part_type; /// Not null when it's a projection part. @@ -445,6 +451,9 @@ private: /// In compact parts order of columns is necessary NameToNumber column_name_to_position; + /// Map from name of column to its serialization info. + SerializationInfoByName serialization_infos; + /// Reads part unique identifier (if exists) from uuid.txt void loadUUID(); diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 75eb01ed73c..79186402027 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -297,7 +297,7 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const St { if (typeid_cast(part_column.type.get())) { - auto position = data_part->getColumnPosition(part_column.name); + auto position = data_part->getColumnPosition(part_column.getNameInStorage()); if (position && Nested::extractTableName(part_column.name) == table_name) return position; } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 48fd9e583bf..5393d71ff86 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -4,19 +4,34 @@ namespace DB { + IMergedBlockOutputStream::IMergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, - const StorageMetadataPtr & metadata_snapshot_) + const StorageMetadataPtr & metadata_snapshot_, + const NamesAndTypesList & columns_list, + bool reset_columns_) : storage(data_part->storage) , metadata_snapshot(metadata_snapshot_) , volume(data_part->volume) , part_path(data_part->isStoredOnDisk() ? data_part->getFullRelativePath() : "") + , reset_columns(reset_columns_) { + if (reset_columns) + { + SerializationInfo::Settings info_settings = + { + .ratio_of_defaults_for_sparse = storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .choose_kind = false, + }; + + new_serialization_infos = SerializationInfoByName(columns_list, info_settings); + } } NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( const MergeTreeDataPartPtr & data_part, NamesAndTypesList & columns, + SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums) { const NameSet & empty_columns = data_part->expired_columns; @@ -28,10 +43,9 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. 
std::map stream_counts; - for (const NameAndTypePair & column : columns) + for (const auto & column : columns) { - auto serialization = data_part->getSerializationForColumn(column); - serialization->enumerateStreams( + data_part->getSerialization(column)->enumerateStreams( [&](const ISerialization::SubstreamPath & substream_path) { ++stream_counts[ISerialization::getFileNameForStream(column, substream_path)]; @@ -57,8 +71,8 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( } }; - auto serialization = data_part->getSerializationForColumn(*column_with_type); - serialization->enumerateStreams(callback); + data_part->getSerialization(*column_with_type)->enumerateStreams(callback); + serialization_infos.erase(column_name); } /// Remove files on disk and checksums diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index 36fbe76cca2..a7c25edabd4 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -13,7 +13,9 @@ class IMergedBlockOutputStream public: IMergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, - const StorageMetadataPtr & metadata_snapshot_); + const StorageMetadataPtr & metadata_snapshot_, + const NamesAndTypesList & columns_list, + bool reset_columns_); virtual ~IMergedBlockOutputStream() = default; @@ -36,6 +38,7 @@ protected: static NameSet removeEmptyColumnsFromPart( const MergeTreeDataPartPtr & data_part, NamesAndTypesList & columns, + SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums); const MergeTreeData & storage; @@ -45,6 +48,9 @@ protected: String part_path; IMergeTreeDataPart::MergeTreeWriterPtr writer; + + bool reset_columns = false; + SerializationInfoByName new_serialization_infos; }; using IMergedBlockOutputStreamPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 881086c024b..b4ecfbebdcb 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -158,12 +159,20 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->parent_part); global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; - global_ctx->new_data_part->setColumns(global_ctx->storage_columns); global_ctx->new_data_part->partition.assign(global_ctx->future_part->getPartition()); global_ctx->new_data_part->is_temp = global_ctx->parent_part == nullptr; ctx->need_remove_expired_values = false; ctx->force_ttl = false; + + SerializationInfo::Settings info_settings = + { + .ratio_of_defaults_for_sparse = global_ctx->data->getSettings()->ratio_of_defaults_for_sparse_serialization, + .choose_kind = true, + }; + + SerializationInfoByName infos(global_ctx->storage_columns, info_settings); + for (const auto & part : global_ctx->future_part->parts) { global_ctx->new_data_part->ttl_infos.update(part->ttl_infos); @@ -173,8 +182,12 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() ctx->need_remove_expired_values = true; ctx->force_ttl = true; } + + infos.add(part->getSerializationInfos()); } + global_ctx->new_data_part->setColumns(global_ctx->storage_columns, infos); + const auto & local_part_min_ttl = global_ctx->new_data_part->ttl_infos.part_min_ttl; if (local_part_min_ttl && local_part_min_ttl <= global_ctx->time_of_merge) ctx->need_remove_expired_values = true; @@ -248,6 +261,7 @@ bool 
MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->merging_columns, MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), ctx->compression_codec, + /*reset_columns=*/ true, ctx->blocks_are_granules_size); global_ctx->rows_written = 0; @@ -395,7 +409,7 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const { - const String & column_name = ctx->it_name_and_type->name; + const auto & [column_name, column_type] = *ctx->it_name_and_type; Names column_names{column_name}; ctx->progress_before = global_ctx->merge_list_element_ptr->progress.load(std::memory_order_relaxed); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9e600286214..1b7be8ca98d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3743,6 +3743,27 @@ std::unordered_set MergeTreeData::getPartitionIDsFromQuery(const ASTs & return partition_ids; } +std::set MergeTreeData::getPartitionIdsAffectedByCommands( + const MutationCommands & commands, ContextPtr query_context) const +{ + std::set affected_partition_ids; + + for (const auto & command : commands) + { + if (!command.partition) + { + affected_partition_ids.clear(); + break; + } + + affected_partition_ids.insert( + getPartitionIDFromQuery(command.partition, query_context) + ); + } + + return affected_partition_ids; +} + MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector( const DataPartStates & affordable_states, DataPartStateVector * out_states, bool require_projection_parts) const diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 5a3dda785a0..380c2f4f4c5 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -688,6 +688,7 @@ public: /// For ATTACH/DETACH/DROP PARTITION. 
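getPartitionIdsAffectedByCommands above collects the partition IDs named by the mutation commands, and an empty result stands for "all partitions" as soon as a single command carries no partition clause. A minimal standalone sketch of that rule follows; the Command struct and function name are illustrative stand-ins, not ClickHouse's MutationCommands API.

#include <optional>
#include <set>
#include <string>
#include <vector>

/// Simplified stand-in for a mutation command: it may optionally be limited to one partition.
struct Command
{
    std::optional<std::string> partition_id;
};

/// Collect the partitions a list of commands touches. An empty set means "every partition",
/// which happens as soon as one command has no partition restriction.
std::set<std::string> affectedPartitions(const std::vector<Command> & commands)
{
    std::set<std::string> ids;
    for (const auto & command : commands)
    {
        if (!command.partition_id)
            return {};
        ids.insert(*command.partition_id);
    }
    return ids;
}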
String getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr context) const; std::unordered_set<String> getPartitionIDsFromQuery(const ASTs & asts, ContextPtr context) const; + std::set<String> getPartitionIdsAffectedByCommands(const MutationCommands & commands, ContextPtr query_context) const; /// Extracts MergeTreeData of other *MergeTree* storage /// and checks that their structure is suitable for ALTER TABLE ATTACH PARTITION FROM diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 6161c4c32a3..cb9fa7e6086 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -664,27 +664,55 @@ void MergeTreeDataMergerMutator::splitMutationCommands( } -NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart( +std::pair<NamesAndTypesList, SerializationInfoByName> +MergeTreeDataMergerMutator::getColumnsForNewDataPart( MergeTreeData::DataPartPtr source_part, const Block & updated_header, NamesAndTypesList storage_columns, + const SerializationInfoByName & serialization_infos, const MutationCommands & commands_for_removes) { - /// In compact parts we read all columns, because they all stored in a - /// single file - if (!isWidePart(source_part)) - return updated_header.getNamesAndTypesList(); - NameSet removed_columns; NameToNameMap renamed_columns_to_from; + NameToNameMap renamed_columns_from_to; + ColumnsDescription part_columns(source_part->getColumns()); + /// All commands are validated in AlterCommand so we don't care about order for (const auto & command : commands_for_removes) { + /// If we don't have this column in source part, then we don't need to materialize it + if (!part_columns.has(command.column_name)) + continue; + if (command.type == MutationCommand::DROP_COLUMN) removed_columns.insert(command.column_name); + if (command.type == MutationCommand::RENAME_COLUMN) + { renamed_columns_to_from.emplace(command.rename_to, command.column_name); + renamed_columns_from_to.emplace(command.column_name, command.rename_to); + } } + + bool is_wide_part = isWidePart(source_part); + SerializationInfoByName new_serialization_infos; + for (const auto & [name, info] : serialization_infos) + { + if (is_wide_part && removed_columns.count(name)) + continue; + + auto it = renamed_columns_from_to.find(name); + if (it != renamed_columns_from_to.end()) + new_serialization_infos.emplace(it->second, info); + else + new_serialization_infos.emplace(name, info); + } + + /// In compact parts we read all columns, because they are all stored in a + /// single file + if (!is_wide_part) + return {updated_header.getNamesAndTypesList(), new_serialization_infos}; + Names source_column_names = source_part->getColumns().getNames(); NameSet source_columns_name_set(source_column_names.begin(), source_column_names.end()); for (auto it = storage_columns.begin(); it != storage_columns.end();) @@ -711,18 +739,9 @@ NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart( } else { - bool was_renamed = false; - bool was_removed = removed_columns.count(it->name); - /// Check that this column was renamed to some other name - for (const auto & [rename_to, rename_from] : renamed_columns_to_from) - { - if (rename_from == it->name) - { - was_renamed = true; - break; - } - } + bool was_renamed = renamed_columns_from_to.count(it->name); + bool was_removed = removed_columns.count(it->name); /// If we want to rename this column to some other name, then its /// previous version should be dropped or removed @@ -731,7 +750,6 @@
NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart( ErrorCodes::LOGICAL_ERROR, "Incorrect mutation commands, trying to rename column {} to {}, but part {} already has column {}", renamed_columns_to_from[it->name], it->name, source_part->name, it->name); - /// Column was renamed and no other column renamed to it's name /// or column is dropped. if (!renamed_columns_to_from.count(it->name) && (was_renamed || was_removed)) @@ -742,7 +760,7 @@ NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart( } } - return storage_columns; + return {storage_columns, new_serialization_infos}; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index e5c8a4d8285..bcac642eb16 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -150,10 +150,11 @@ private: MutationCommands & for_file_renames); /// Get the columns list of the resulting part in the same order as storage_columns. - static NamesAndTypesList getColumnsForNewDataPart( + static std::pair getColumnsForNewDataPart( MergeTreeData::DataPartPtr source_part, const Block & updated_header, NamesAndTypesList storage_columns, + const SerializationInfoByName & serialization_infos, const MutationCommands & commands_for_removes); static ExecuteTTLType shouldExecuteTTL( diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index c4c2e65547b..f4da730b1f0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -125,7 +125,7 @@ void MergeTreeDataPartCompact::loadIndexGranularity() bool MergeTreeDataPartCompact::hasColumnFiles(const NameAndTypePair & column) const { - if (!getColumnPosition(column.name)) + if (!getColumnPosition(column.getNameInStorage())) return false; auto bin_checksum = checksums.files.find(DATA_FILE_NAME_WITH_EXTENSION); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 3a1ea474d74..4ec53d88339 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -121,7 +121,9 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri auto projection_compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices()); MergedBlockOutputStream projection_out( - projection_data_part, desc.metadata, projection_part->columns, projection_indices, projection_compression_codec); + projection_data_part, desc.metadata, projection_part->columns, projection_indices, + projection_compression_codec); + projection_out.write(projection_part->block); projection_out.writeSuffixAndFinalizePart(projection_data_part); new_data_part->addProjectionPart(projection_name, std::move(projection_data_part)); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index d1a0344859d..c5ee9ebd01f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -45,7 +45,7 @@ public: bool isStoredOnDisk() const override { return false; } bool isStoredOnRemoteDisk() const override { return false; } - bool hasColumnFiles(const NameAndTypePair & column) const override { return 
!!getColumnPosition(column.name); } + bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const override; void makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 312f5b435d6..b279c1aba6a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -80,8 +80,7 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( if (checksums.empty()) return size; - auto serialization = getSerializationForColumn(column); - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + getSerialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { String file_name = ISerialization::getFileNameForStream(column, substream_path); @@ -163,8 +162,7 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const { for (const NameAndTypePair & name_type : columns) { - auto serialization = getSerializationForColumn(name_type); - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + getSerialization(name_type)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { String file_name = ISerialization::getFileNameForStream(name_type, substream_path); String mrk_file_name = file_name + index_granularity_info.marks_file_extension; @@ -178,7 +176,6 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const }); } } - } else { @@ -186,13 +183,7 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const std::optional marks_size; for (const NameAndTypePair & name_type : columns) { - auto serialization = IDataType::getSerialization(name_type, - [&](const String & stream_name) - { - return volume->getDisk()->exists(stream_name + DATA_FILE_EXTENSION); - }); - - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + getSerialization(name_type)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { auto file_path = path + ISerialization::getFileNameForStream(name_type, substream_path) + index_granularity_info.marks_file_extension; @@ -227,8 +218,7 @@ bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const }; bool res = true; - auto serialization = IDataType::getSerialization(column, check_stream_exists); - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + getSerialization(column)->enumerateStreams([&](const auto & substream_path) { String file_name = ISerialization::getFileNameForStream(column, substream_path); if (!check_stream_exists(file_name)) @@ -241,8 +231,7 @@ bool MergeTreeDataPartWide::hasColumnFiles(const NameAndTypePair & column) const String MergeTreeDataPartWide::getFileNameForColumn(const NameAndTypePair & column) const { String filename; - auto serialization = column.type->getDefaultSerialization(); - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + getSerialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { if (filename.empty()) filename = 
ISerialization::getFileNameForStream(column, substream_path); @@ -261,7 +250,10 @@ void MergeTreeDataPartWide::calculateEachColumnSizes(ColumnSizeByName & each_col #ifndef NDEBUG /// Most trivial types - if (rows_count != 0 && column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes()) + if (rows_count != 0 + && column.type->isValueRepresentedByNumber() + && !column.type->haveSubtypes() + && getSerialization(column)->getKind() == ISerialization::Kind::DEFAULT) { size_t rows_in_column = size.data_uncompressed / column.type->getSizeOfValueInMemory(); if (rows_in_column != rows_count) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 5d17d6235e1..ce85bc75c80 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -66,7 +66,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, }; ISerialization::SubstreamPath path; - serializations[column.name]->enumerateStreams(path, callback, column.type, nullptr); + data_part->getSerialization(column)->enumerateStreams(path, callback, column.type); } namespace @@ -207,7 +207,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeIntBinary(UInt64(0), marks); writeColumnSingleGranule( - block.getByName(name_and_type->name), serializations[name_and_type->name], + block.getByName(name_and_type->name), data_part->getSerialization(*name_and_type), stream_getter, granule.start_row, granule.rows_to_write); /// Each type always have at least one substream diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 4263640c1e0..03ae6688beb 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -85,9 +85,6 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( if (!disk->exists(part_path)) disk->createDirectories(part_path); - for (const auto & column : columns_list) - serializations.emplace(column.name, column.type->getDefaultSerialization()); - if (settings.rewrite_primary_key) initPrimaryIndex(); initSkipIndices(); @@ -119,7 +116,7 @@ static size_t computeIndexGranularityImpl( } else { - size_t size_of_row_in_bytes = block_size_in_memory / rows_in_block; + size_t size_of_row_in_bytes = std::max(block_size_in_memory / rows_in_block, 1UL); index_granularity_for_block = index_granularity_bytes / size_of_row_in_bytes; } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index e64ba9edec0..fb46175c2aa 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -131,9 +131,6 @@ protected: MergeTreeIndexAggregators skip_indices_aggregators; std::vector skip_index_accumulated_marks; - using SerializationsMap = std::unordered_map; - SerializationsMap serializations; - std::unique_ptr index_file_stream; std::unique_ptr index_stream; DataTypes index_types; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 224a197c3c8..b620bf8130e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -85,7 +86,6 @@ 
MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( addStreams(it, columns.getCodecDescOrDefault(it.name, default_codec)); } - void MergeTreeDataPartWriterWide::addStreams( const NameAndTypePair & column, const ASTPtr & effective_codec_desc) @@ -94,6 +94,7 @@ void MergeTreeDataPartWriterWide::addStreams( { assert(!substream_path.empty()); String stream_name = ISerialization::getFileNameForStream(column, substream_path); + /// Shared offsets for Nested type. if (column_streams.count(stream_name)) return; @@ -117,7 +118,7 @@ void MergeTreeDataPartWriterWide::addStreams( }; ISerialization::SubstreamPath path; - serializations[column.name]->enumerateStreams(path, callback, column.type, nullptr); + data_part->getSerialization(column)->enumerateStreams(path, callback, column.type); } @@ -196,7 +197,9 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm fillIndexGranularity(index_granularity_for_block, block.rows()); } - auto granules_to_write = getGranulesToWrite(index_granularity, block.rows(), getCurrentMark(), rows_written_in_last_mark); + Block block_to_write = block; + + auto granules_to_write = getGranulesToWrite(index_granularity, block_to_write.rows(), getCurrentMark(), rows_written_in_last_mark); auto offset_columns = written_offset_columns ? *written_offset_columns : WrittenOffsetColumns{}; Block primary_key_block; @@ -208,7 +211,10 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm auto it = columns_list.begin(); for (size_t i = 0; i < columns_list.size(); ++i, ++it) { - const ColumnWithTypeAndName & column = block.getByName(it->name); + auto & column = block_to_write.getByName(it->name); + + if (data_part->getSerialization(*it)->getKind() != ISerialization::Kind::SPARSE) + column.column = recursiveRemoveSparse(column.column); if (permutation) { @@ -269,7 +275,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( ISerialization::SubstreamPath & path) { StreamsWithMarks result; - serializations[column.name]->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + data_part->getSerialization(column)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; @@ -304,7 +310,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( ISerialization::SerializeBinaryBulkSettings & serialize_settings, const Granule & granule) { - const auto & serialization = serializations[name_and_type.name]; + const auto & serialization = data_part->getSerialization(name_and_type); serialization->serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.rows_to_write, serialize_settings, serialization_state); /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. 
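In the wide writer above, a column whose chosen serialization is not SPARSE is first converted back to its fully materialized form (recursiveRemoveSparse) before it is written with the ordinary serialization. As a rough mental model only, not ClickHouse's real column classes, a sparse column keeps just the non-default values together with their row offsets, and removing the sparseness means filling the defaults back in:

#include <cstddef>
#include <utility>
#include <vector>

/// Toy model of a sparse column: only non-default values are stored, keyed by row offset.
template <typename T>
struct SparseColumn
{
    size_t rows = 0;
    std::vector<std::pair<size_t, T>> non_defaults;
};

/// Expand the sparse form into a dense vector, filling default values everywhere else.
template <typename T>
std::vector<T> removeSparse(const SparseColumn<T> & column, const T & default_value = T{})
{
    std::vector<T> dense(column.rows, default_value);
    for (const auto & [offset, value] : column.non_defaults)
        dense[offset] = value;
    return dense;
}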
@@ -334,12 +340,13 @@ void MergeTreeDataPartWriterWide::writeColumn( const auto & [name, type] = name_and_type; auto [it, inserted] = serialization_states.emplace(name, nullptr); + auto serialization = data_part->getSerialization(name_and_type); if (inserted) { ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serializations[name]->serializeBinaryBulkStatePrefix(serialize_settings, it->second); + serialization->serializeBinaryBulkStatePrefix(serialize_settings, it->second); } const auto & global_settings = storage.getContext()->getSettingsRef(); @@ -380,7 +387,7 @@ void MergeTreeDataPartWriterWide::writeColumn( } } - serializations[name]->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + serialization->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) @@ -392,10 +399,13 @@ void MergeTreeDataPartWriterWide::writeColumn( } -void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, const IDataType & type) +void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePair & name_type) { - if (!type.isValueRepresentedByNumber() || type.haveSubtypes()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot validate column of non fixed type {}", type.getName()); + const auto & [name, type] = name_type; + const auto & serialization = data_part->getSerialization(name_type); + + if (!type->isValueRepresentedByNumber() || type->haveSubtypes() || serialization->getKind() != ISerialization::Kind::DEFAULT) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot validate column of non fixed type {}", type->getName()); auto disk = data_part->volume->getDisk(); String escaped_name = escapeForFileName(name); @@ -410,7 +420,6 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, size_t mark_num; - const auto & serialization = serializations[name]; for (mark_num = 0; !mrk_in->eof(); ++mark_num) { if (mark_num > index_granularity.getMarksCount()) @@ -436,7 +445,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, if (index_granularity_rows == 0) { - auto column = type.createColumn(); + auto column = type->createColumn(); serialization->deserializeBinaryBulk(*column, bin_in, 1000000000, 0.0); @@ -456,7 +465,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{} (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", data_part->getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); - auto column = type.createColumn(); + auto column = type->createColumn(); serialization->deserializeBinaryBulk(*column, bin_in, index_granularity_rows, 0.0); @@ -495,7 +504,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, "Still have something in marks stream, last mark #{} index granularity size {}, last rows {}", mark_num, index_granularity.getMarksCount(), index_granularity_rows); if (!bin_in.eof()) { - auto column = type.createColumn(); + auto column = type->createColumn(); serialization->deserializeBinaryBulk(*column, bin_in, 1000000000, 0.0); @@ -531,7 +540,7 
@@ void MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Ch if (!serialization_states.empty()) { serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? *written_offset_columns : offset_columns); - serializations[it->name]->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); + data_part->getSerialization(*it)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); } if (write_final_mark) @@ -554,8 +563,12 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Ch /// data according to marks. Otherwise throws LOGICAL_ERROR (equal to abort in debug mode) for (const auto & column : columns_list) { - if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes()) - validateColumnOfFixedSize(column.name, *column.type); + if (column.type->isValueRepresentedByNumber() + && !column.type->haveSubtypes() + && data_part->getSerialization(column)->getKind() == ISerialization::Kind::DEFAULT) + { + validateColumnOfFixedSize(column); + } } #endif @@ -580,7 +593,7 @@ void MergeTreeDataPartWriterWide::writeFinalMark( { writeSingleMark(column, offset_columns, 0, path); /// Memoize information about offsets - serializations[column.name]->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + data_part->getSerialization(column)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 5eaaa0c1bbe..6303fbbac0d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -84,7 +84,7 @@ private: /// Method for self check (used in debug-build only). Checks that written /// data and corresponding marks are consistent. Otherwise throws logical /// errors. 
- void validateColumnOfFixedSize(const String & name, const IDataType & type); + void validateColumnOfFixedSize(const NameAndTypePair & name_type); void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 2cf24215d28..9f17a44a7f8 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -198,36 +198,41 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts( return result; } -Block MergeTreeDataWriter::mergeBlock(const Block & block, SortDescription sort_description, Names & partition_key_columns, IColumn::Permutation *& permutation) +Block MergeTreeDataWriter::mergeBlock( + const Block & block, + SortDescription sort_description, + const Names & partition_key_columns, + IColumn::Permutation *& permutation, + const MergeTreeData::MergingParams & merging_params) { size_t block_size = block.rows(); auto get_merging_algorithm = [&]() -> std::shared_ptr { - switch (data.merging_params.mode) + switch (merging_params.mode) { /// There is nothing to merge in single block in ordinary MergeTree case MergeTreeData::MergingParams::Ordinary: return nullptr; case MergeTreeData::MergingParams::Replacing: return std::make_shared( - block, 1, sort_description, data.merging_params.version_column, block_size + 1); + block, 1, sort_description, merging_params.version_column, block_size + 1); case MergeTreeData::MergingParams::Collapsing: return std::make_shared( - block, 1, sort_description, data.merging_params.sign_column, + block, 1, sort_description, merging_params.sign_column, false, block_size + 1, &Poco::Logger::get("MergeTreeBlockOutputStream")); case MergeTreeData::MergingParams::Summing: return std::make_shared( - block, 1, sort_description, data.merging_params.columns_to_sum, + block, 1, sort_description, merging_params.columns_to_sum, partition_key_columns, block_size + 1); case MergeTreeData::MergingParams::Aggregating: return std::make_shared(block, 1, sort_description, block_size + 1); case MergeTreeData::MergingParams::VersionedCollapsing: return std::make_shared( - block, 1, sort_description, data.merging_params.sign_column, block_size + 1); + block, 1, sort_description, merging_params.sign_column, block_size + 1); case MergeTreeData::MergingParams::Graphite: return std::make_shared( - block, 1, sort_description, block_size + 1, data.merging_params.graphite_params, time(nullptr)); + block, 1, sort_description, block_size + 1, merging_params.graphite_params, time(nullptr)); } __builtin_unreachable(); @@ -330,7 +335,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( Names partition_key_columns = metadata_snapshot->getPartitionKey().column_names; if (context->getSettingsRef().optimize_on_insert) - block = mergeBlock(block, sort_description, partition_key_columns, perm_ptr); + block = mergeBlock(block, sort_description, partition_key_columns, perm_ptr, data.merging_params); /// Size of part would not be greater than block.bytes() + epsilon size_t expected_size = block.bytes(); @@ -359,7 +364,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( if (data.storage_settings.get()->assign_part_uuids) new_data_part->uuid = UUIDHelpers::generateV4(); - new_data_part->setColumns(columns); + const auto & data_settings = data.getSettings(); + + SerializationInfo::Settings settings{data_settings->ratio_of_defaults_for_sparse_serialization, true}; + 
SerializationInfoByName infos(columns, settings); + infos.add(block); + + new_data_part->setColumns(columns, infos); new_data_part->rows_count = block.rows(); new_data_part->partition = std::move(partition); new_data_part->minmax_idx = std::move(minmax_idx); @@ -407,8 +418,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( auto compression_codec = data.getContext()->chooseCompressionCodec(0, 0); const auto & index_factory = MergeTreeIndexFactory::instance(); - MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); - bool sync_on_insert = data.getSettings()->fsync_after_insert; + MergedBlockOutputStream out(new_data_part, metadata_snapshot,columns, + index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); + + bool sync_on_insert = data_settings->fsync_after_insert; out.writeWithPermutation(block, perm_ptr); @@ -429,7 +442,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart( } MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( - const String part_name, + const String & part_name, MergeTreeDataPartType part_type, const String & relative_path, bool is_temp, @@ -437,8 +450,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( const MergeTreeData & data, Poco::Logger * log, Block block, - const StorageMetadataPtr & metadata_snapshot) + const ProjectionDescription & projection) { + const StorageMetadataPtr & metadata_snapshot = projection.metadata; MergeTreePartInfo new_part_info("all", 0, 0, 0); auto new_data_part = data.createPart( part_name, @@ -450,7 +464,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( new_data_part->is_temp = is_temp; NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); - new_data_part->setColumns(columns); + SerializationInfo::Settings settings{data.getSettings()->ratio_of_defaults_for_sparse_serialization, true}; + SerializationInfoByName infos(columns, settings); + infos.add(block); + + new_data_part->setColumns(columns, infos); if (new_data_part->isStoredOnDisk()) { @@ -494,6 +512,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPartImpl( ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocksAlreadySorted); } + if (projection.type == ProjectionDescription::Type::Aggregate) + { + MergeTreeData::MergingParams projection_merging_params; + projection_merging_params.mode = MergeTreeData::MergingParams::Aggregating; + block = mergeBlock(block, sort_description, {}, perm_ptr, projection_merging_params); + } + /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. 
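Here the serialization of every column in a new part is chosen from the data that is actually inserted: SerializationInfoByName is filled from the block, and a column becomes sparse once its share of default values reaches ratio_of_defaults_for_sparse_serialization. The helper below sketches that decision for a single flat column; the names and the exact comparison are assumptions for illustration, not the real SerializationInfo API.

#include <cstddef>
#include <vector>

enum class SerializationKind { Default, Sparse };

/// Pick a serialization kind from the values about to be written: if the share of default
/// values is at least ratio_threshold, use the sparse form. A threshold >= 1.0 disables
/// sparse serialization entirely, matching the documented behaviour of the setting.
template <typename T>
SerializationKind chooseSerializationKind(const std::vector<T> & values, double ratio_threshold, const T & default_value = T{})
{
    if (values.empty() || ratio_threshold >= 1.0)
        return SerializationKind::Default;

    size_t defaults = 0;
    for (const auto & value : values)
        if (value == default_value)
            ++defaults;

    double ratio = static_cast<double>(defaults) / static_cast<double>(values.size());
    return ratio >= ratio_threshold ? SerializationKind::Sparse : SerializationKind::Default;
}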
auto compression_codec = data.getContext()->chooseCompressionCodec(0, 0); @@ -542,7 +567,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeProjectionPart( data, log, block, - projection.metadata); + projection); } /// This is used for projection materialization process which may contain multiple stages of @@ -579,7 +604,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempProjectionPart( data, log, block, - projection.metadata); + projection); } MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeInMemoryProjectionPart( @@ -598,7 +623,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeInMemoryProjectionPa data, log, block, - projection.metadata); + projection); } } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 006f897c3e2..f16ec877113 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -33,7 +33,7 @@ using BlocksWithPartition = std::vector; class MergeTreeDataWriter { public: - MergeTreeDataWriter(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get(data.getLogName() + " (Writer)")) {} + explicit MergeTreeDataWriter(MergeTreeData & data_) : data(data_), log(&Poco::Logger::get(data.getLogName() + " (Writer)")) {} /** Split the block to blocks, each of them must be written as separate part. * (split rows by partition) @@ -74,11 +74,16 @@ public: const ProjectionDescription & projection, const IMergeTreeDataPart * parent_part); - Block mergeBlock(const Block & block, SortDescription sort_description, Names & partition_key_columns, IColumn::Permutation *& permutation); + static Block mergeBlock( + const Block & block, + SortDescription sort_description, + const Names & partition_key_columns, + IColumn::Permutation *& permutation, + const MergeTreeData::MergingParams & merging_params); private: static MergeTreeData::MutableDataPartPtr writeProjectionPartImpl( - const String part_name, + const String & part_name, MergeTreeDataPartType part_type, const String & relative_path, bool is_temp, @@ -86,7 +91,7 @@ private: const MergeTreeData & data, Poco::Logger * log, Block block, - const StorageMetadataPtr & metadata_snapshot); + const ProjectionDescription & projection); MergeTreeData & data; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index a8820b3f6d4..9332f4fd442 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -112,7 +112,7 @@ void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, size_t element_start_row = column_offsets[current_position - 1]; size_t elements_size = column_offsets[current_position] - element_start_row; - for (size_t row_num = 0; row_num < elements_size; row_num++) + for (size_t row_num = 0; row_num < elements_size; ++row_num) { auto ref = column_key.getDataAt(element_start_row + row_num); token_extractor->stringPaddedToBloomFilter(ref.data, ref.size, granule->bloom_filters[col]); diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 15e7ed4c1d0..5a889ea5e8b 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -123,6 +123,9 @@ bool MergeTreePartsMover::selectPartsForMove( auto metadata_snapshot = data->getInMemoryMetadataPtr(); + if (need_to_move.empty() && !metadata_snapshot->hasAnyMoveTTL()) + return 
false; + for (const auto & part : data_parts) { String reason; @@ -228,6 +231,7 @@ MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEnt LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part->getFullPath()); cloned_part->loadColumnsChecksumsIndexes(true, true); + cloned_part->modification_time = disk->getLastModified(cloned_part->getFullRelativePath()).epochTime(); return cloned_part; } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 09542c30636..c89affb5365 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -198,7 +198,9 @@ std::vector MergeTreeReadPool::fillPerPartInfo(const RangesInDataParts & for (const auto i : collections::range(0, parts.size())) { const auto & part = parts[i]; - is_part_on_remote_disk[i] = part.data_part->isStoredOnRemoteDisk(); + bool part_on_remote_disk = part.data_part->isStoredOnRemoteDisk(); + is_part_on_remote_disk[i] = part_on_remote_disk; + do_not_steal_tasks |= part_on_remote_disk; /// Read marks for every data part. size_t sum_marks = 0; diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 1f8642db886..b594b59fdfa 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -54,7 +54,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( { auto column_from_part = getColumnFromPart(*name_and_type); - auto position = data_part->getColumnPosition(column_from_part.name); + auto position = data_part->getColumnPosition(column_from_part.getNameInStorage()); if (!position && typeid_cast(column_from_part.type.get())) { /// If array of Nested column is missing in part, @@ -140,8 +140,12 @@ size_t MergeTreeReaderCompact::readRows( if (!column_positions[i]) continue; + auto column_from_part = getColumnFromPart(*column_it); if (res_columns[i] == nullptr) - res_columns[i] = getColumnFromPart(*column_it).type->createColumn(); + { + auto serialization = data_part->getSerialization(column_from_part); + res_columns[i] = column_from_part.type->createColumn(*serialization); + } } while (read_rows < max_rows_to_read) @@ -199,6 +203,8 @@ void MergeTreeReaderCompact::readData( { const auto & [name, type] = name_and_type; + adjustUpperBound(current_task_last_mark); /// Must go before seek. + if (!isContinuousReading(from_mark, column_position)) seekToMark(from_mark, column_position); @@ -207,8 +213,6 @@ void MergeTreeReaderCompact::readData( if (only_offsets && (substream_path.size() != 1 || substream_path[0].type != ISerialization::Substream::ArraySizes)) return nullptr; - /// For asynchronous reading from remote fs. 
- data_buffer->setReadUntilPosition(marks_loader.getMark(current_task_last_mark).offset_in_compressed_file); return data_buffer; }; @@ -220,9 +224,11 @@ void MergeTreeReaderCompact::readData( if (name_and_type.isSubcolumn()) { const auto & type_in_storage = name_and_type.getTypeInStorage(); - ColumnPtr temp_column = type_in_storage->createColumn(); + const auto & name_in_storage = name_and_type.getNameInStorage(); + + auto serialization = data_part->getSerialization(NameAndTypePair{name_in_storage, type_in_storage}); + ColumnPtr temp_column = type_in_storage->createColumn(*serialization); - auto serialization = type_in_storage->getDefaultSerialization(); serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr); @@ -236,7 +242,7 @@ void MergeTreeReaderCompact::readData( } else { - auto serialization = type->getDefaultSerialization(); + auto serialization = data_part->getSerialization(name_and_type); serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, state, nullptr); } @@ -269,6 +275,34 @@ void MergeTreeReaderCompact::seekToMark(size_t row_index, size_t column_index) } } +void MergeTreeReaderCompact::adjustUpperBound(size_t last_mark) +{ + auto right_offset = marks_loader.getMark(last_mark).offset_in_compressed_file; + if (!right_offset) + { + /// If already reading till the end of file. + if (last_right_offset && *last_right_offset == 0) + return; + + last_right_offset = 0; // Zero value means the end of file. + if (cached_buffer) + cached_buffer->setReadUntilEnd(); + if (non_cached_buffer) + non_cached_buffer->setReadUntilEnd(); + } + else + { + if (last_right_offset && right_offset <= last_right_offset.value()) + return; + + last_right_offset = right_offset; + if (cached_buffer) + cached_buffer->setReadUntilPosition(right_offset); + if (non_cached_buffer) + non_cached_buffer->setReadUntilPosition(right_offset); + } +} + bool MergeTreeReaderCompact::isContinuousReading(size_t mark, size_t column_position) { if (!last_read_granule) diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index 350c8427eff..381b212df3c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -52,6 +52,9 @@ private: /// Should we read full column or only it's offsets std::vector read_only_offsets; + /// For asynchronous reading from remote fs. Same meaning as in MergeTreeReaderStream. + std::optional last_right_offset; + size_t next_mark = 0; std::optional> last_read_granule; @@ -67,6 +70,9 @@ private: MergeTreeMarksLoader & marks_loader, const ColumnPositions & column_positions, const MarkRanges & mark_ranges); + + /// For asynchronous reading from remote fs. + void adjustUpperBound(size_t last_mark); }; } diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 50650ef66e5..5e51a2931e4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -105,7 +106,10 @@ size_t MergeTreeReaderWide::readRows( /// The column is already present in the block so we will append the values to the end. 
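adjustUpperBound above only ever moves the read-until boundary of the remote-fs buffers forward and caches the last value it announced, with an offset of 0 reserved to mean "read to the end of the file". A simplified standalone version of that bookkeeping follows; the buffer type is a stub, not the real cached and non-cached read buffers.

#include <cstddef>
#include <optional>

/// Stub with the two calls used below; in the real code these are prefetching read buffers.
struct ReadBufferStub
{
    void setReadUntilPosition(size_t /*position*/) {}
    void setReadUntilEnd() {}
};

/// Remember the right boundary already announced to the buffer and only extend it.
struct UpperBoundAdjuster
{
    std::optional<size_t> last_right_offset;

    void adjust(ReadBufferStub & buffer, size_t right_offset)
    {
        if (right_offset == 0)
        {
            /// Zero means "until the end of file"; do nothing if that was already requested.
            if (last_right_offset && *last_right_offset == 0)
                return;
            last_right_offset = 0;
            buffer.setReadUntilEnd();
        }
        else
        {
            /// Never shrink the window that was already requested.
            if (last_right_offset && right_offset <= *last_right_offset)
                return;
            last_right_offset = right_offset;
            buffer.setReadUntilPosition(right_offset);
        }
    }
};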
bool append = res_columns[pos] != nullptr; if (!append) - res_columns[pos] = type->createColumn(); + { + auto serialization = data_part->getSerialization(column_from_part); + res_columns[pos] = type->createColumn(*serialization); + } auto & column = res_columns[pos]; try @@ -184,9 +188,7 @@ void MergeTreeReaderWide::addStreams(const NameAndTypePair & name_and_type, profile_callback, clock_type)); }; - auto serialization = data_part->getSerializationForColumn(name_and_type); - serialization->enumerateStreams(callback); - serializations.emplace(name_and_type.name, std::move(serialization)); + data_part->getSerialization(name_and_type)->enumerateStreams(callback); } @@ -220,6 +222,23 @@ static ReadBuffer * getStream( return stream.data_buffer; } +void MergeTreeReaderWide::deserializePrefix( + const SerializationPtr & serialization, + const NameAndTypePair & name_and_type, + size_t current_task_last_mark, + ISerialization::SubstreamsCache & cache) +{ + const auto & name = name_and_type.name; + if (deserialize_binary_bulk_state_map.count(name) == 0) + { + ISerialization::DeserializeBinaryBulkSettings deserialize_settings; + deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) + { + return getStream(/* seek_to_start = */true, substream_path, streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); + }; + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); + } +} void MergeTreeReaderWide::prefetch( const NameAndTypePair & name_and_type, @@ -229,8 +248,8 @@ void MergeTreeReaderWide::prefetch( ISerialization::SubstreamsCache & cache, std::unordered_set & prefetched_streams) { - const auto & name = name_and_type.name; - auto & serialization = serializations[name]; + auto serialization = data_part->getSerialization(name_and_type); + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { @@ -258,16 +277,9 @@ void MergeTreeReaderWide::readData( deserialize_settings.avg_value_size_hint = avg_value_size_hint; const auto & name = name_and_type.name; - auto & serialization = serializations[name]; + auto serialization = data_part->getSerialization(name_and_type); - if (deserialize_binary_bulk_state_map.count(name) == 0) - { - deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) - { - return getStream(/* seek_to_start = */true, substream_path, streams, name_and_type, from_mark, /* seek_to_mark = */false, current_task_last_mark, cache); - }; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); - } + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index e27dd85643b..41219560ecc 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -34,11 +34,9 @@ public: bool canReadIncompleteGranules() const override { return true; } using FileStreams = std::map>; - using Serializations = std::map; private: FileStreams streams; - Serializations serializations; DiskPtr disk; void addStreams(const NameAndTypePair & name_and_type, @@ -57,6 +55,12 @@ private: size_t current_task_last_mark, 
ISerialization::SubstreamsCache & cache, std::unordered_set & prefetched_streams); /// if stream was already prefetched do nothing + + void deserializePrefix( + const SerializationPtr & serialization, + const NameAndTypePair & name_and_type, + size_t current_task_last_mark, + ISerialization::SubstreamsCache & cache); }; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index decc72df14c..b991166b3b6 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -33,6 +33,7 @@ struct Settings; M(UInt64, min_rows_for_compact_part, 0, "Experimental. Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ M(Bool, in_memory_parts_enable_wal, true, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ M(UInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \ + M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 9323249946a..694357ab0c2 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -199,6 +199,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor part->minmax_idx->update(block, storage.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); part->partition.create(metadata_snapshot, block, 0, context); + part->setColumns(block.getNamesAndTypesList()); if (metadata_snapshot->hasSortingKey()) metadata_snapshot->getSortingKey().expression->execute(block); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 43146709686..cbdbb2339df 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -18,8 +18,9 @@ MergedBlockOutputStream::MergedBlockOutputStream( const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec_, + bool reset_columns_, bool blocks_are_granules_size) - : IMergedBlockOutputStream(data_part, metadata_snapshot_) + : IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_) , columns_list(columns_list_) , default_codec(default_codec_) { @@ -77,10 +78,16 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( else part_columns = *total_columns_list; - if (new_part->isStoredOnDisk()) - finalizePartOnDisk(new_part, part_columns, checksums, sync); + auto & serialization_infos = reset_columns + ? 
new_serialization_infos + : new_part->getSerializationInfos(); + + if (new_part->isStoredOnDisk()) + finalizePartOnDisk(new_part, part_columns, serialization_infos, checksums, sync); + + if (reset_columns) + new_part->setColumns(part_columns, serialization_infos); - new_part->setColumns(part_columns); new_part->rows_count = rows_count; new_part->modification_time = time(nullptr); new_part->index = writer->releaseIndexColumns(); @@ -97,6 +104,7 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( void MergedBlockOutputStream::finalizePartOnDisk( const MergeTreeData::MutableDataPartPtr & new_part, NamesAndTypesList & part_columns, + SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums, bool sync) { @@ -127,15 +135,17 @@ void MergedBlockOutputStream::finalizePartOnDisk( out->sync(); } - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) + if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { new_part->partition.store(storage, volume->getDisk(), part_path, checksums); if (new_part->minmax_idx->initialized) new_part->minmax_idx->store(storage, volume->getDisk(), part_path, checksums); else if (rows_count) throw Exception("MinMax index was not initialized for new non-empty part " + new_part->name - + ". It is a bug.", ErrorCodes::LOGICAL_ERROR); + + ". It is a bug.", ErrorCodes::LOGICAL_ERROR); + } + { auto count_out = volume->getDisk()->writeFile(fs::path(part_path) / "count.txt", 4096); HashingWriteBuffer count_out_hashing(*count_out); writeIntText(rows_count, count_out_hashing); @@ -161,7 +171,19 @@ void MergedBlockOutputStream::finalizePartOnDisk( out->sync(); } - removeEmptyColumnsFromPart(new_part, part_columns, checksums); + removeEmptyColumnsFromPart(new_part, part_columns, serialization_infos, checksums); + + if (!serialization_infos.empty()) + { + auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096); + HashingWriteBuffer out_hashing(*out); + serialization_infos.writeJSON(out_hashing); + checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); + checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_hash = out_hashing.getHash(); + out->finalize(); + if (sync) + out->sync(); + } { /// Write a file with a description of columns. 
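A part whose serialization infos are non-empty gets one more small metadata file, written through a hashing buffer so that its size and hash land in the part's checksums next to count.txt and columns.txt. The sketch below shows that general pattern; the FNV-1a hash and the FileChecksum type are placeholders for the real HashingWriteBuffer and checksum structures.

#include <cstdint>
#include <fstream>
#include <string>

struct FileChecksum
{
    uint64_t file_size = 0;
    uint64_t file_hash = 0;
};

/// Write a small metadata file and return the size/hash pair to be recorded in the
/// part's checksum map. FNV-1a stands in for the real hashing write buffer.
FileChecksum writeWithChecksum(const std::string & path, const std::string & contents)
{
    uint64_t hash = 1469598103934665603ULL;      /// FNV offset basis
    for (unsigned char c : contents)
    {
        hash ^= c;
        hash *= 1099511628211ULL;                /// FNV prime
    }

    std::ofstream out(path, std::ios::binary);
    out.write(contents.data(), static_cast<std::streamsize>(contents.size()));

    return {static_cast<uint64_t>(contents.size()), hash};
}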
@@ -202,6 +224,9 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm return; writer->write(block, permutation); + if (reset_columns) + new_serialization_infos.add(block); + rows_count += rows; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 5965331ee81..ffc740bf410 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -19,6 +19,7 @@ public: const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec_, + bool reset_columns_ = false, bool blocks_are_granules_size = false); Block getHeader() const { return metadata_snapshot->getSampleBlock(); } @@ -48,6 +49,7 @@ private: void finalizePartOnDisk( const MergeTreeData::MutableDataPartPtr & new_part, NamesAndTypesList & part_columns, + SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums, bool sync); diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 4b760103750..ff79a187490 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -18,7 +18,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(data_part, metadata_snapshot_) + : IMergedBlockOutputStream(data_part, metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) , header(header_) { const auto & global_settings = data_part->storage.getContext()->getSettings(); @@ -51,6 +51,7 @@ void MergedColumnOnlyOutputStream::write(const Block & block) return; writer->write(block, nullptr); + new_serialization_infos.add(block); } MergeTreeData::DataPart::Checksums @@ -71,12 +72,12 @@ MergedColumnOnlyOutputStream::writeSuffixAndGetChecksums( auto columns = new_part->getColumns(); - auto removed_files = removeEmptyColumnsFromPart(new_part, columns, checksums); + auto removed_files = removeEmptyColumnsFromPart(new_part, columns, new_serialization_infos, checksums); for (const String & removed_file : removed_files) if (all_checksums.files.count(removed_file)) all_checksums.files.erase(removed_file); - new_part->setColumns(columns); + new_part->setColumns(columns, new_serialization_infos); return checksums; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index accf167f5ff..86a692c8a48 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -315,8 +315,7 @@ NameSet collectFilesToSkip( files_to_skip.insert(stream_name + mrk_extension); }; - auto serialization = source_part->getSerializationForColumn({entry.name, entry.type}); - serialization->enumerateStreams(callback); + source_part->getSerialization({entry.name, entry.type})->enumerateStreams(callback); } for (const auto & index : indices_to_recalc) { @@ -341,8 +340,7 @@ static NameToNameVector collectFilesForRenames( std::map stream_counts; for (const auto & column : source_part->getColumns()) { - auto serialization = source_part->getSerializationForColumn(column); - serialization->enumerateStreams( + source_part->getSerialization(column)->enumerateStreams( [&](const ISerialization::SubstreamPath & substream_path) { 
++stream_counts[ISerialization::getFileNameForStream(column, substream_path)]; @@ -386,10 +384,7 @@ static NameToNameVector collectFilesForRenames( auto column = source_part->getColumns().tryGetByName(command.column_name); if (column) - { - auto serialization = source_part->getSerializationForColumn(*column); - serialization->enumerateStreams(callback); - } + source_part->getSerialization(*column)->enumerateStreams(callback); } else if (command.type == MutationCommand::Type::RENAME_COLUMN) { @@ -411,10 +406,7 @@ static NameToNameVector collectFilesForRenames( auto column = source_part->getColumns().tryGetByName(command.column_name); if (column) - { - auto serialization = source_part->getSerializationForColumn(*column); - serialization->enumerateStreams(callback); - } + source_part->getSerialization(*column)->enumerateStreams(callback); } } @@ -1301,7 +1293,12 @@ bool MutateTask::prepare() /// It shouldn't be changed by mutation. ctx->new_data_part->index_granularity_info = ctx->source_part->index_granularity_info; - ctx->new_data_part->setColumns(MergeTreeDataMergerMutator::getColumnsForNewDataPart(ctx->source_part, ctx->updated_header, ctx->storage_columns, ctx->for_file_renames)); + + auto [new_columns, new_infos] = MergeTreeDataMergerMutator::getColumnsForNewDataPart( + ctx->source_part, ctx->updated_header, ctx->storage_columns, + ctx->source_part->getSerializationInfos(), ctx->commands_for_part); + + ctx->new_data_part->setColumns(new_columns, new_infos); ctx->new_data_part->partition.assign(ctx->source_part->partition); ctx->disk = ctx->new_data_part->volume->getDisk(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index cc9a142c65c..b3da3d47684 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1033,7 +1033,7 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); (*it)->removed_by_other_entry = true; - queue.erase(it++); + it = queue.erase(it); ++removed_entries; } else diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index d312a7f9c3e..eabd901eb24 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -98,6 +98,23 @@ IMergeTreeDataPart::Checksums checkDataPart( }; }; + SerializationInfoByName serialization_infos(columns_txt, {}); + auto serialization_path = path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME; + + if (disk->exists(serialization_path)) + { + auto serialization_file = disk->readFile(serialization_path); + serialization_infos.readJSON(*serialization_file); + } + + auto get_serialization = [&serialization_infos](const auto & column) + { + auto it = serialization_infos.find(column.name); + return it == serialization_infos.end() + ? column.type->getDefaultSerialization() + : column.type->getSerialization(*it->second); + }; + /// This function calculates only checksum of file content (compressed or uncompressed). /// It also calculates checksum of projections. 
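Both the mutation helpers and checkDataPart walk each column's substreams because several columns can share one physical file: every array inside a Nested structure uses the same offsets stream. Counting how many columns still reference a stream file is what prevents dropping one column from deleting a file a sibling column still needs. A reduced sketch of that reference counting, with plain strings standing in for substream file names:

#include <cstddef>
#include <map>
#include <string>
#include <vector>

/// Count how many columns reference each stream file (e.g. a shared "nested.size0" stream).
std::map<std::string, size_t> countStreamReferences(const std::vector<std::vector<std::string>> & streams_per_column)
{
    std::map<std::string, size_t> stream_counts;
    for (const auto & streams : streams_per_column)
        for (const auto & stream_name : streams)
            ++stream_counts[stream_name];
    return stream_counts;
}

/// A stream file may be removed together with its column only if nothing else references it.
bool canRemoveStream(const std::map<std::string, size_t> & stream_counts, const std::string & stream_name)
{
    auto it = stream_counts.find(stream_name);
    return it != stream_counts.end() && it->second == 1;
}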
auto checksum_file = [&](const String & file_path, const String & file_name) @@ -132,12 +149,7 @@ IMergeTreeDataPart::Checksums checkDataPart( const NamesAndTypesList & projection_columns_list = projection->getColumns(); for (const auto & projection_column : projection_columns_list) { - auto serialization = IDataType::getSerialization(projection_column, [&](const String & stream_name) - { - return disk->exists(stream_name + IMergeTreeDataPart::DATA_FILE_EXTENSION); - }); - - serialization->enumerateStreams( + get_serialization(projection_column)->enumerateStreams( [&](const ISerialization::SubstreamPath & substream_path) { String projection_file_name = ISerialization::getFileNameForStream(projection_column, substream_path) + ".bin"; @@ -209,13 +221,7 @@ IMergeTreeDataPart::Checksums checkDataPart( { for (const auto & column : columns_list) { - auto serialization = IDataType::getSerialization(column, - [&](const String & stream_name) - { - return disk->exists(stream_name + IMergeTreeDataPart::DATA_FILE_EXTENSION); - }); - - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + get_serialization(column)->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { String file_name = ISerialization::getFileNameForStream(column, substream_path) + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(disk, path + file_name); diff --git a/src/Storages/MySQL/MySQLHelpers.cpp b/src/Storages/MySQL/MySQLHelpers.cpp new file mode 100644 index 00000000000..e7745e6c0bb --- /dev/null +++ b/src/Storages/MySQL/MySQLHelpers.cpp @@ -0,0 +1,26 @@ +#include "MySQLHelpers.h" + +#if USE_MYSQL +#include +#include +#include + +namespace DB +{ + +mysqlxx::PoolWithFailover +createMySQLPoolWithFailover(const StorageMySQLConfiguration & configuration, const MySQLSettings & mysql_settings) +{ + return mysqlxx::PoolWithFailover( + configuration.database, configuration.addresses, configuration.username, configuration.password, + MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, + mysql_settings.connection_pool_size, + mysql_settings.connection_max_tries, + mysql_settings.connection_wait_timeout, + mysql_settings.connect_timeout, + mysql_settings.read_write_timeout); +} + +} + +#endif diff --git a/src/Storages/MySQL/MySQLHelpers.h b/src/Storages/MySQL/MySQLHelpers.h new file mode 100644 index 00000000000..712c5a2c719 --- /dev/null +++ b/src/Storages/MySQL/MySQLHelpers.h @@ -0,0 +1,19 @@ +#pragma once +#include "config_core.h" + +#if USE_MYSQL +#include + +namespace mysqlxx { class PoolWithFailover; } + +namespace DB +{ +struct StorageMySQLConfiguration; +struct MySQLSettings; + +mysqlxx::PoolWithFailover +createMySQLPoolWithFailover(const StorageMySQLConfiguration & configuration, const MySQLSettings & mysql_settings); + +} + +#endif diff --git a/src/Storages/MySQL/MySQLSettings.h b/src/Storages/MySQL/MySQLSettings.h index 872b0607e20..aa2c2703d6b 100644 --- a/src/Storages/MySQL/MySQLSettings.h +++ b/src/Storages/MySQL/MySQLSettings.h @@ -19,6 +19,8 @@ class ASTStorage; M(UInt64, connection_max_tries, 3, "Number of retries for pool with failover", 0) \ M(UInt64, connection_wait_timeout, 5, "Timeout (in seconds) for waiting for free connection (in case of there is already connection_pool_size active connections), 0 - do not wait.", 0) \ M(Bool, connection_auto_close, true, "Auto-close connection after query execution, i.e. 
disable connection reuse.", 0) \ + M(UInt64, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connect timeout (in seconds)", 0) \ + M(UInt64, read_write_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "Read/write timeout (in seconds)", 0) \ DECLARE_SETTINGS_TRAITS(MySQLSettingsTraits, LIST_OF_MYSQL_SETTINGS) diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 7cc71a63443..4848ae6c9ea 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -1,13 +1,15 @@ #include "PostgreSQLReplicationHandler.h" +#include +#include #include #include #include #include +#include #include #include #include -#include #include #include #include @@ -18,6 +20,7 @@ namespace DB static const auto RESCHEDULE_MS = 1000; static const auto BACKOFF_TRESHOLD_MS = 10000; +static const auto CLEANUP_RESCHEDULE_MS = 600000 * 3; /// 30 min namespace ErrorCodes { @@ -26,6 +29,30 @@ namespace ErrorCodes extern const int POSTGRESQL_REPLICATION_INTERNAL_ERROR; } +class TemporaryReplicationSlot +{ +public: + TemporaryReplicationSlot( + PostgreSQLReplicationHandler * handler_, + std::shared_ptr tx_, + String & start_lsn, + String & snapshot_name) + : handler(handler_), tx(tx_) + { + handler->createReplicationSlot(*tx, start_lsn, snapshot_name, /* temporary */true); + } + + ~TemporaryReplicationSlot() + { + handler->dropReplicationSlot(*tx, /* temporary */true); + } + +private: + PostgreSQLReplicationHandler * handler; + std::shared_ptr tx; +}; + + PostgreSQLReplicationHandler::PostgreSQLReplicationHandler( const String & replication_identifier, const String & postgres_database_, @@ -67,6 +94,7 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler( startup_task = context->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ checkConnectionAndStart(); }); consumer_task = context->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ consumerFunc(); }); + cleanup_task = context->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ cleanupFunc(); }); } @@ -146,6 +174,7 @@ void PostgreSQLReplicationHandler::shutdown() stop_synchronization.store(true); startup_task->deactivate(); consumer_task->deactivate(); + cleanup_task->deactivate(); } @@ -266,6 +295,7 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) (is_materialized_postgresql_database ? postgres_database : postgres_database + '.' + tables_list)); consumer_task->activateAndSchedule(); + cleanup_task->activateAndSchedule(); /// Do not rely anymore on saved storage pointers. 
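Reviewer note: TemporaryReplicationSlot introduced above ties the temporary slot's lifetime to a scope, so the drop happens even when snapshot loading throws. A self-contained sketch of the same RAII pairing, with a toy Handler standing in for PostgreSQLReplicationHandler; the names are illustrative, not the real API.

#include <iostream>
#include <stdexcept>

// Toy stand-in for the replication handler; only the create/drop pairing is shown.
struct Handler
{
    void createSlot() { std::cout << "create temporary slot\n"; }
    void dropSlot() { std::cout << "drop temporary slot\n"; }
};

// RAII guard in the spirit of TemporaryReplicationSlot: the slot is created in the
// constructor and dropped in the destructor, so it is released even on exceptions.
class TemporarySlotGuard
{
public:
    explicit TemporarySlotGuard(Handler & handler_) : handler(handler_) { handler.createSlot(); }
    ~TemporarySlotGuard() { handler.dropSlot(); }

private:
    Handler & handler;
};

int main()
{
    Handler handler;
    try
    {
        TemporarySlotGuard guard(handler);
        throw std::runtime_error("load from snapshot failed");  // slot is still dropped
    }
    catch (const std::exception & e)
    {
        std::cout << "caught: " << e.what() << '\n';
    }
}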
materialized_storages.clear(); @@ -276,10 +306,12 @@ ASTPtr PostgreSQLReplicationHandler::getCreateNestedTableQuery(StorageMaterializ { postgres::Connection connection(connection_info); pqxx::nontransaction tx(connection.getRef()); - auto table_structure = std::make_unique(fetchPostgreSQLTableStructure(tx, table_name, postgres_schema, true, true, true)); - if (!table_structure) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed to get PostgreSQL table structure"); - return storage->getCreateNestedTableQuery(std::move(table_structure)); + + auto [postgres_table_schema, postgres_table_name] = getSchemaAndTableName(table_name); + auto table_structure = std::make_unique(fetchPostgreSQLTableStructure(tx, postgres_table_name, postgres_table_schema, true, true, true)); + + auto table_override = tryGetTableOverride(current_database_name, table_name); + return storage->getCreateNestedTableQuery(std::move(table_structure), table_override ? table_override->as() : nullptr); } @@ -297,7 +329,8 @@ StoragePtr PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection & query_str = fmt::format("SELECT * FROM {}", quoted_name); LOG_DEBUG(log, "Loading PostgreSQL table {}.{}", postgres_database, quoted_name); - materialized_storage->createNestedIfNeeded(fetchTableStructure(*tx, table_name)); + auto table_override = tryGetTableOverride(current_database_name, table_name); + materialized_storage->createNestedIfNeeded(fetchTableStructure(*tx, table_name), table_override ? table_override->as() : nullptr); auto nested_storage = materialized_storage->getNested(); auto insert = std::make_shared(); @@ -326,6 +359,21 @@ StoragePtr PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection & } +void PostgreSQLReplicationHandler::cleanupFunc() +{ + /// It is very important to make sure temporary replication slots are removed! + /// So just in case every 30 minutes check if one still exists. 
+ postgres::Connection connection(connection_info); + String last_committed_lsn; + connection.execWithRetry([&](pqxx::nontransaction & tx) + { + if (isReplicationSlotExist(tx, last_committed_lsn, /* temporary */true)) + dropReplicationSlot(tx, /* temporary */true); + }); + cleanup_task->scheduleAfter(CLEANUP_RESCHEDULE_MS); +} + + void PostgreSQLReplicationHandler::consumerFunc() { std::vector> skipped_tables; @@ -511,17 +559,25 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx) void PostgreSQLReplicationHandler::addTableToPublication(pqxx::nontransaction & ntx, const String & table_name) { - std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteString(table_name)); + std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name)); ntx.exec(query_str); - LOG_TRACE(log, "Added table `{}` to publication `{}`", table_name, publication_name); + LOG_TRACE(log, "Added table {} to publication `{}`", doubleQuoteWithSchema(table_name), publication_name); } void PostgreSQLReplicationHandler::removeTableFromPublication(pqxx::nontransaction & ntx, const String & table_name) { - std::string query_str = fmt::format("ALTER PUBLICATION {} DROP TABLE ONLY {}", publication_name, doubleQuoteString(table_name)); - ntx.exec(query_str); - LOG_TRACE(log, "Removed table `{}` from publication `{}`", table_name, publication_name); + try + { + std::string query_str = fmt::format("ALTER PUBLICATION {} DROP TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name)); + ntx.exec(query_str); + LOG_TRACE(log, "Removed table `{}` from publication `{}`", doubleQuoteWithSchema(table_name), publication_name); + } + catch (const pqxx::undefined_table &) + { + /// Removing table from replication must succeed even if table does not exist in PostgreSQL. + LOG_WARNING(log, "Did not remove table {} from publication `{}`, because table does not exist in PostgreSQL", doubleQuoteWithSchema(table_name), publication_name); + } } @@ -762,10 +818,12 @@ void PostgreSQLReplicationHandler::addTableToReplication(StorageMaterializedPost StoragePtr nested_storage; { - pqxx::nontransaction tx(replication_connection.getRef()); - if (isReplicationSlotExist(tx, start_lsn, /* temporary */true)) - dropReplicationSlot(tx, /* temporary */true); - createReplicationSlot(tx, start_lsn, snapshot_name, /* temporary */true); + auto tx = std::make_shared(replication_connection.getRef()); + + if (isReplicationSlotExist(*tx, start_lsn, /* temporary */true)) + dropReplicationSlot(*tx, /* temporary */true); + + TemporaryReplicationSlot temporary_slot(this, tx, start_lsn, snapshot_name); /// Protect against deadlock. 
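Reviewer note: removeTableFromPublication now swallows only the "table does not exist" case so detaching an already-dropped PostgreSQL table stays idempotent, while any other failure still propagates. A standalone sketch of that narrow-catch pattern, with UndefinedTableError standing in for pqxx::undefined_table; all names here are hypothetical.

#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical error type standing in for pqxx::undefined_table.
struct UndefinedTableError : std::runtime_error
{
    using std::runtime_error::runtime_error;
};

// Pretend execution of "ALTER PUBLICATION ... DROP TABLE ONLY ...".
void dropFromPublication(const std::string & table, bool exists_in_postgres)
{
    if (!exists_in_postgres)
        throw UndefinedTableError("relation does not exist: " + table);
    std::cout << "removed " << table << " from publication\n";
}

int main()
{
    for (bool exists : {true, false})
    {
        try
        {
            dropFromPublication("schema.table", exists);
        }
        catch (const UndefinedTableError & e)
        {
            // Only the "table does not exist" case is swallowed; anything else propagates.
            std::cout << "warning: " << e.what() << " (ignored)\n";
        }
    }
}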
auto nested = DatabaseCatalog::instance().tryGetTable(materialized_storage->getNestedStorageID(), materialized_storage->getNestedTableContext()); @@ -836,81 +894,81 @@ void PostgreSQLReplicationHandler::reloadFromSnapshot(const std::vector(replication_connection.getRef()); - String snapshot_name, start_lsn; - - if (isReplicationSlotExist(tx, start_lsn, /* temporary */true)) - dropReplicationSlot(tx, /* temporary */true); - - createReplicationSlot(tx, start_lsn, snapshot_name, /* temporary */true); - postgres::Connection tmp_connection(connection_info); - - for (const auto & [relation_id, table_name] : relation_data) { - auto storage = DatabaseCatalog::instance().getTable(StorageID(current_database_name, table_name), context); - auto * materialized_storage = storage->as (); - auto materialized_table_lock = materialized_storage->lockForShare(String(), context->getSettingsRef().lock_acquire_timeout); + String snapshot_name, start_lsn; + if (isReplicationSlotExist(*tx, start_lsn, /* temporary */true)) + dropReplicationSlot(*tx, /* temporary */true); - /// If for some reason this temporary table already exists - also drop it. - auto temp_materialized_storage = materialized_storage->createTemporary(); + TemporaryReplicationSlot temporary_slot(this, tx, start_lsn, snapshot_name); + postgres::Connection tmp_connection(connection_info); - /// This snapshot is valid up to the end of the transaction, which exported it. - StoragePtr temp_nested_storage = loadFromSnapshot(tmp_connection, snapshot_name, table_name, - temp_materialized_storage->as ()); - - auto table_id = materialized_storage->getNestedStorageID(); - auto temp_table_id = temp_nested_storage->getStorageID(); - - LOG_DEBUG(log, "Starting background update of table {} ({} with {})", - table_name, table_id.getNameForLogs(), temp_table_id.getNameForLogs()); - - auto ast_rename = std::make_shared(); - ASTRenameQuery::Element elem + for (const auto & [relation_id, table_name] : relation_data) { - ASTRenameQuery::Table{table_id.database_name, table_id.table_name}, - ASTRenameQuery::Table{temp_table_id.database_name, temp_table_id.table_name} - }; - ast_rename->elements.push_back(std::move(elem)); - ast_rename->exchange = true; + auto storage = DatabaseCatalog::instance().getTable(StorageID(current_database_name, table_name), context); + auto * materialized_storage = storage->as (); + auto materialized_table_lock = materialized_storage->lockForShare(String(), context->getSettingsRef().lock_acquire_timeout); - auto nested_context = materialized_storage->getNestedTableContext(); + /// If for some reason this temporary table already exists - also drop it. + auto temp_materialized_storage = materialized_storage->createTemporary(); - try - { - InterpreterRenameQuery(ast_rename, nested_context).execute(); + /// This snapshot is valid up to the end of the transaction, which exported it. 
+ StoragePtr temp_nested_storage = loadFromSnapshot(tmp_connection, snapshot_name, table_name, + temp_materialized_storage->as ()); - auto nested_storage = DatabaseCatalog::instance().getTable(StorageID(table_id.database_name, table_id.table_name, temp_table_id.uuid), nested_context); - materialized_storage->set(nested_storage); + auto table_id = materialized_storage->getNestedStorageID(); + auto temp_table_id = temp_nested_storage->getStorageID(); - auto nested_sample_block = nested_storage->getInMemoryMetadataPtr()->getSampleBlock(); - auto materialized_sample_block = materialized_storage->getInMemoryMetadataPtr()->getSampleBlock(); - assertBlocksHaveEqualStructure(nested_sample_block, materialized_sample_block, "while reloading table in the background"); + LOG_DEBUG(log, "Starting background update of table {} ({} with {})", + table_name, table_id.getNameForLogs(), temp_table_id.getNameForLogs()); - LOG_INFO(log, "Updated table {}. New structure: {}", - nested_storage->getStorageID().getNameForLogs(), nested_sample_block.dumpStructure()); + auto ast_rename = std::make_shared(); + ASTRenameQuery::Element elem + { + ASTRenameQuery::Table{table_id.database_name, table_id.table_name}, + ASTRenameQuery::Table{temp_table_id.database_name, temp_table_id.table_name} + }; + ast_rename->elements.push_back(std::move(elem)); + ast_rename->exchange = true; - /// Pass pointer to new nested table into replication consumer, remove current table from skip list and set start lsn position. - consumer->updateNested(table_name, nested_storage, relation_id, start_lsn); + auto nested_context = materialized_storage->getNestedTableContext(); - auto table_to_drop = DatabaseCatalog::instance().getTable(StorageID(temp_table_id.database_name, temp_table_id.table_name, table_id.uuid), nested_context); - auto drop_table_id = table_to_drop->getStorageID(); + try + { + InterpreterRenameQuery(ast_rename, nested_context).execute(); - if (drop_table_id == nested_storage->getStorageID()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Cannot drop table because is has the same uuid as new table: {}", drop_table_id.getNameForLogs()); + auto nested_storage = DatabaseCatalog::instance().getTable(StorageID(table_id.database_name, table_id.table_name, temp_table_id.uuid), nested_context); + materialized_storage->set(nested_storage); - LOG_DEBUG(log, "Dropping table {}", drop_table_id.getNameForLogs()); - InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, nested_context, nested_context, drop_table_id, true); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); + auto nested_sample_block = nested_storage->getInMemoryMetadataPtr()->getSampleBlock(); + auto materialized_sample_block = materialized_storage->getInMemoryMetadataPtr()->getSampleBlock(); + assertBlocksHaveEqualStructure(nested_sample_block, materialized_sample_block, "while reloading table in the background"); + + LOG_INFO(log, "Updated table {}. New structure: {}", + nested_storage->getStorageID().getNameForLogs(), nested_sample_block.dumpStructure()); + + /// Pass pointer to new nested table into replication consumer, remove current table from skip list and set start lsn position. 
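Reviewer note: the restructured reloadFromSnapshot loads a fresh copy under a temporary name, exchanges it with the live nested table, and only then drops what is now under the temporary handle, guarding against dropping the table it just installed. A minimal sketch of that exchange-then-drop shape using plain shared_ptrs; the Table type and ids are toy stand-ins, not the DatabaseCatalog API.

#include <cassert>
#include <iostream>
#include <memory>
#include <string>
#include <utility>

// Minimal stand-in for a table: just an id and a version of its data.
struct Table
{
    std::string id;
    std::string data;
};

int main()
{
    auto live = std::make_shared<Table>(Table{"uuid-live", "old rows"});
    auto temp = std::make_shared<Table>(Table{"uuid-temp", "rows reloaded from snapshot"});

    // EXCHANGE-like swap: readers of `live` now see the reloaded data.
    std::swap(live, temp);

    // What used to be live is now under the "temporary" handle and can be dropped,
    // but only after checking we are not about to drop the table we just installed.
    assert(temp->id != live->id);
    std::cout << "serving: " << live->data << '\n';
    std::cout << "dropping: " << temp->id << '\n';
}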
+ consumer->updateNested(table_name, nested_storage, relation_id, start_lsn); + + auto table_to_drop = DatabaseCatalog::instance().getTable(StorageID(temp_table_id.database_name, temp_table_id.table_name, table_id.uuid), nested_context); + auto drop_table_id = table_to_drop->getStorageID(); + + if (drop_table_id == nested_storage->getStorageID()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot drop table because is has the same uuid as new table: {}", drop_table_id.getNameForLogs()); + + LOG_DEBUG(log, "Dropping table {}", drop_table_id.getNameForLogs()); + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, nested_context, nested_context, drop_table_id, true); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } } - dropReplicationSlot(tx, /* temporary */true); - tx.commit(); + tx->commit(); } catch (...) { diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index cf44101db76..c0a2a6f2559 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -15,6 +15,8 @@ struct SettingChange; class PostgreSQLReplicationHandler { +friend class TemporaryReplicationSlot; + public: PostgreSQLReplicationHandler( const String & replication_identifier, @@ -52,6 +54,8 @@ public: void setSetting(const SettingChange & setting); + void cleanupFunc(); + private: using MaterializedStorages = std::unordered_map; @@ -133,7 +137,9 @@ private: /// Replication consumer. Manages decoding of replication stream and syncing into tables. std::shared_ptr consumer; - BackgroundSchedulePool::TaskHolder startup_task, consumer_task; + BackgroundSchedulePool::TaskHolder startup_task; + BackgroundSchedulePool::TaskHolder consumer_task; + BackgroundSchedulePool::TaskHolder cleanup_task; std::atomic stop_synchronization = false; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 591e10a88b9..aefd1aedbf7 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -2,28 +2,38 @@ #if USE_LIBPQXX #include + #include #include #include + #include +#include + #include #include #include #include +#include + #include #include + #include #include #include #include -#include + +#include #include #include #include + #include #include #include -#include + +#include namespace DB @@ -181,18 +191,18 @@ StorageID StorageMaterializedPostgreSQL::getNestedStorageID() const } -void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure) +void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override) { if (tryGetNested()) return; - const auto ast_create = getCreateNestedTableQuery(std::move(table_structure)); - auto table_id = getStorageID(); - auto tmp_nested_table_id = StorageID(table_id.database_name, getNestedTableName()); - LOG_DEBUG(log, "Creating clickhouse table for postgresql table {}", table_id.getNameForLogs()); - try { + const auto ast_create = getCreateNestedTableQuery(std::move(table_structure), table_override); + auto table_id = getStorageID(); + auto tmp_nested_table_id = StorageID(table_id.database_name, getNestedTableName()); + LOG_DEBUG(log, "Creating clickhouse table for postgresql table {}", table_id.getNameForLogs()); + InterpreterCreateQuery interpreter(ast_create, 
nested_context); interpreter.execute(); @@ -200,10 +210,10 @@ void StorageMaterializedPostgreSQL::createNestedIfNeeded(PostgreSQLTableStructur /// Save storage_id with correct uuid. nested_table_id = nested_storage->getStorageID(); } - catch (Exception & e) + catch (...) { - e.addMessage("while creating nested table: {}", tmp_nested_table_id.getNameForLogs()); tryLogCurrentException(__PRETTY_FUNCTION__); + throw; } } @@ -362,12 +372,31 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d } +std::shared_ptr StorageMaterializedPostgreSQL::getColumnsExpressionList(const NamesAndTypesList & columns) const +{ + auto columns_expression_list = std::make_shared(); + for (const auto & [name, type] : columns) + { + const auto & column_declaration = std::make_shared(); + + column_declaration->name = name; + column_declaration->type = getColumnDeclaration(type); + + columns_expression_list->children.emplace_back(column_declaration); + } + return columns_expression_list; +} + + /// For single storage MaterializedPostgreSQL get columns and primary key columns from storage definition. /// For database engine MaterializedPostgreSQL get columns and primary key columns by fetching from PostgreSQL, also using the same /// transaction with snapshot, which is used for initial tables dump. -ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure) +ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( + PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override) { auto create_table_query = std::make_shared(); + if (table_override) + applyTableOverrideToCreateQuery(*table_override, create_table_query.get()); auto table_id = getStorageID(); create_table_query->setTable(getNestedTableName()); @@ -375,40 +404,85 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(PostgreSQLTableS if (is_materialized_postgresql_database) create_table_query->uuid = table_id.uuid; + auto storage = std::make_shared(); + storage->set(storage->engine, makeASTFunction("ReplacingMergeTree", std::make_shared("_version"))); + auto columns_declare_list = std::make_shared(); - auto columns_expression_list = std::make_shared(); auto order_by_expression = std::make_shared(); auto metadata_snapshot = getInMemoryMetadataPtr(); - const auto & columns = metadata_snapshot->getColumns(); + + ConstraintsDescription constraints; NamesAndTypesList ordinary_columns_and_types; - if (!is_materialized_postgresql_database) + if (is_materialized_postgresql_database) { - ordinary_columns_and_types = columns.getOrdinary(); - } - else - { - if (!table_structure) + if (!table_structure && !table_override) { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "No table structure returned for table {}.{}", table_id.database_name, table_id.table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No table structure returned for table {}.{}", + table_id.database_name, table_id.table_name); } - if (!table_structure->columns) + if (!table_structure->columns && (!table_override || !table_override->columns)) { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "No columns returned for table {}.{}", table_id.database_name, table_id.table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No columns returned for table {}.{}", + table_id.database_name, table_id.table_name); } - ordinary_columns_and_types = *table_structure->columns; + bool has_order_by_override = table_override && table_override->storage && table_override->storage->order_by; + if 
(has_order_by_override && !table_structure->replica_identity_columns) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Having PRIMARY KEY OVERRIDE is allowed only if there is " + "replica identity index for PostgreSQL table. (table {}.{})", + table_id.database_name, table_id.table_name); + } if (!table_structure->primary_key_columns && !table_structure->replica_identity_columns) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Table {}.{} has no primary key and no replica identity index", table_id.database_name, table_id.table_name); + "Table {}.{} has no primary key and no replica identity index", + table_id.database_name, table_id.table_name); } + if (table_override && table_override->columns) + { + if (table_override->columns) + { + auto children = table_override->columns->children; + const auto & columns = children[0]->as(); + if (columns) + { + for (const auto & child : columns->children) + { + const auto * column_declaration = child->as(); + auto type = DataTypeFactory::instance().get(column_declaration->type); + ordinary_columns_and_types.emplace_back(NameAndTypePair(column_declaration->name, type)); + } + } + + columns_declare_list->set(columns_declare_list->columns, children[0]); + } + else + { + ordinary_columns_and_types = *table_structure->columns; + columns_declare_list->set(columns_declare_list->columns, getColumnsExpressionList(ordinary_columns_and_types)); + } + + auto * columns = table_override->columns; + if (columns && columns->constraints) + constraints = ConstraintsDescription(columns->constraints->children); + } + else + { + ordinary_columns_and_types = *table_structure->columns; + columns_declare_list->set(columns_declare_list->columns, getColumnsExpressionList(ordinary_columns_and_types)); + } + + if (ordinary_columns_and_types.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table {}.{} has no columns", table_id.database_name, table_id.table_name); + NamesAndTypesList merging_columns; if (table_structure->primary_key_columns) merging_columns = *table_structure->primary_key_columns; @@ -417,39 +491,28 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(PostgreSQLTableS order_by_expression->name = "tuple"; order_by_expression->arguments = std::make_shared(); - for (const auto & column : merging_columns) order_by_expression->arguments->children.emplace_back(std::make_shared(column.name)); - } - for (const auto & [name, type] : ordinary_columns_and_types) + storage->set(storage->order_by, order_by_expression); + } + else { - const auto & column_declaration = std::make_shared(); + ordinary_columns_and_types = metadata_snapshot->getColumns().getOrdinary(); + columns_declare_list->set(columns_declare_list->columns, getColumnsExpressionList(ordinary_columns_and_types)); - column_declaration->name = name; - column_declaration->type = getColumnDeclaration(type); + auto primary_key_ast = metadata_snapshot->getPrimaryKeyAST(); + if (!primary_key_ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL must have primary key"); + storage->set(storage->order_by, primary_key_ast); - columns_expression_list->children.emplace_back(column_declaration); + constraints = metadata_snapshot->getConstraints(); } - columns_declare_list->set(columns_declare_list->columns, columns_expression_list); - columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_sign", "Int8", 1)); columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_version", "UInt64", 1)); - 
create_table_query->set(create_table_query->columns_list, columns_declare_list); - /// Not nullptr for single storage (because throws exception if not specified), nullptr otherwise. - auto primary_key_ast = getInMemoryMetadataPtr()->getPrimaryKeyAST(); - - auto storage = std::make_shared(); - storage->set(storage->engine, makeASTFunction("ReplacingMergeTree", std::make_shared("_version"))); - - if (primary_key_ast) - storage->set(storage->order_by, primary_key_ast); - else - storage->set(storage->order_by, order_by_expression); - create_table_query->set(create_table_query->storage, storage); /// Add columns _sign and _version, so that they can be accessed from nested ReplacingMergeTree table if needed. @@ -458,8 +521,7 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery(PostgreSQLTableS StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(ColumnsDescription(ordinary_columns_and_types)); - storage_metadata.setConstraints(metadata_snapshot->getConstraints()); - + storage_metadata.setConstraints(constraints); setInMemoryMetadata(storage_metadata); return create_table_query; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index 10724fb9bf0..9e11f314738 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -99,7 +99,11 @@ public: /// only once - when nested table is successfully created and is never changed afterwards. bool hasNested() { return has_nested.load(); } - void createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure); + void createNestedIfNeeded(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override); + + ASTPtr getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override); + + std::shared_ptr getColumnsExpressionList(const NamesAndTypesList & columns) const; StoragePtr getNested() const; @@ -120,8 +124,6 @@ public: bool supportsFinal() const override { return true; } - ASTPtr getCreateNestedTableQuery(PostgreSQLTableStructurePtr table_structure); - protected: StorageMaterializedPostgreSQL( const StorageID & table_id_, diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index f1a0372a07d..791583e2495 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -201,7 +201,7 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); - if (partition_columns) + if (partition_columns && !partition_columns->children.empty()) select_query->setExpression(ASTProjectionSelectQuery::Expression::GROUP_BY, partition_columns->clone()); result.definition_ast = select_query; @@ -211,7 +211,9 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( auto external_storage_holder = std::make_shared(query_context, columns, ConstraintsDescription{}); StoragePtr storage = external_storage_holder->getTable(); InterpreterSelectQuery select( - result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias()); + result.query_ast, query_context, storage, {}, + /// Here we ignore ast optimizations because otherwise aggregation keys may be removed from result header as constants. 
+ SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias().ignoreASTOptimizationsAlias()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index 1c918c15775..b42f2214d88 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -38,7 +38,7 @@ void EmbeddedRocksDBSink::consume(Chunk chunk) rocksdb::WriteBatch batch; rocksdb::Status status; - for (size_t i = 0; i < rows; i++) + for (size_t i = 0; i < rows; ++i) { wb_key.restart(); wb_value.restart(); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 6417aa9f72c..87a8ea2315d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -455,10 +455,8 @@ static void appendBlock(const Block & from, Block & to) size_t rows = from.rows(); size_t bytes = from.bytes(); - CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows); - CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, bytes); - size_t old_rows = to.rows(); + size_t old_bytes = to.bytes(); MutableColumnPtr last_col; try @@ -468,6 +466,8 @@ static void appendBlock(const Block & from, Block & to) if (to.rows() == 0) { to = from; + CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows); + CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, bytes); } else { @@ -480,6 +480,8 @@ static void appendBlock(const Block & from, Block & to) to.getByPosition(column_no).column = std::move(last_col); } + CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows); + CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, to.bytes() - old_bytes); } } catch (...) @@ -1108,7 +1110,7 @@ void registerStorageBuffer(StorageFactory & factory) // After we evaluated all expressions, check that all arguments are // literals. 
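Reviewer note: the appendBlock change in StorageBuffer.cpp bumps the StorageBuffer metrics only after the rows are actually in the destination block, and by the measured growth rather than the size of the source block. A small standalone sketch of that account-after-success idea; buffer_rows_metric is a toy stand-in for CurrentMetrics.

#include <cstddef>
#include <iostream>
#include <vector>

// Toy metric counter standing in for CurrentMetrics.
static std::size_t buffer_rows_metric = 0;

void appendBlock(const std::vector<int> & from, std::vector<int> & to)
{
    const std::size_t old_rows = to.size();
    try
    {
        to.insert(to.end(), from.begin(), from.end());
        // Account for the growth only after the append succeeded, using the
        // measured delta rather than the size of the source block.
        buffer_rows_metric += to.size() - old_rows;
    }
    catch (...)
    {
        to.resize(old_rows);  // roll back the partial append; the metric stays untouched
        throw;
    }
}

int main()
{
    std::vector<int> buffer;
    appendBlock({1, 2, 3}, buffer);
    appendBlock({4, 5}, buffer);
    std::cout << "rows metric: " << buffer_rows_metric << '\n';  // 5
}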
- for (size_t i = 0; i < engine_args.size(); i++) + for (size_t i = 0; i < engine_args.size(); ++i) { if (!typeid_cast(engine_args[i].get())) { diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index f6b330fe3df..da8c5f115b2 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -21,6 +22,7 @@ namespace ErrorCodes extern const int THERE_IS_NO_COLUMN; extern const int CANNOT_DETACH_DICTIONARY_AS_TABLE; extern const int DICTIONARY_ALREADY_EXISTS; + extern const int NOT_IMPLEMENTED; } namespace @@ -111,10 +113,11 @@ StorageDictionary::StorageDictionary( const StorageID & table_id_, const String & dictionary_name_, const DictionaryStructure & dictionary_structure_, + const String & comment, Location location_, ContextPtr context_) : StorageDictionary( - table_id_, dictionary_name_, ColumnsDescription{getNamesAndTypes(dictionary_structure_)}, String{}, location_, context_) + table_id_, dictionary_name_, ColumnsDescription{getNamesAndTypes(dictionary_structure_)}, comment, location_, context_) { } @@ -126,6 +129,7 @@ StorageDictionary::StorageDictionary( table_id, table_id.getFullNameNotQuoted(), context_->getExternalDictionariesLoader().getDictionaryStructure(*dictionary_configuration), + dictionary_configuration->getString("dictionary.comment", ""), Location::SameDatabaseAndNameAsDictionary, context_) { @@ -230,7 +234,7 @@ void StorageDictionary::renameInMemory(const StorageID & new_table_id) if (move_to_atomic) configuration->setString("dictionary.uuid", toString(new_table_id.uuid)); else if (move_to_ordinary) - configuration->remove("dictionary.uuid"); + configuration->remove("dictionary.uuid"); } /// Dictionary is moving between databases of different engines or is renaming inside Ordinary database @@ -260,6 +264,40 @@ void StorageDictionary::renameInMemory(const StorageID & new_table_id) } } +void StorageDictionary::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /* context */) const +{ + for (const auto & command : commands) + { + if (location == Location::DictionaryDatabase || command.type != AlterCommand::COMMENT_TABLE) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", + command.type, getName()); + } +} + +void StorageDictionary::alter(const AlterCommands & params, ContextPtr alter_context, AlterLockHolder & lock_holder) +{ + IStorage::alter(params, alter_context, lock_holder); + + if (location == Location::Custom) + return; + + auto new_comment = getInMemoryMetadataPtr()->comment; + + auto storage_id = getStorageID(); + const auto & external_dictionaries_loader = getContext()->getExternalDictionariesLoader(); + auto result = external_dictionaries_loader.getLoadResult(storage_id.getInternalDictionaryName()); + + if (result.object) + { + auto dictionary = std::static_pointer_cast(result.object); + auto * dictionary_non_const = const_cast(dictionary.get()); + dictionary_non_const->setDictionaryComment(new_comment); + } + + std::lock_guard lock(dictionary_config_mutex); + configuration->setString("dictionary.comment", std::move(new_comment)); +} + void registerStorageDictionary(StorageFactory & factory) { factory.registerStorage("Dictionary", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 7d0af8c0ee3..855d02b0947 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -42,6 
+42,10 @@ public: void renameInMemory(const StorageID & new_table_id) override; + void checkAlterIsPossible(const AlterCommands & commands, ContextPtr /* context */) const override; + + void alter(const AlterCommands & params, ContextPtr alter_context, AlterLockHolder &) override; + Poco::Timestamp getUpdateTime() const; LoadablesConfigurationPtr getConfiguration() const; @@ -89,6 +93,7 @@ private: const StorageID & table_id_, const String & dictionary_name_, const DictionaryStructure & dictionary_structure, + const String & comment, Location location_, ContextPtr context_); diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 51ecfc1e884..21143438725 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -2,6 +2,8 @@ #include +#include + #include #include @@ -16,13 +18,12 @@ #include #include #include +#include #include #include #include #include -#include - namespace DB { @@ -30,80 +31,78 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int TIMEOUT_EXCEEDED; } -StorageExecutable::StorageExecutable( - const StorageID & table_id_, - const String & script_name_, - const std::vector & arguments_, - const String & format_, - const std::vector & input_queries_, - const ColumnsDescription & columns, - const ConstraintsDescription & constraints) - : IStorage(table_id_) - , script_name(script_name_) - , arguments(arguments_) - , format(format_) - , input_queries(input_queries_) - , log(&Poco::Logger::get("StorageExecutable")) +namespace { - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(columns); - storage_metadata.setConstraints(constraints); - setInMemoryMetadata(storage_metadata); + void transformToSingleBlockSources(Pipes & inputs) + { + size_t inputs_size = inputs.size(); + for (size_t i = 0; i < inputs_size; ++i) + { + auto && input = inputs[i]; + QueryPipeline input_pipeline(std::move(input)); + PullingPipelineExecutor input_pipeline_executor(input_pipeline); + + auto header = input_pipeline_executor.getHeader(); + auto result_block = header.cloneEmpty(); + + size_t result_block_columns = result_block.columns(); + + Block result; + while (input_pipeline_executor.pull(result)) + { + for (size_t result_block_index = 0; result_block_index < result_block_columns; ++result_block_index) + { + auto & block_column = result.safeGetByPosition(result_block_index); + auto & result_block_column = result_block.safeGetByPosition(result_block_index); + + result_block_column.column->assumeMutable()->insertRangeFrom(*block_column.column, 0, block_column.column->size()); + } + } + + auto source = std::make_shared(std::move(result_block)); + inputs[i] = Pipe(std::move(source)); + } + } } StorageExecutable::StorageExecutable( const StorageID & table_id_, - const String & script_name_, - const std::vector & arguments_, - const String & format_, - const std::vector & input_queries_, + const String & format, const ExecutableSettings & settings_, + const std::vector & input_queries_, const ColumnsDescription & columns, const ConstraintsDescription & constraints) : IStorage(table_id_) - , script_name(script_name_) - , arguments(arguments_) - , format(format_) - , input_queries(input_queries_) , settings(settings_) - /// If pool size == 0 then there is no size restrictions. Poco max size of semaphore is integer type. - , process_pool(std::make_shared(settings.pool_size == 0 ? 
std::numeric_limits::max() : settings.pool_size)) - , log(&Poco::Logger::get("StorageExecutablePool")) + , input_queries(input_queries_) + , log(settings.is_executable_pool ? &Poco::Logger::get("StorageExecutablePool") : &Poco::Logger::get("StorageExecutable")) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints); setInMemoryMetadata(storage_metadata); + + ShellCommandSourceCoordinator::Configuration configuration + { + .format = format, + .command_termination_timeout_seconds = settings.command_termination_timeout, + .command_read_timeout_milliseconds = settings.command_read_timeout, + .command_write_timeout_milliseconds = settings.command_write_timeout, + + .pool_size = settings.pool_size, + .max_command_execution_time_seconds = settings.max_command_execution_time, + + .is_executable_pool = settings.is_executable_pool, + .send_chunk_header = settings.send_chunk_header, + .execute_direct = true + }; + + coordinator = std::make_unique(std::move(configuration)); } -class SendingChunkHeaderTransform final : public ISimpleTransform -{ -public: - SendingChunkHeaderTransform(const Block & header, WriteBuffer & buffer_) - : ISimpleTransform(header, header, false) - , buffer(buffer_) - { - } - - String getName() const override { return "SendingChunkHeaderTransform"; } - -protected: - - void transform(Chunk & chunk) override - { - writeText(chunk.getNumRows(), buffer); - writeChar('\n', buffer); - } - -private: - WriteBuffer & buffer; -}; - Pipe StorageExecutable::read( const Names & /*column_names*/, const StorageMetadataPtr & metadata_snapshot, @@ -113,10 +112,12 @@ Pipe StorageExecutable::read( size_t max_block_size, unsigned /*threads*/) { + auto & script_name = settings.script_name; + auto user_scripts_path = context->getUserScriptsPath(); auto script_path = user_scripts_path + '/' + script_name; - if (!pathStartsWith(script_path, user_scripts_path)) + if (!fileOrSymlinkPathStartsWith(script_path, user_scripts_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Executable file {} must be inside user scripts folder {}", script_name, @@ -128,101 +129,31 @@ Pipe StorageExecutable::read( script_name, user_scripts_path); - std::vector inputs; + Pipes inputs; inputs.reserve(input_queries.size()); for (auto & input_query : input_queries) { InterpreterSelectWithUnionQuery interpreter(input_query, context, {}); - inputs.emplace_back(interpreter.buildQueryPipeline()); + inputs.emplace_back(QueryPipelineBuilder::getPipe(interpreter.buildQueryPipeline())); } - ShellCommand::Config config(script_path); - config.arguments = arguments; - for (size_t i = 1; i < inputs.size(); ++i) - config.write_fds.emplace_back(i + 2); - - std::unique_ptr process; - - bool is_executable_pool = (process_pool != nullptr); - if (is_executable_pool) - { - bool result = process_pool->tryBorrowObject(process, [&config, this]() - { - config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, settings.command_termination_timeout }; - auto shell_command = ShellCommand::executeDirect(config); - return shell_command; - }, settings.max_command_execution_time * 10000); - - if (!result) - throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, - "Could not get process from pool, max command execution timeout exceeded {} seconds", - settings.max_command_execution_time); - } - else - { - process = ShellCommand::executeDirect(config); - } - - std::vector tasks; - tasks.reserve(inputs.size()); - - for (size_t i = 0; i < 
inputs.size(); ++i) - { - WriteBufferFromFile * write_buffer = nullptr; - - if (i == 0) - { - write_buffer = &process->in; - } - else - { - auto descriptor = i + 2; - auto it = process->write_fds.find(descriptor); - if (it == process->write_fds.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Process does not contain descriptor to write {}", descriptor); - - write_buffer = &it->second; - } - - inputs[i].resize(1); - if (settings.send_chunk_header) - { - auto transform = std::make_shared(inputs[i].getHeader(), *write_buffer); - inputs[i].addTransform(std::move(transform)); - } - - auto pipeline = std::make_shared(QueryPipelineBuilder::getPipeline(std::move(inputs[i]))); - - auto out = context->getOutputFormat(format, *write_buffer, materializeBlock(pipeline->getHeader())); - out->setAutoFlush(); - pipeline->complete(std::move(out)); - - ShellCommandSource::SendDataTask task = [pipeline, write_buffer, is_executable_pool]() - { - CompletedPipelineExecutor executor(*pipeline); - executor.execute(); - - if (!is_executable_pool) - write_buffer->close(); - }; - - tasks.emplace_back(std::move(task)); - } + /// For executable pool we read data from input streams and convert it to single blocks streams. + if (settings.is_executable_pool) + transformToSingleBlockSources(inputs); auto sample_block = metadata_snapshot->getSampleBlock(); ShellCommandSourceConfiguration configuration; configuration.max_block_size = max_block_size; - if (is_executable_pool) + if (settings.is_executable_pool) { configuration.read_fixed_number_of_rows = true; configuration.read_number_of_rows_from_process_output = true; } - Pipe pipe(std::make_unique(context, format, std::move(sample_block), std::move(process), std::move(tasks), configuration, process_pool)); - return pipe; + return coordinator->createPipe(script_path, settings.script_arguments, std::move(inputs), std::move(sample_block), context, configuration); } void registerStorageExecutable(StorageFactory & factory) @@ -262,6 +193,11 @@ void registerStorageExecutable(StorageFactory & factory) const auto & columns = args.columns; const auto & constraints = args.constraints; + ExecutableSettings settings; + settings.script_name = script_name; + settings.script_arguments = script_name_with_arguments; + settings.is_executable_pool = is_executable_pool; + if (is_executable_pool) { size_t max_command_execution_time = 10; @@ -270,28 +206,28 @@ void registerStorageExecutable(StorageFactory & factory) if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds) max_command_execution_time = max_execution_time_seconds; - ExecutableSettings pool_settings; - pool_settings.max_command_execution_time = max_command_execution_time; - if (args.storage_def->settings) - pool_settings.loadFromQuery(*args.storage_def); + settings.max_command_execution_time = max_command_execution_time; + } - return StorageExecutable::create(args.table_id, script_name, script_name_with_arguments, format, input_queries, pool_settings, columns, constraints); - } - else - { - return StorageExecutable::create(args.table_id, script_name, script_name_with_arguments, format, input_queries, columns, constraints); - } + if (args.storage_def->settings) + settings.loadFromQuery(*args.storage_def); + + auto global_context = args.getContext()->getGlobalContext(); + return StorageExecutable::create(args.table_id, format, settings, input_queries, columns, constraints); }; + StorageFactory::StorageFeatures storage_features; + storage_features.supports_settings = true; + 
factory.registerStorage("Executable", [&](const StorageFactory::Arguments & args) { return register_storage(args, false /*is_executable_pool*/); - }); + }, storage_features); factory.registerStorage("ExecutablePool", [&](const StorageFactory::Arguments & args) { return register_storage(args, true /*is_executable_pool*/); - }); + }, storage_features); } }; diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 74df17f1463..b6248abae97 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -23,7 +23,7 @@ public: String getName() const override { - if (process_pool) + if (settings.is_executable_pool) return "ExecutablePool"; else return "Executable"; @@ -42,31 +42,17 @@ protected: StorageExecutable( const StorageID & table_id, - const String & script_name_, - const std::vector & arguments_, - const String & format_, - const std::vector & input_queries_, - const ColumnsDescription & columns, - const ConstraintsDescription & constraints); - - StorageExecutable( - const StorageID & table_id, - const String & script_name_, - const std::vector & arguments_, - const String & format_, - const std::vector & input_queries_, - const ExecutableSettings & settings_, + const String & format, + const ExecutableSettings & settings, + const std::vector & input_queries, const ColumnsDescription & columns, const ConstraintsDescription & constraints); private: - String script_name; - std::vector arguments; - String format; - std::vector input_queries; ExecutableSettings settings; - std::shared_ptr process_pool; + std::vector input_queries; Poco::Logger * log; + std::unique_ptr coordinator; }; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index f83ab0c3f9e..03ac27d0e46 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -508,6 +508,9 @@ void StorageMergeTree::waitForMutation(Int64 version, const String & file_name) void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context) { + /// Validate partition IDs (if any) before starting mutation + getPartitionIdsAffectedByCommands(commands, query_context); + String mutation_file_name; Int64 version = startMutation(commands, mutation_file_name); @@ -898,6 +901,7 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( auto commands = MutationCommands::create(); size_t current_ast_elements = 0; + auto last_mutation_to_apply = mutations_end_it; for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { size_t commands_size = 0; @@ -934,7 +938,8 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( MergeTreeMutationEntry & entry = it->second; entry.latest_fail_time = time(nullptr); entry.latest_fail_reason = getCurrentExceptionMessage(false); - continue; + /// NOTE we should not skip mutations, because exception may be retryable (e.g. MEMORY_LIMIT_EXCEEDED) + break; } } @@ -943,8 +948,10 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( current_ast_elements += commands_size; commands->insert(commands->end(), it->second.commands.begin(), it->second.commands.end()); + last_mutation_to_apply = it; } + assert(commands->empty() == (last_mutation_to_apply == mutations_end_it)); if (!commands->empty()) { bool is_partition_affected = false; @@ -969,13 +976,13 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( /// Shall not create a new part, but will do that later if mutation with higher version appear. 
/// This is needed in order to not produce excessive mutations of non-related parts. auto block_range = std::make_pair(part->info.min_block, part->info.max_block); - updated_version_by_block_range[block_range] = current_mutations_by_version.rbegin()->first; + updated_version_by_block_range[block_range] = last_mutation_to_apply->first; were_some_mutations_for_some_parts_skipped = true; continue; } auto new_part_info = part->info; - new_part_info.mutation = current_mutations_by_version.rbegin()->first; + new_part_info.mutation = last_mutation_to_apply->first; future_part->parts.push_back(part); future_part->part_info = new_part_info; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 0b6095e033b..66adf3ae272 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -306,13 +307,7 @@ void registerStorageMySQL(StorageFactory & factory) if (!mysql_settings.connection_pool_size) throw Exception("connection_pool_size cannot be zero.", ErrorCodes::BAD_ARGUMENTS); - mysqlxx::PoolWithFailover pool( - configuration.database, configuration.addresses, - configuration.username, configuration.password, - MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, - mysql_settings.connection_pool_size, - mysql_settings.connection_max_tries, - mysql_settings.connection_wait_timeout); + mysqlxx::PoolWithFailover pool = createMySQLPoolWithFailover(configuration, mysql_settings); return StorageMySQL::create( args.table_id, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ca877d8a72d..a1f82e14868 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4532,28 +4532,6 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer } -std::set StorageReplicatedMergeTree::getPartitionIdsAffectedByCommands( - const MutationCommands & commands, ContextPtr query_context) const -{ - std::set affected_partition_ids; - - for (const auto & command : commands) - { - if (!command.partition) - { - affected_partition_ids.clear(); - break; - } - - affected_partition_ids.insert( - getPartitionIDFromQuery(command.partition, query_context) - ); - } - - return affected_partition_ids; -} - - PartitionBlockNumbersHolder StorageReplicatedMergeTree::allocateBlockNumbersInAffectedPartitions( const MutationCommands & commands, ContextPtr query_context, const zkutil::ZooKeeperPtr & zookeeper) const { @@ -7394,7 +7372,6 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->is_temp = true; - SyncGuardPtr sync_guard; if (new_data_part->isStoredOnDisk()) { @@ -7419,7 +7396,9 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP auto compression_codec = getContext()->chooseCompressionCodec(0, 0); const auto & index_factory = MergeTreeIndexFactory::instance(); - MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); + MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, + index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); + bool sync_on_insert = settings->fsync_after_insert; out.write(block); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 
bcd364df30e..6861d89f070 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -717,7 +717,6 @@ private: std::unique_ptr getDefaultSettings() const override; - std::set getPartitionIdsAffectedByCommands(const MutationCommands & commands, ContextPtr query_context) const; PartitionBlockNumbersHolder allocateBlockNumbersInAffectedPartitions( const MutationCommands & commands, ContextPtr query_context, const zkutil::ZooKeeperPtr & zookeeper) const; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 3f08dee62b6..3a03ac3906c 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -10,7 +10,6 @@ #include -#include #include #include #include @@ -25,7 +24,6 @@ #include #include -#include #include #include @@ -54,13 +52,10 @@ #include #include #include -#include #include namespace fs = std::filesystem; -#include - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; @@ -74,6 +69,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int S3_ERROR; extern const int UNEXPECTED_EXPRESSION; + extern const int CANNOT_OPEN_FILE; } class IOutputFormat; @@ -226,6 +222,13 @@ StorageS3Source::StorageS3Source( } +void StorageS3Source::onCancel() +{ + if (reader) + reader->cancel(); +} + + bool StorageS3Source::initialize() { String current_key = (*file_iterator)(); @@ -312,6 +315,9 @@ public: , sample_block(sample_block_) , format_settings(format_settings_) { + if (key.find_first_of("*?{") != std::string::npos) + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "S3 key '{}' contains globs, so the table is in readonly mode", key); + write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 8ce287ff681..248238379dc 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -68,6 +68,8 @@ public: Chunk generate() override; + void onCancel() override; + private: String name; String bucket; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index fe05d168c31..0eec77ac8e7 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -95,11 +95,26 @@ namespace class StorageURLSource : public SourceWithProgress { + using URIParams = std::vector>; public: + struct URIInfo + { + using FailoverOptions = std::vector; + std::vector uri_list_to_read; + std::atomic next_uri_to_read = 0; + }; + using URIInfoPtr = std::shared_ptr; + + void onCancel() override + { + if (reader) + reader->cancel(); + } + StorageURLSource( - const std::vector & uri_options, + URIInfoPtr uri_info_, const std::string & http_method, std::function callback, const String & format, @@ -114,10 +129,12 @@ namespace const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {}, const URIParams & params = {}) : SourceWithProgress(sample_block), name(std::move(name_)) + , uri_info(uri_info_) { auto headers = getHeaders(headers_); + /// Lazy initialization. We should not perform requests in constructor, because we need to do it in query pipeline. 
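Reviewer note: StorageURLSource now receives a shared URIInfo whose atomic next_uri_to_read counter lets several streams claim URIs from one list without overlap, instead of one source owning all the options up front. A self-contained sketch of that shared-counter pattern; the worker lambda and URI strings are illustrative, not the StorageURL code.

#include <atomic>
#include <cstddef>
#include <iostream>
#include <mutex>
#include <string>
#include <thread>
#include <vector>

int main()
{
    // Shared list of URIs ("shards"); each worker claims the next unread one.
    const std::vector<std::string> uris{"http://host1/data", "http://host2/data", "http://host3/data"};
    std::atomic<std::size_t> next_uri{0};
    std::mutex out_mutex;

    auto worker = [&](int id)
    {
        while (true)
        {
            const std::size_t pos = next_uri.fetch_add(1);  // same idea as next_uri_to_read
            if (pos >= uris.size())
                return;                                     // nothing left to read
            std::lock_guard<std::mutex> lock(out_mutex);
            std::cout << "stream " << id << " reads " << uris[pos] << '\n';
        }
    };

    std::thread t1(worker, 1), t2(worker, 2);
    t1.join();
    t2.join();
}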
- initialize = [=, this] + initialize = [=, this](const URIInfo::FailoverOptions & uri_options) { WriteBufferFromOwnString error_message; for (auto option = uri_options.begin(); option < uri_options.end(); ++option) @@ -135,10 +152,11 @@ namespace if (n != std::string::npos) { credentials.setUsername(user_info.substr(0, n)); - credentials.setPassword(user_info.substr(n+1)); + credentials.setPassword(user_info.substr(n + 1)); } } + /// Get first alive uri. read_buf = wrapReadBufferWithCompressionMethod( std::make_unique( request_uri, @@ -188,29 +206,34 @@ namespace Chunk generate() override { - if (initialize) + while (true) { - initialize(); - initialize = {}; + if (!reader) + { + auto current_uri_pos = uri_info->next_uri_to_read.fetch_add(1); + if (current_uri_pos >= uri_info->uri_list_to_read.size()) + return {}; + + auto current_uri = uri_info->uri_list_to_read[current_uri_pos]; + initialize(current_uri); + } + + Chunk chunk; + if (reader->pull(chunk)) + return chunk; + + pipeline->reset(); + reader.reset(); } - - if (!reader) - return {}; - - Chunk chunk; - if (reader->pull(chunk)) - return chunk; - - pipeline->reset(); - reader.reset(); - - return {}; } private: - std::function initialize; + using InitializeFunc = std::function; + InitializeFunc initialize; String name; + URIInfoPtr uri_info; + std::unique_ptr read_buf; std::unique_ptr pipeline; std::unique_ptr reader; @@ -332,7 +355,7 @@ Pipe IStorageURLBase::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned /*num_streams*/) + unsigned num_streams) { auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); bool with_globs = (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) @@ -341,18 +364,23 @@ Pipe IStorageURLBase::read( if (with_globs) { size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; - std::vector url_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); - std::vector uri_options; + auto uri_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); + + if (num_streams > uri_descriptions.size()) + num_streams = uri_descriptions.size(); + + /// For each uri (which acts like shard) check if it has failover options + auto uri_info = std::make_shared(); + for (const auto & description : uri_descriptions) + uri_info->uri_list_to_read.emplace_back(parseRemoteDescription(description, 0, description.size(), '|', max_addresses)); Pipes pipes; - for (const auto & url_description : url_descriptions) - { - /// For each uri (which acts like shard) check if it has failover options - uri_options = parseRemoteDescription(url_description, 0, url_description.size(), '|', max_addresses); - StoragePtr shard; + pipes.reserve(num_streams); + for (size_t i = 0; i < num_streams; ++i) + { pipes.emplace_back(std::make_shared( - uri_options, + uri_info, getReadMethod(), getReadPOSTDataCallback( column_names, metadata_snapshot, query_info, @@ -371,9 +399,10 @@ Pipe IStorageURLBase::read( } else { - std::vector uri_options{uri}; + auto uri_info = std::make_shared(); + uri_info->uri_list_to_read.emplace_back(std::vector{uri}); return Pipe(std::make_shared( - uri_options, + uri_info, getReadMethod(), getReadPOSTDataCallback( column_names, metadata_snapshot, query_info, @@ -402,8 +431,10 @@ Pipe StorageURLWithFailover::read( { auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, 
max_block_size); + auto uri_info = std::make_shared(); + uri_info->uri_list_to_read.emplace_back(uri_options); auto pipe = Pipe(std::make_shared( - uri_options, + uri_info, getReadMethod(), getReadPOSTDataCallback( column_names, metadata_snapshot, query_info, diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 96c05a59173..133761cbe22 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -9,6 +9,36 @@ get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPE get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) + +find_package(Git) +if(Git_FOUND) + # The commit's git hash, and whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # Git branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # The date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + # The subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + configure_file (StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index da563cc245b..8a19d7649aa 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -50,6 +50,10 @@ const char * auto_config_build[] "USE_KRB5", "@USE_KRB5@", "USE_FILELOG", "@USE_FILELOG@", "USE_BZIP2", "@USE_BZIP2@", + "GIT_HASH", "@GIT_HASH@", + "GIT_BRANCH", "@GIT_BRANCH@", + "GIT_DATE", "@GIT_DATE@", + "GIT_COMMIT_SUBJECT", "@GIT_COMMIT_SUBJECT@", nullptr, nullptr }; diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 1f5def6d6b4..1e303d1aeaa 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -45,7 +45,8 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr co // get an error when trying to get the info about DB from ZK. // Just ignore these inaccessible databases. A good example of a // failing test is `01526_client_start_and_exit`. - try { + try + { writeCluster(res_columns, {name_and_database.first, replicated->getCluster()}); } catch (...) 
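[Editor's note, not part of the patch] The StorageURL change above replaces the old "one source per URI" reading with num_streams sources that share a single list of URI groups: each source atomically claims the next group via uri_info->next_uri_to_read.fetch_add(1), and every group is a failover list (the '|'-separated options inside each ','-separated shard). Below is a minimal standalone C++ sketch of that scheduling idea only; the names UriInfo and readGroup are invented for illustration and are not the real StorageURLSource internals.

#include <atomic>
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

// Shared state: a list of URI groups plus an atomic cursor, mirroring the
// idea behind uri_info->next_uri_to_read in the patch (names are invented).
struct UriInfo
{
    std::vector<std::vector<std::string>> uri_list_to_read; // each inner vector = failover options for one shard
    std::atomic<size_t> next_uri_to_read{0};
};

// One reading stream: keep claiming groups until the cursor runs past the end.
void readGroup(UriInfo & info, size_t stream_id)
{
    while (true)
    {
        const size_t pos = info.next_uri_to_read.fetch_add(1);
        if (pos >= info.uri_list_to_read.size())
            return;

        // A real implementation would try each option until one succeeds; here we only print.
        for (const auto & uri : info.uri_list_to_read[pos])
            std::cout << "stream " << stream_id << " would read " << uri << '\n';
    }
}

int main()
{
    UriInfo info;
    info.uri_list_to_read = {{"http://host-a1/data", "http://host-a2/data"}, {"http://host-b/data"}};

    std::vector<std::thread> streams;
    for (size_t i = 0; i < 2; ++i)
        streams.emplace_back(readGroup, std::ref(info), i);
    for (auto & t : streams)
        t.join();
}

Because each stream exits once the cursor passes the end of the list, capping num_streams at the number of URI descriptions (as IStorageURLBase::read now does) simply avoids creating sources that would find no work.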
diff --git a/src/Storages/System/StorageSystemDictionaries.cpp b/src/Storages/System/StorageSystemDictionaries.cpp index d8f92d38081..c0d7d8cc4ed 100644 --- a/src/Storages/System/StorageSystemDictionaries.cpp +++ b/src/Storages/System/StorageSystemDictionaries.cpp @@ -142,7 +142,9 @@ void StorageSystemDictionaries::fillData(MutableColumns & res_columns, ContextPt res_columns[i++]->insertDefault(); if (dict_ptr) + { res_columns[i++]->insert(dict_ptr->getDictionaryComment()); + } else { if (load_result.config && load_result.config->config->has("dictionary.comment")) diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 8dbd73628ca..f1b3a13c332 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -58,7 +60,11 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_ {"column_bytes_on_disk", std::make_shared()}, {"column_data_compressed_bytes", std::make_shared()}, {"column_data_uncompressed_bytes", std::make_shared()}, - {"column_marks_bytes", std::make_shared()} + {"column_marks_bytes", std::make_shared()}, + {"serialization_kind", std::make_shared()}, + {"subcolumns.names", std::make_shared(std::make_shared())}, + {"subcolumns.types", std::make_shared(std::make_shared())}, + {"subcolumns.serializations", std::make_shared(std::make_shared())} } ) { @@ -216,6 +222,28 @@ void StorageSystemPartsColumns::processNextStorage( if (columns_mask[src_index++]) columns[res_index++]->insert(column_size.marks); + auto serialization = part->getSerialization(column); + if (columns_mask[src_index++]) + columns[res_index++]->insert(ISerialization::kindToString(serialization->getKind())); + + Array subcolumn_names; + Array subcolumn_types; + Array subcolumn_sers; + + IDataType::forEachSubcolumn([&](const auto &, const auto & name, const auto & data) + { + subcolumn_names.push_back(name); + subcolumn_types.push_back(data.type->getName()); + subcolumn_sers.push_back(ISerialization::kindToString(data.serialization->getKind())); + }, { serialization, column.type, nullptr, nullptr }); + + if (columns_mask[src_index++]) + columns[res_index++]->insert(subcolumn_names); + if (columns_mask[src_index++]) + columns[res_index++]->insert(subcolumn_types); + if (columns_mask[src_index++]) + columns[res_index++]->insert(subcolumn_sers); + if (has_state_column) columns[res_index++]->insert(part->stateString()); } diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 803e9d55dac..68a1eac305e 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index f161400630b..57b9e73bbbd 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -120,7 +120,7 @@ TEST(TransformQueryForExternalDatabase, InWithSingleElement) check(state, 1, "SELECT column FROM test.table WHERE 1 IN (1)", - R"(SELECT "column" FROM "test"."table" WHERE 1)"); + R"(SELECT "column" FROM "test"."table" WHERE 1 = 1)"); check(state, 1, "SELECT column FROM 
test.table WHERE column IN (1, 2)", R"(SELECT "column" FROM "test"."table" WHERE "column" IN (1, 2))"); @@ -135,7 +135,7 @@ TEST(TransformQueryForExternalDatabase, InWithMultipleColumns) check(state, 1, "SELECT column FROM test.table WHERE (1,1) IN ((1,1))", - R"(SELECT "column" FROM "test"."table" WHERE 1)"); + R"(SELECT "column" FROM "test"."table" WHERE 1 = 1)"); check(state, 1, "SELECT field, value FROM test.table WHERE (field, value) IN (('foo', 'bar'))", R"(SELECT "field", "value" FROM "test"."table" WHERE ("field", "value") IN (('foo', 'bar')))"); diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 4d6c1787a34..c42fb7fa965 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -306,6 +306,18 @@ String transformQueryForExternalDatabase( throw Exception("Query contains non-compatible expressions (and external_table_strict_query=true)", ErrorCodes::INCORRECT_QUERY); } + auto * literal_expr = typeid_cast(original_where.get()); + UInt64 value; + if (literal_expr && literal_expr->value.tryGet(value) && (value == 0 || value == 1)) + { + /// WHERE 1 -> WHERE 1=1, WHERE 0 -> WHERE 1=0. + if (value) + original_where = makeASTFunction("equals", std::make_shared(1), std::make_shared(1)); + else + original_where = makeASTFunction("equals", std::make_shared(1), std::make_shared(0)); + select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(original_where)); + } + ASTPtr select_ptr = select; dropAliases(select_ptr); diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 9edb75b0a69..41ba2db5c33 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -75,7 +75,12 @@ ColumnsDescription TableFunctionExecutable::getActualTableStructure(ContextPtr c StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const { auto storage_id = StorageID(getDatabaseName(), table_name); - auto storage = StorageExecutable::create(storage_id, script_name, arguments, format, input_queries, getActualTableStructure(context), ConstraintsDescription{}); + auto global_context = context->getGlobalContext(); + ExecutableSettings settings; + settings.script_name = script_name; + settings.script_arguments = std::move(arguments); + + auto storage = StorageExecutable::create(storage_id, format, settings, input_queries, getActualTableStructure(context), ConstraintsDescription{}); storage->startup(); return storage; } diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index d9ee9b47868..70bdc67efc8 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -12,7 +12,7 @@ namespace DB class Context; -/* hdfs(name_node_ip:name_node_port, format, structure) - creates a temporary storage from hdfs file +/* hdfs(URI, format, structure) - creates a temporary storage from hdfs files * */ class TableFunctionHDFS : public ITableFunctionFileLike diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp new file mode 100644 index 00000000000..ca1ac6a11cd --- /dev/null +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -0,0 +1,116 @@ +#include + +#if USE_HDFS + +#include + +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "registerTableFunctions.h" + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +void TableFunctionHDFSCluster::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + /// Parse args + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTs & args = args_func.at(0)->children; + + const auto message = fmt::format( + "The signature of table function {} shall be the following:\n" + " - cluster, uri, format, structure\n" + " - cluster, uri, format, structure, compression_method", + getName()); + + if (args.size() < 4 || args.size() > 5) + throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + /// These arguments always come first + cluster_name = args[0]->as().value.safeGet(); + uri = args[1]->as().value.safeGet(); + format = args[2]->as().value.safeGet(); + structure = args[3]->as().value.safeGet(); + if (args.size() >= 5) + compression_method = args[4]->as().value.safeGet(); +} + + +ColumnsDescription TableFunctionHDFSCluster::getActualTableStructure(ContextPtr context) const +{ + return parseColumnsListFromString(structure, context); +} + +StoragePtr TableFunctionHDFSCluster::executeImpl( + const ASTPtr & /*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription /*cached_columns*/) const +{ + StoragePtr storage; + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On a worker node this uri won't contain globs + storage = StorageHDFS::create( + uri, + StorageID(getDatabaseName(), table_name), + format, + getActualTableStructure(context), + ConstraintsDescription{}, + String{}, + context, + compression_method, + /*distributed_processing=*/true, + nullptr); + } + else + { + storage = StorageHDFSCluster::create( + cluster_name, uri, StorageID(getDatabaseName(), table_name), + format, getActualTableStructure(context), ConstraintsDescription{}, + compression_method); + } + + storage->startup(); + + return storage; +} + + +void registerTableFunctionHDFSCluster(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h new file mode 100644 index 00000000000..58d1c3d9b05 --- /dev/null +++ b/src/TableFunctions/TableFunctionHDFSCluster.h @@ -0,0 +1,54 @@ +#pragma once + +#include + +#if USE_HDFS + +#include + + +namespace DB +{ + +class Context; + +/** + * hdfsCluster(cluster, URI, format, structure, compression_method) + * A table function that allows processing many files from HDFS on a specific cluster. + * On the initiator it creates a connection to _all_ nodes in the cluster, expands asterisks + * in the HDFS file path and dispatches each file dynamically. + * On a worker node it asks the initiator for the next task and processes it. + * This is repeated until all tasks are finished. 
+ */ +class TableFunctionHDFSCluster : public ITableFunction +{ +public: + static constexpr auto name = "hdfsCluster"; + std::string getName() const override + { + return name; + } + bool hasStaticStructure() const override { return true; } + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns) const override; + + const char * getStorageTypeName() const override { return "HDFSCluster"; } + + ColumnsDescription getActualTableStructure(ContextPtr) const override; + void parseArguments(const ASTPtr &, ContextPtr) override; + + String cluster_name; + String uri; + String format; + String structure; + String compression_method = "auto"; +}; + +} + +#endif diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index a9cecb11a1c..e959fa754c9 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -37,7 +38,11 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR); configuration = StorageMySQL::getConfiguration(args_func.arguments->children, context); - pool.emplace(configuration->database, configuration->addresses, configuration->username, configuration->password); + MySQLSettings mysql_settings; + const auto & settings = context->getSettingsRef(); + mysql_settings.connect_timeout = settings.external_storage_connect_timeout_sec; + mysql_settings.read_write_timeout = settings.external_storage_rw_timeout_sec; + pool.emplace(createMySQLPoolWithFailover(*configuration, mysql_settings)); } ColumnsDescription TableFunctionMySQL::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index bcfe8d5444c..d948f40588f 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -50,6 +50,7 @@ ColumnsDescription TableFunctionPostgreSQL::getActualTableStructure(ContextPtr c if (!columns) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table structure not returned"); + return ColumnsDescription{*columns}; } diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index b4aab3e5c55..ea5c2c75f94 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -28,6 +28,7 @@ void registerTableFunctions() #if USE_HDFS registerTableFunctionHDFS(factory); + registerTableFunctionHDFSCluster(factory); #endif registerTableFunctionODBC(factory); diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 8dbb5ebb5fa..8ddd9b7c8ab 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -26,6 +26,7 @@ void registerTableFunctionCOS(TableFunctionFactory & factory); #if USE_HDFS void registerTableFunctionHDFS(TableFunctionFactory & factory); +void registerTableFunctionHDFSCluster(TableFunctionFactory & factory); #endif void registerTableFunctionODBC(TableFunctionFactory & factory); diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index bbf822c3879..042e0e90459 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -7,9 +7,11 @@ import 
sys from github import Github +from env_helper import GITHUB_REPOSITORY, TEMP_PATH, REPO_COPY, REPORTS_PATH, GITHUB_SERVER_URL, \ + GITHUB_RUN_ID from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo, get_event +from pr_info import PRInfo from build_download_helper import get_build_name_for_check, get_build_urls from docker_pull_helper import get_image_with_version from commit_status_helper import post_commit_status @@ -26,7 +28,7 @@ def get_run_command(pr_number, sha, download_url, workspace_path, image): f'{image}' def get_commit(gh, commit_sha): - repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + repo = gh.get_repo(GITHUB_REPOSITORY) commit = repo.get_commit(commit_sha) return commit @@ -35,16 +37,16 @@ if __name__ == "__main__": stopwatch = Stopwatch() - temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) - repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) - reports_path = os.getenv("REPORTS_PATH", "./reports") + temp_path = TEMP_PATH + repo_path = REPO_COPY + reports_path = REPORTS_PATH check_name = sys.argv[1] if not os.path.exists(temp_path): os.makedirs(temp_path) - pr_info = PRInfo(get_event()) + pr_info = PRInfo() gh = Github(get_best_robot_token()) @@ -106,7 +108,7 @@ if __name__ == "__main__": logging.info("Exception uploading file %s text %s", f, ex) paths[f] = '' - report_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" if paths['runlog.log']: report_url = paths['runlog.log'] if paths['main.log']: diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 541cfd597ee..f37ea49e387 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -7,8 +7,10 @@ import os import sys import time from github import Github + +from env_helper import REPO_COPY, TEMP_PATH, CACHES_PATH, IMAGES_PATH from s3_helper import S3Helper -from pr_info import PRInfo, get_event +from pr_info import PRInfo from get_robot_token import get_best_robot_token from version_helper import get_version_from_repo, update_version_local from ccache_utils import get_ccache_if_not_exists, upload_ccache @@ -114,15 +116,19 @@ def create_json_artifact(temp_path, build_name, log_url, build_urls, build_confi "status": success, } - with open(os.path.join(temp_path, "build_urls_" + build_name + '.json'), 'w') as build_links: + json_name = "build_urls_" + build_name + '.json' + + print ("Dump json report", result, "to", json_name, "with env", "build_urls_{build_name}") + + with open(os.path.join(temp_path, json_name), 'w') as build_links: json.dump(result, build_links) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) - temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) - caches_path = os.getenv("CACHES_PATH", temp_path) + repo_path = REPO_COPY + temp_path = TEMP_PATH + caches_path = CACHES_PATH build_check_name = sys.argv[1] build_name = sys.argv[2] @@ -132,7 +138,7 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - pr_info = PRInfo(get_event()) + pr_info = PRInfo() logging.info("Repo copy path %s", repo_path) @@ -171,7 +177,7 @@ if __name__ == "__main__": sys.exit(0) image_name = get_image_name(build_config) - docker_image = get_image_with_version(os.getenv("IMAGES_PATH"), image_name) + docker_image = 
get_image_with_version(IMAGES_PATH, image_name) image_version = docker_image.version logging.info("Got version from repo %s", version.get_version_string()) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index c703b8511e3..a85558ebe33 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -5,11 +5,13 @@ import logging import os import sys from github import Github + +from env_helper import REPORTS_PATH, TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID from report import create_build_html_report from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo, get_event -from commit_status_helper import get_commit +from pr_info import PRInfo +from commit_status_helper import get_commit from ci_config import CI_CONFIG from rerun_helper import RerunHelper @@ -75,8 +77,8 @@ def get_build_name_from_file_name(file_name): if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - reports_path = os.getenv("REPORTS_PATH", "./reports") - temp_path = os.path.join(os.getenv("TEMP_PATH", ".")) + reports_path = REPORTS_PATH + temp_path = TEMP_PATH logging.info("Reports path %s", reports_path) if not os.path.exists(temp_path): @@ -85,7 +87,7 @@ if __name__ == "__main__": build_check_name = sys.argv[1] gh = Github(get_best_robot_token()) - pr_info = PRInfo(get_event()) + pr_info = PRInfo() rerun_helper = RerunHelper(gh, pr_info, build_check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") @@ -127,15 +129,15 @@ if __name__ == "__main__": s3_helper = S3Helper('https://s3.amazonaws.com') - pr_info = PRInfo(get_event()) + pr_info = PRInfo() - branch_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/commits/master" + branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" branch_name = "master" if pr_info.number != 0: branch_name = "PR #{}".format(pr_info.number) - branch_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/pull/{pr_info.number}" - commit_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/commit/{pr_info.sha}" - task_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID', '0')}" + branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}" + commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}" + task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}" report = create_build_html_report( build_check_name, build_results, diff --git a/tests/ci/cancel_workflow_lambda/Dockerfile b/tests/ci/cancel_and_rerun_workflow_lambda/Dockerfile similarity index 100% rename from tests/ci/cancel_workflow_lambda/Dockerfile rename to tests/ci/cancel_and_rerun_workflow_lambda/Dockerfile diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py new file mode 100644 index 00000000000..b79eb292dc6 --- /dev/null +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 + +from collections import namedtuple +import json +import time + +import jwt +import requests +import boto3 + +NEED_RERUN_OR_CANCELL_WORKFLOWS = { + 13241696, # PR + 15834118, # Docs + 15516108, # ReleaseCI + 15797242, # BackportPR +} + +# https://docs.github.com/en/rest/reference/actions#cancel-a-workflow-run +# +API_URL = 
'https://api.github.com/repos/ClickHouse/ClickHouse' + +MAX_RETRY = 5 + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def get_token_from_aws(): + private_key, app_id = get_key_and_app_from_aws() + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": app_id, + } + + encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + return get_access_token(encoded_jwt, installation_id) + +def _exec_get_with_retry(url): + for i in range(MAX_RETRY): + try: + response = requests.get(url) + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute GET request with retries") + + +WorkflowDescription = namedtuple('WorkflowDescription', + ['run_id', 'status', 'rerun_url', 'cancel_url']) + + +def get_workflows_description_for_pull_request(pull_request_event): + head_branch = pull_request_event['head']['ref'] + print("PR", pull_request_event['number'], "has head ref", head_branch) + workflows_data = [] + workflows = _exec_get_with_retry(API_URL + f"/actions/runs?branch={head_branch}&event=pull_request&page=1") + workflows_data += workflows['workflow_runs'] + i = 2 + while len(workflows['workflow_runs']) > 0: + workflows = _exec_get_with_retry(API_URL + f"/actions/runs?branch={head_branch}&event=pull_request&page={i}") + workflows_data += workflows['workflow_runs'] + i += 1 + if i > 30: + print("Too many workflows found") + break + + workflow_descriptions = [] + for workflow in workflows_data: + # unfortunately we cannot filter workflows from forks in request to API so doing it manually + if (workflow['head_repository']['full_name'] == pull_request_event['head']['repo']['full_name'] + and workflow['workflow_id'] in NEED_RERUN_OR_CANCELL_WORKFLOWS): + workflow_descriptions.append(WorkflowDescription( + run_id=workflow['id'], + status=workflow['status'], + rerun_url=workflow['rerun_url'], + cancel_url=workflow['cancel_url'])) + + return workflow_descriptions + +def get_workflow_description(workflow_id): + workflow = _exec_get_with_retry(API_URL + f"/actions/runs/{workflow_id}") + return WorkflowDescription( + run_id=workflow['id'], + status=workflow['status'], + rerun_url=workflow['rerun_url'], + cancel_url=workflow['cancel_url']) + +def _exec_post_with_retry(url, token): + headers = { + "Authorization": f"token {token}" + } + for i in range(MAX_RETRY): + try: + response = requests.post(url, 
headers=headers) + response.raise_for_status() + return response.json() + except Exception as ex: + print("Got exception executing request", ex) + time.sleep(i + 1) + + raise Exception("Cannot execute POST request with retry") + +def exec_workflow_url(urls_to_cancel, token): + for url in urls_to_cancel: + print("Post for workflow workflow using url", url) + _exec_post_with_retry(url, token) + print("Workflow post finished") + +def main(event): + token = get_token_from_aws() + event_data = json.loads(event['body']) + + print("Got event for PR", event_data['number']) + action = event_data['action'] + print("Got action", event_data['action']) + pull_request = event_data['pull_request'] + labels = { l['name'] for l in pull_request['labels'] } + print("PR has labels", labels) + if action == 'closed' or 'do not test' in labels: + print("PR merged/closed or manually labeled 'do not test' will kill workflows") + workflow_descriptions = get_workflows_description_for_pull_request(pull_request) + urls_to_cancel = [] + for workflow_description in workflow_descriptions: + if workflow_description.status != 'completed': + urls_to_cancel.append(workflow_description.cancel_url) + print(f"Found {len(urls_to_cancel)} workflows to cancel") + exec_workflow_url(urls_to_cancel, token) + elif action == 'labeled' and 'can be tested' in labels: + print("PR marked with can be tested label, rerun workflow") + workflow_descriptions = get_workflows_description_for_pull_request(pull_request) + if not workflow_descriptions: + print("Not found any workflows") + return + + sorted_workflows = list(sorted(workflow_descriptions, key=lambda x: x.run_id)) + most_recent_workflow = sorted_workflows[-1] + print("Latest workflow", most_recent_workflow) + if most_recent_workflow.status != 'completed': + print("Latest workflow is not completed, cancelling") + exec_workflow_url([most_recent_workflow.cancel_url], token) + print("Cancelled") + + for _ in range(30): + latest_workflow_desc = get_workflow_description(most_recent_workflow.run_id) + print("Checking latest workflow", latest_workflow_desc) + if latest_workflow_desc.status in ('completed', 'cancelled'): + print("Finally latest workflow done, going to rerun") + exec_workflow_url([most_recent_workflow.rerun_url], token) + print("Rerun finished, exiting") + break + print("Still have strange status") + time.sleep(3) + + else: + print("Nothing to do") + +def handler(event, _): + main(event) diff --git a/tests/ci/cancel_workflow_lambda/requirements.txt b/tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt similarity index 100% rename from tests/ci/cancel_workflow_lambda/requirements.txt rename to tests/ci/cancel_and_rerun_workflow_lambda/requirements.txt diff --git a/tests/ci/cancel_workflow_lambda/app.py b/tests/ci/cancel_workflow_lambda/app.py deleted file mode 100644 index e475fcb931a..00000000000 --- a/tests/ci/cancel_workflow_lambda/app.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 - -import json -import time -import jwt - -import requests -import boto3 - -# https://docs.github.com/en/rest/reference/actions#cancel-a-workflow-run -# -API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse' - -MAX_RETRY = 5 - -def get_installation_id(jwt_token): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.get("https://api.github.com/app/installations", headers=headers) - response.raise_for_status() - data = response.json() - return data[0]['id'] - -def get_access_token(jwt_token, 
installation_id): - headers = { - "Authorization": f"Bearer {jwt_token}", - "Accept": "application/vnd.github.v3+json", - } - response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) - response.raise_for_status() - data = response.json() - return data['token'] - -def get_key_and_app_from_aws(): - secret_name = "clickhouse_github_secret_key" - session = boto3.session.Session() - client = session.client( - service_name='secretsmanager', - ) - get_secret_value_response = client.get_secret_value( - SecretId=secret_name - ) - data = json.loads(get_secret_value_response['SecretString']) - return data['clickhouse-app-key'], int(data['clickhouse-app-id']) - -def get_token_from_aws(): - private_key, app_id = get_key_and_app_from_aws() - payload = { - "iat": int(time.time()) - 60, - "exp": int(time.time()) + (10 * 60), - "iss": app_id, - } - - encoded_jwt = jwt.encode(payload, private_key, algorithm="RS256") - installation_id = get_installation_id(encoded_jwt) - return get_access_token(encoded_jwt, installation_id) - -def _exec_get_with_retry(url): - for i in range(MAX_RETRY): - try: - response = requests.get(url) - response.raise_for_status() - return response.json() - except Exception as ex: - print("Got exception executing request", ex) - time.sleep(i + 1) - - raise Exception("Cannot execute GET request with retries") - - -def get_workflows_cancel_urls_for_pull_request(pull_request_event): - head_branch = pull_request_event['head']['ref'] - print("PR", pull_request_event['number'], "has head ref", head_branch) - workflows = _exec_get_with_retry(API_URL + f"/actions/runs?branch={head_branch}") - workflows_urls_to_cancel = set([]) - for workflow in workflows['workflow_runs']: - if workflow['status'] != 'completed': - print("Workflow", workflow['url'], "not finished, going to be cancelled") - workflows_urls_to_cancel.add(workflow['cancel_url']) - else: - print("Workflow", workflow['url'], "already finished, will not try to cancel") - - return workflows_urls_to_cancel - -def _exec_post_with_retry(url, token): - headers = { - "Authorization": f"token {token}" - } - for i in range(MAX_RETRY): - try: - response = requests.post(url, headers=headers) - response.raise_for_status() - return response.json() - except Exception as ex: - print("Got exception executing request", ex) - time.sleep(i + 1) - - raise Exception("Cannot execute POST request with retry") - -def cancel_workflows(urls_to_cancel, token): - for url in urls_to_cancel: - print("Cancelling workflow using url", url) - _exec_post_with_retry(url, token) - print("Workflow cancelled") - -def main(event): - token = get_token_from_aws() - event_data = json.loads(event['body']) - - print("Got event for PR", event_data['number']) - action = event_data['action'] - print("Got action", event_data['action']) - pull_request = event_data['pull_request'] - labels = { l['name'] for l in pull_request['labels'] } - print("PR has labels", labels) - if action == 'closed' or 'do not test' in labels: - print("PR merged/closed or manually labeled 'do not test' will kill workflows") - workflows_to_cancel = get_workflows_cancel_urls_for_pull_request(pull_request) - print(f"Found {len(workflows_to_cancel)} workflows to cancel") - cancel_workflows(workflows_to_cancel, token) - else: - print("Nothing to do") - -def handler(event, _): - main(event) diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 112b58ef1cf..91a018f158f 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ 
-5,6 +5,7 @@ import logging import os import subprocess +from env_helper import GITHUB_WORKSPACE, TEMP_PATH from get_robot_token import get_parameter_from_ssm from ssh import SSHKey from cherry_pick_utils.backport import Backport @@ -13,8 +14,8 @@ from cherry_pick_utils.cherrypick import CherryPick if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) - temp_path = os.path.join(os.getenv("TEMP_PATH")) + repo_path = GITHUB_WORKSPACE + temp_path = TEMP_PATH if not os.path.exists(temp_path): os.makedirs(temp_path) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 9297b25cbb7..d5f8757ffdf 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -11,7 +11,7 @@ CI_CONFIG = { "splitted": "unsplitted", "alien_pkgs": True, "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "performance": { "compiler": "clang-13", @@ -21,7 +21,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "binary_gcc": { "compiler": "gcc-11", @@ -31,7 +31,18 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, + }, + "package_aarch64": { + "compiler": "clang-13-aarch64", + "build_type": "", + "sanitizer": "", + "package_type": "deb", + "bundled": "bundled", + "splitted": "unsplitted", + "alien_pkgs": True, + "tidy": "disable", + "with_coverage": False, }, "package_asan": { "compiler": "clang-13", @@ -41,7 +52,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "package_ubsan": { "compiler": "clang-13", @@ -51,7 +62,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "package_tsan": { "compiler": "clang-13", @@ -61,7 +72,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "package_msan": { "compiler": "clang-13", @@ -71,7 +82,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "package_debug": { "compiler": "clang-13", @@ -81,7 +92,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "binary_release": { "compiler": "clang-13", @@ -91,7 +102,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "binary_tidy": { "compiler": "clang-13", @@ -101,7 +112,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "enable", - "with_coverage": False + "with_coverage": False, }, "binary_splitted": { "compiler": "clang-13", @@ -111,7 +122,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "splitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "binary_darwin": { "compiler": "clang-13-darwin", @@ -121,7 +132,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "binary_aarch64": { "compiler": "clang-13-aarch64", @@ -131,7 +142,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "binary_freebsd": { 
"compiler": "clang-13-freebsd", @@ -141,7 +152,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "binary_darwin_aarch64": { "compiler": "clang-13-darwin-aarch64", @@ -151,7 +162,7 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False + "with_coverage": False, }, "binary_ppc64le": { "compiler": "clang-13-ppc64le", @@ -161,19 +172,20 @@ CI_CONFIG = { "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", - "with_coverage": False - } + "with_coverage": False, + }, }, "builds_report_config": { "ClickHouse build check (actions)": [ "package_release", "performance", + "package_aarch64", "package_asan", "package_ubsan", "package_tsan", "package_msan", "package_debug", - "binary_release" + "binary_release", ], "ClickHouse special build check (actions)": [ "binary_tidy", @@ -320,6 +332,6 @@ CI_CONFIG = { }, "Performance Comparison (actions)": { "required_build": "performance", - } - } + }, + }, } diff --git a/tests/ci/codebrowser_check.py b/tests/ci/codebrowser_check.py new file mode 100644 index 00000000000..97fd58c3235 --- /dev/null +++ b/tests/ci/codebrowser_check.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 + + +import os +import subprocess +import logging + +from github import Github + +from env_helper import IMAGES_PATH, REPO_COPY +from stopwatch import Stopwatch +from upload_result_helper import upload_results +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version +from tee_popen import TeePopen + +NAME = "Woboq Build (actions)" + +def get_run_command(repo_path, output_path, image): + cmd = "docker run " + \ + f"--volume={repo_path}:/repo_folder " \ + f"--volume={output_path}:/test_output " \ + f"-e 'DATA=https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data' {image}" + return cmd + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) + + gh = Github(get_best_robot_token()) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + docker_image = get_image_with_version(IMAGES_PATH, 'clickhouse/codebrowser') + s3_helper = S3Helper('https://s3.amazonaws.com') + + result_path = os.path.join(temp_path, "result_path") + if not os.path.exists(result_path): + os.makedirs(result_path) + + run_command = get_run_command(REPO_COPY, result_path, docker_image) + + logging.info("Going to run codebrowser: %s", run_command) + + run_log_path = os.path.join(temp_path, "runlog.log") + + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) + + report_path = os.path.join(result_path, "html_report") + logging.info("Report path %s", report_path) + s3_path_prefix = "codebrowser" + html_urls = s3_helper.fast_parallel_upload_dir(report_path, s3_path_prefix, 'clickhouse-test-reports') + + index_html = 'HTML report' + + test_results = [(index_html, "Look at the report")] + + report_url = upload_results(s3_helper, 0, os.getenv("GITHUB_SHA"), test_results, [], NAME) + + print(f"::notice ::Report url: {report_url}") + + post_commit_status(gh, os.getenv("GITHUB_SHA"), NAME, "Report built", "success", report_url) diff 
--git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 5bdbf634715..8396303c5a3 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -1,12 +1,33 @@ #!/usr/bin/env python3 -import os +import time +from env_helper import GITHUB_REPOSITORY + +RETRY = 5 + + +def get_commit(gh, commit_sha, retry_count=RETRY): + for i in range(retry_count): + try: + repo = gh.get_repo(GITHUB_REPOSITORY) + commit = repo.get_commit(commit_sha) + return commit + except Exception as ex: + if i == retry_count - 1: + raise ex + time.sleep(i) + + # just suppress warning + return None -def get_commit(gh, commit_sha): - repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) - commit = repo.get_commit(commit_sha) - return commit def post_commit_status(gh, sha, check_name, description, state, report_url): - commit = get_commit(gh, sha) - commit.create_status(context=check_name, description=description, state=state, target_url=report_url) + for i in range(RETRY): + try: + commit = get_commit(gh, sha, 1) + commit.create_status(context=check_name, description=description, state=state, target_url=report_url) + break + except Exception as ex: + if i == RETRY - 1: + raise ex + time.sleep(i) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 665f399b040..72626bd6364 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -8,9 +8,10 @@ import sys from github import Github +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo, get_event +from pr_info import PRInfo from build_download_helper import download_builds_filter from upload_result_helper import upload_results from docker_pull_helper import get_images_with_versions @@ -103,11 +104,11 @@ if __name__ == "__main__": stopwatch = Stopwatch() - temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) - repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) - reports_path = os.getenv("REPORTS_PATH", "./reports") + temp_path = TEMP_PATH + repo_path = REPO_COPY + reports_path = REPORTS_PATH - pr_info = PRInfo(get_event()) + pr_info = PRInfo() gh = Github(get_best_robot_token()) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 5e05cbaecd7..e389d612f44 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -6,8 +6,10 @@ import os import time import shutil from github import Github + +from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP from s3_helper import S3Helper -from pr_info import PRInfo, get_event +from pr_info import PRInfo from get_robot_token import get_best_robot_token, get_parameter_from_ssm from upload_result_helper import upload_results from commit_status_helper import get_commit @@ -157,8 +159,8 @@ if __name__ == "__main__": stopwatch = Stopwatch() - repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) - temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') + repo_path = GITHUB_WORKSPACE + temp_path = os.path.join(RUNNER_TEMP, 'docker_images_check') dockerhub_password = get_parameter_from_ssm('dockerhub_robot_password') if os.path.exists(temp_path): @@ -167,7 +169,7 @@ if __name__ == "__main__": if not os.path.exists(temp_path): os.makedirs(temp_path) - pr_info = PRInfo(get_event(), need_changed_files=True) + pr_info = PRInfo(need_changed_files=True) changed_images, 
dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) pr_commit_version = str(pr_info.number) + '-' + pr_info.sha diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index 87c327f2776..2daa75f9663 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -4,8 +4,10 @@ import subprocess import os import sys from github import Github + +from env_helper import TEMP_PATH, REPO_COPY from s3_helper import S3Helper -from pr_info import PRInfo, get_event +from pr_info import PRInfo from get_robot_token import get_best_robot_token from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version @@ -23,10 +25,10 @@ if __name__ == "__main__": stopwatch = Stopwatch() - temp_path = os.path.join(os.getenv("TEMP_PATH")) - repo_path = os.path.join(os.getenv("REPO_COPY")) + temp_path = TEMP_PATH + repo_path = REPO_COPY - pr_info = PRInfo(get_event(), need_changed_files=True) + pr_info = PRInfo(need_changed_files=True) gh = Github(get_best_robot_token()) @@ -36,7 +38,7 @@ if __name__ == "__main__": sys.exit(0) if not pr_info.has_changes_in_documentation(): - logging.info ("No changes in documentation") + logging.info("No changes in documentation") commit = get_commit(gh, pr_info.sha) commit.create_status(context=NAME, description="No changes in docs", state="success") sys.exit(0) diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index 4ea2eae5130..90588848f12 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -5,8 +5,9 @@ import os from github import Github +from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN from s3_helper import S3Helper -from pr_info import PRInfo, get_event +from pr_info import PRInfo from get_robot_token import get_best_robot_token from ssh import SSHKey from upload_result_helper import upload_results @@ -18,10 +19,10 @@ NAME = "Docs Release (actions)" if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - temp_path = os.path.join(os.getenv("TEMP_PATH")) - repo_path = os.path.join(os.getenv("REPO_COPY")) + temp_path = TEMP_PATH + repo_path = REPO_COPY - pr_info = PRInfo(get_event(), need_changed_files=True) + pr_info = PRInfo(need_changed_files=True) gh = Github(get_best_robot_token()) @@ -34,7 +35,7 @@ if __name__ == "__main__": if not os.path.exists(test_output): os.makedirs(test_output) - token = os.getenv('CLOUDFLARE_TOKEN') + token = CLOUDFLARE_TOKEN cmd = "docker run --cap-add=SYS_PTRACE --volume=$SSH_AUTH_SOCK:/ssh-agent -e SSH_AUTH_SOCK=/ssh-agent " \ f"-e CLOUDFLARE_TOKEN={token} --volume={repo_path}:/repo_path --volume={test_output}:/output_path {docker_image}" diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py new file mode 100644 index 00000000000..90178e5c56a --- /dev/null +++ b/tests/ci/env_helper.py @@ -0,0 +1,18 @@ +import os + +CI = bool(os.getenv("CI")) +TEMP_PATH = os.getenv("TEMP_PATH", os.path.abspath(".")) + +CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) +CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") +GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") +GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") +GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID") +GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") +GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) +IMAGES_PATH = os.getenv("IMAGES_PATH") +REPORTS_PATH = os.getenv("REPORTS_PATH", 
"./reports") +REPO_COPY = os.getenv("REPO_COPY", os.path.abspath("../../")) +RUNNER_TEMP = os.getenv("RUNNER_TEMP", os.path.abspath("./tmp")) +S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") +S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports") diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 26247dfd0b9..0eef886625a 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -7,7 +7,9 @@ import csv import sys from github import Github -from pr_info import PRInfo, get_event + +from env_helper import CACHES_PATH, TEMP_PATH +from pr_info import PRInfo from s3_helper import S3Helper from get_robot_token import get_best_robot_token from upload_result_helper import upload_results @@ -66,13 +68,13 @@ if __name__ == "__main__": stopwatch = Stopwatch() - temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) - caches_path = os.getenv("CACHES_PATH", temp_path) + temp_path = TEMP_PATH + caches_path = CACHES_PATH if not os.path.exists(temp_path): os.makedirs(temp_path) - pr_info = PRInfo(get_event()) + pr_info = PRInfo() gh = Github(get_best_robot_token()) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 576b97058c7..72f26daf4cd 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -1,13 +1,15 @@ #!/usr/bin/env python3 import logging -import os from github import Github -from pr_info import PRInfo, get_event + +from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID +from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit NAME = 'Run Check (actions)' + def filter_statuses(statuses): """ Squash statuses to latest state @@ -23,14 +25,15 @@ def filter_statuses(statuses): filt[status.context] = status return filt + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - pr_info = PRInfo(get_event(), need_orgs=True) + pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) - url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" statuses = filter_statuses(list(commit.get_statuses())) if NAME in statuses and statuses[NAME].state == "pending": commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index e7d4d49f3e7..4419ba1c920 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -8,9 +8,10 @@ import sys from github import Github +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo, get_event +from pr_info import PRInfo from build_download_helper import download_all_deb_packages from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version @@ -126,9 +127,9 @@ if __name__ == "__main__": stopwatch = Stopwatch() - temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) - repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) - reports_path = os.getenv("REPORTS_PATH", "./reports") + temp_path = TEMP_PATH + repo_path = REPO_COPY + reports_path = REPORTS_PATH check_name = sys.argv[1] kill_timeout = int(sys.argv[2]) @@ -136,7 +137,7 @@ if __name__ == "__main__": 
flaky_check = 'flaky' in check_name.lower() gh = Github(get_best_robot_token()) - pr_info = PRInfo(get_event(), need_changed_files=flaky_check) + pr_info = PRInfo(need_changed_files=flaky_check) if 'RUN_BY_HASH_NUM' in os.environ: run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index e124635667e..20e33f2f2dc 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -9,9 +9,10 @@ import csv from github import Github +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from s3_helper import S3Helper from get_robot_token import get_best_robot_token -from pr_info import PRInfo, get_event +from pr_info import PRInfo from build_download_helper import download_all_deb_packages from upload_result_helper import upload_results from docker_pull_helper import get_images_with_versions @@ -102,9 +103,9 @@ if __name__ == "__main__": stopwatch = Stopwatch() - temp_path = os.getenv("TEMP_PATH", os.path.abspath(".")) - repo_path = os.getenv("REPO_COPY", os.path.abspath("../../")) - reports_path = os.getenv("REPORTS_PATH", "./reports") + temp_path = TEMP_PATH + repo_path = REPO_COPY + reports_path = REPORTS_PATH check_name = sys.argv[1] @@ -121,7 +122,7 @@ if __name__ == "__main__": os.makedirs(temp_path) is_flaky_check = 'flaky' in check_name - pr_info = PRInfo(get_event(), need_changed_files=is_flaky_check) + pr_info = PRInfo(need_changed_files=is_flaky_check) gh = Github(get_best_robot_token()) diff --git a/tests/ci/keeper_jepsen_check.py b/tests/ci/keeper_jepsen_check.py new file mode 100644 index 00000000000..2c2b8b4783f --- /dev/null +++ b/tests/ci/keeper_jepsen_check.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 + +import time +import logging +import os +import sys + +import boto3 +from github import Github +import requests + +from env_helper import REPO_COPY, TEMP_PATH +from stopwatch import Stopwatch +from upload_result_helper import upload_results +from s3_helper import S3Helper +from get_robot_token import get_best_robot_token, get_parameter_from_ssm +from pr_info import PRInfo +from compress_files import compress_fast +from commit_status_helper import post_commit_status +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from version_helper import get_version_from_repo +from tee_popen import TeePopen +from ssh import SSHKey +from build_download_helper import get_build_name_for_check +from rerun_helper import RerunHelper + +JEPSEN_GROUP_NAME = 'jepsen_group' +DESIRED_INSTANCE_COUNT = 3 +IMAGE_NAME = 'clickhouse/keeper-jepsen-test' +CHECK_NAME = 'ClickHouse Keeper Jepsen (actions)' + + +SUCCESSFUL_TESTS_ANCHOR = "# Successful tests" +INTERMINATE_TESTS_ANCHOR = "# Indeterminate tests" +CRASHED_TESTS_ANCHOR = "# Crashed tests" +FAILED_TESTS_ANCHOR = "# Failed tests" + +def _parse_jepsen_output(path): + test_results = [] + current_type = '' + with open(path, 'r') as f: + for line in f: + if SUCCESSFUL_TESTS_ANCHOR in line: + current_type = 'OK' + elif INTERMINATE_TESTS_ANCHOR in line or CRASHED_TESTS_ANCHOR in line: + current_type = 'ERROR' + elif FAILED_TESTS_ANCHOR in line: + current_type = 'FAIL' + + if (line.startswith('store/clickhouse-keeper') or line.startswith('clickhouse-keeper')) and current_type: + test_results.append((line.strip(), current_type)) + + return test_results + +def get_autoscaling_group_instances_ids(asg_client, group_name): + group_description = 
asg_client.describe_auto_scaling_groups(AutoScalingGroupNames=[group_name]) + our_group = group_description['AutoScalingGroups'][0] + instance_ids = [] + for instance in our_group['Instances']: + if instance['LifecycleState'] == 'InService' and instance['HealthStatus'] == 'Healthy': + instance_ids.append(instance['InstanceId']) + + return instance_ids + +def get_instances_addresses(ec2_client, instance_ids): + ec2_response = ec2_client.describe_instances(InstanceIds=instance_ids) + instance_ips = [] + for instances in ec2_response['Reservations']: + for ip in instances['Instances']: + instance_ips.append(ip['PrivateIpAddress']) + return instance_ips + + +def prepare_autoscaling_group_and_get_hostnames(): + asg_client = boto3.client('autoscaling', region_name='us-east-1') + asg_client.set_desired_capacity(AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=DESIRED_INSTANCE_COUNT) + + instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) + counter = 0 + while len(instances) < DESIRED_INSTANCE_COUNT: + time.sleep(5) + instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) + counter += 1 + if counter > 30: + raise Exception("Cannot wait for autoscaling group") + + ec2_client = boto3.client('ec2', region_name='us-east-1') + return get_instances_addresses(ec2_client, instances) + + +def clear_autoscaling_group(): + asg_client = boto3.client('autoscaling', region_name='us-east-1') + asg_client.set_desired_capacity(AutoScalingGroupName=JEPSEN_GROUP_NAME, DesiredCapacity=0) + instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) + counter = 0 + while len(instances) > 0: + time.sleep(5) + instances = get_autoscaling_group_instances_ids(asg_client, JEPSEN_GROUP_NAME) + counter += 1 + if counter > 30: + raise Exception("Cannot wait for autoscaling group") + + +def save_nodes_to_file(instances, temp_path): + nodes_path = os.path.join(temp_path, "nodes.txt") + with open(nodes_path, 'w') as f: + f.write("\n".join(instances)) + f.flush() + return nodes_path + +def get_run_command(ssh_auth_sock, ssh_sock_dir, pr_info, nodes_path, repo_path, build_url, result_path, docker_image): + return f"docker run --network=host -v '{ssh_sock_dir}:{ssh_sock_dir}' -e SSH_AUTH_SOCK={ssh_auth_sock} " \ + f"-e PR_TO_TEST={pr_info.number} -e SHA_TO_TEST={pr_info.sha} -v '{nodes_path}:/nodes.txt' -v {result_path}:/test_output " \ + f"-e 'CLICKHOUSE_PACKAGE={build_url}' -v '{repo_path}:/ch' -e 'CLICKHOUSE_REPO_PATH=/ch' -e NODES_USERNAME=ubuntu {docker_image}" + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + stopwatch = Stopwatch() + + pr_info = PRInfo() + + logging.info("Start at PR number %s, commit sha %s labels %s", pr_info.number, pr_info.sha, pr_info.labels) + + if pr_info.number != 0 and 'jepsen-test' not in pr_info.labels: + logging.info("No 'jepsen-test' label in labels list, skipping") + sys.exit(0) + + gh = Github(get_best_robot_token()) + + rerun_helper = RerunHelper(gh, pr_info, CHECK_NAME) + if rerun_helper.is_already_finished_by_status(): + logging.info("Check is already finished according to github status, exiting") + sys.exit(0) + + if not os.path.exists(TEMP_PATH): + os.makedirs(TEMP_PATH) + + result_path = os.path.join(TEMP_PATH, "result_path") + if not os.path.exists(result_path): + os.makedirs(result_path) + + instances = prepare_autoscaling_group_and_get_hostnames() + nodes_path = save_nodes_to_file(instances, TEMP_PATH) + + # always use latest + docker_image = IMAGE_NAME + + build_name = 
get_build_name_for_check(CHECK_NAME) + + if pr_info.number == 0: + version = get_version_from_repo(REPO_COPY) + release_or_pr = ".".join(version.as_tuple()[:2]) + else: + # PR number for anything else + release_or_pr = str(pr_info.number) + + # This check run separately from other checks because it requires exclusive + # run (see .github/workflows/jepsen.yml) So we cannot add explicit + # dependency on a build job and using busy loop on it's results. For the + # same reason we are using latest docker image. + build_url = f"https://s3.amazonaws.com/clickhouse-builds/{release_or_pr}/{pr_info.sha}/{build_name}/clickhouse" + head = requests.head(build_url) + counter = 0 + while head.status_code != 200: + time.sleep(10) + head = requests.head(build_url) + counter += 1 + if counter >= 180: + post_commit_status(gh, pr_info.sha, CHECK_NAME, "Cannot fetch build to run", "error", "") + raise Exception("Cannot fetch build") + + with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + '\n'): + ssh_auth_sock = os.environ['SSH_AUTH_SOCK'] + auth_sock_dir = os.path.dirname(ssh_auth_sock) + cmd = get_run_command(ssh_auth_sock, auth_sock_dir, pr_info, nodes_path, REPO_COPY, build_url, result_path, docker_image) + logging.info("Going to run jepsen: %s", cmd) + + run_log_path = os.path.join(TEMP_PATH, "runlog.log") + + with TeePopen(cmd, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") + + status = 'success' + description = 'No invalid analysis found ヽ(‘ー`)ノ' + jepsen_log_path = os.path.join(result_path, 'jepsen_run_all_tests.log') + additional_data = [] + try: + test_result = _parse_jepsen_output(jepsen_log_path) + if any(r[1] == 'FAIL' for r in test_result): + status = 'failure' + description = 'Found invalid analysis (ノಥ益ಥ)ノ ┻━┻' + + compress_fast(os.path.join(result_path, 'store'), os.path.join(result_path, 'jepsen_store.tar.gz')) + additional_data.append(os.path.join(result_path, 'jepsen_store.tar.gz')) + except Exception as ex: + print("Exception", ex) + status = 'failure' + description = 'No Jepsen output log' + test_result = [('No Jepsen output log', 'FAIL')] + + s3_helper = S3Helper('https://s3.amazonaws.com') + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_result, [run_log_path] + additional_data, CHECK_NAME) + + print(f"::notice ::Report url: {report_url}") + post_commit_status(gh, pr_info.sha, CHECK_NAME, description, status, report_url) + + ch_helper = ClickHouseHelper() + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_result, status, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, CHECK_NAME) + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) + clear_autoscaling_group() diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 88d4595bc66..812834824b7 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -6,11 +6,13 @@ import urllib import requests from unidiff import PatchSet +from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID, GITHUB_EVENT_PATH -DIFF_IN_DOCUMENTATION_EXT = [".html", ".md", ".yml", ".txt", ".css", ".js", ".xml", ".ico", ".conf", ".svg", ".png", ".jpg", ".py", ".sh", ".json"] +DIFF_IN_DOCUMENTATION_EXT = [".html", ".md", ".yml", ".txt", ".css", ".js", ".xml", ".ico", ".conf", ".svg", ".png", + ".jpg", ".py", ".sh", ".json"] def get_pr_for_commit(sha, ref): - try_get_pr_url = f"https://api.github.com/repos/{os.getenv('GITHUB_REPOSITORY', 
'ClickHouse/ClickHouse')}/commits/{sha}/pulls" + try_get_pr_url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls" try: response = requests.get(try_get_pr_url) response.raise_for_status() @@ -22,7 +24,7 @@ def get_pr_for_commit(sha, ref): # refs for RPs looks like XX if pr['head']['ref'] in ref: return pr - print ("Cannot find PR with required ref", ref, "returning first one") + print("Cannot find PR with required ref", ref, "returning first one") first_pr = data[0] return first_pr except Exception as ex: @@ -30,24 +32,35 @@ def get_pr_for_commit(sha, ref): return None -def get_event(): - with open(os.getenv('GITHUB_EVENT_PATH'), 'r', encoding='utf-8') as ef: - return json.load(ef) - - class PRInfo: - def __init__(self, github_event, need_orgs=False, need_changed_files=False): - if 'pull_request' in github_event: # pull request and other similar events - self.number = github_event['number'] + def __init__(self, github_event=None, need_orgs=False, need_changed_files=False, labels_from_api=False): + if not github_event: + if GITHUB_EVENT_PATH: + with open(GITHUB_EVENT_PATH, 'r', encoding='utf-8') as event_file: + github_event = json.load(event_file) + else: + github_event = {'commits': 1, 'after': 'HEAD', 'ref': None} + self.event = github_event + self.changed_files = set([]) + + # workflow completed event, used for PRs only + if 'action' in github_event and github_event['action'] == 'completed': + self.sha = github_event['workflow_run']['head_sha'] + prs_for_sha = requests.get(f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}/pulls").json() + if len(prs_for_sha) != 0: + github_event['pull_request'] = prs_for_sha[0] + + if 'pull_request' in github_event: # pull request and other similar events + self.number = github_event['pull_request']['number'] if 'after' in github_event: self.sha = github_event['after'] else: self.sha = github_event['pull_request']['head']['sha'] - repo_prefix = f"{os.getenv('GITHUB_SERVER_URL', 'https://github.com')}/{os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse')}" - self.task_url = f"{repo_prefix}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" + self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" - self.repo_full_name = os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse') + self.repo_full_name = GITHUB_REPOSITORY self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.pr_html_url = f"{repo_prefix}/pull/{self.number}" @@ -56,7 +69,12 @@ class PRInfo: self.head_ref = github_event['pull_request']['head']['ref'] self.head_name = github_event['pull_request']['head']['repo']['full_name'] - self.labels = { l['name'] for l in github_event['pull_request']['labels'] } + if labels_from_api: + response = requests.get(f"https://api.github.com/repos/{GITHUB_REPOSITORY}/issues/{self.number}/labels") + self.labels = {l['name'] for l in response.json()} + else: + self.labels = {l['name'] for l in github_event['pull_request']['labels']} + self.user_login = github_event['pull_request']['user']['login'] self.user_orgs = set([]) if need_orgs: @@ -65,21 +83,15 @@ class PRInfo: response_json = user_orgs_response.json() self.user_orgs = set(org['id'] for org in response_json) - self.changed_files = set([]) - if need_changed_files: - diff_url = github_event['pull_request']['diff_url'] - diff = urllib.request.urlopen(diff_url) - diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) - self.changed_files = { f.path for f in diff_object } - + self.diff_url 
= github_event['pull_request']['diff_url'] elif 'commits' in github_event: self.sha = github_event['after'] pull_request = get_pr_for_commit(self.sha, github_event['ref']) - repo_prefix = f"{os.getenv('GITHUB_SERVER_URL', 'https://github.com')}/{os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse')}" - self.task_url = f"{repo_prefix}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" + self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" - self.repo_full_name = os.getenv('GITHUB_REPOSITORY', 'ClickHouse/ClickHouse') - if pull_request is None or pull_request['state'] == 'closed': # it's merged PR to master + self.repo_full_name = GITHUB_REPOSITORY + if pull_request is None or pull_request['state'] == 'closed': # it's merged PR to master self.number = 0 self.labels = {} self.pr_html_url = f"{repo_prefix}/commits/master" @@ -87,40 +99,58 @@ class PRInfo: self.base_name = self.repo_full_name self.head_ref = "master" self.head_name = self.repo_full_name + self.diff_url = \ + f"https://api.github.com/repos/{GITHUB_REPOSITORY}/compare/{github_event['before']}...{self.sha}" else: self.number = pull_request['number'] - self.labels = { l['name'] for l in pull_request['labels'] } + if labels_from_api: + response = requests.get(f"https://api.github.com/repos/{GITHUB_REPOSITORY}/issues/{self.number}/labels") + self.labels = {l['name'] for l in response.json()} + else: + self.labels = {l['name'] for l in pull_request['labels']} + self.base_ref = pull_request['base']['ref'] self.base_name = pull_request['base']['repo']['full_name'] self.head_ref = pull_request['head']['ref'] self.head_name = pull_request['head']['repo']['full_name'] self.pr_html_url = pull_request['html_url'] - - if need_changed_files: - if self.number == 0: - commit_before = github_event['before'] - response = requests.get(f"https://api.github.com/repos/{os.getenv('GITHUB_REPOSITORY')}/compare/{commit_before}...{self.sha}") - response.raise_for_status() - diff = response.json() - - if 'files' in diff: - self.changed_files = [f['filename'] for f in diff['files']] - else: - self.changed_files = set([]) + if 'pr-backport' in self.labels: + self.diff_url = f"https://github.com/{GITHUB_REPOSITORY}/compare/master...{self.head_ref}.diff" else: - if 'pr-backport' in self.labels: - diff_url = f"https://github.com/{os.getenv('GITHUB_REPOSITORY')}/compare/master...{self.head_ref}.diff" - else: - diff_url = pull_request['diff_url'] - - diff = urllib.request.urlopen(diff_url) - diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) - self.changed_files = { f.path for f in diff_object } - else: - self.changed_files = set([]) + self.diff_url = pull_request['diff_url'] else: - raise Exception("Cannot detect type of event") + print(json.dumps(github_event, sort_keys=True, indent=4)) + self.sha = os.getenv("GITHUB_SHA") + self.number = 0 + self.labels = {} + repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" + self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" + self.repo_full_name = GITHUB_REPOSITORY + self.pr_html_url = f"{repo_prefix}/commits/master" + self.base_ref = "master" + self.base_name = self.repo_full_name + self.head_ref = "master" + self.head_name = self.repo_full_name + if need_changed_files: + self.fetch_changed_files() + + def fetch_changed_files(self): + if not self.diff_url: + raise Exception("Diff URL cannot be find for 
event") + + if 'commits' in self.event and self.number == 0: + response = requests.get(self.diff_url) + response.raise_for_status() + diff = response.json() + + if 'files' in diff: + self.changed_files = [f['filename'] for f in diff['files']] + else: + diff = urllib.request.urlopen(self.diff_url) + diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) + self.changed_files = {f.path for f in diff_object} def get_dict(self): return { diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index aa4a130902b..af543211c16 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -7,8 +7,10 @@ import json import logging import sys from github import Github + +from env_helper import REPO_COPY, TEMP_PATH, GITHUB_RUN_ID, GITHUB_REPOSITORY, GITHUB_SERVER_URL from s3_helper import S3Helper -from pr_info import PRInfo, get_event +from pr_info import PRInfo from get_robot_token import get_best_robot_token, get_parameter_from_ssm from upload_result_helper import upload_results from commit_status_helper import get_commit @@ -22,6 +24,7 @@ LICENCE_NAME = 'Free license: ClickHouse, Yandex' HTML_REPORT_FOLDER = 'pvs-studio-html-report' TXT_REPORT_NAME = 'pvs-studio-task-report.txt' + def _process_txt_report(path): warnings = [] errors = [] @@ -37,15 +40,16 @@ def _process_txt_report(path): return warnings, errors + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() - repo_path = os.path.join(os.getenv("REPO_COPY", os.path.abspath("../../"))) - temp_path = os.path.join(os.getenv("TEMP_PATH")) + repo_path = REPO_COPY + temp_path = TEMP_PATH - pr_info = PRInfo(get_event()) + pr_info = PRInfo() # this check modify repository so copy it to the temp directory logging.info("Repo copy path %s", repo_path) @@ -83,7 +87,8 @@ if __name__ == "__main__": logging.info("Run Ok") if retcode != 0: - commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + commit.create_status(context=NAME, description='PVS report failed to build', state='error', + target_url=f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}") sys.exit(1) try: @@ -97,8 +102,8 @@ if __name__ == "__main__": break if not index_html: - commit.create_status(context=NAME, description='PVS report failed to build', state='failure', - target_url=f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + commit.create_status(context=NAME, description='PVS report failed to build', state='error', + target_url=f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}") sys.exit(1) txt_report = os.path.join(temp_path, TXT_REPORT_NAME) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 176a0f4d4c1..692cda18f20 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,9 +1,10 @@ #!/usr/bin/env python3 -import os import sys import logging from github import Github -from pr_info import PRInfo, get_event + +from env_helper import GITHUB_RUN_ID, GITHUB_REPOSITORY, GITHUB_SERVER_URL +from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -34,6 +35,7 @@ TRUSTED_CONTRIBUTORS = {e.lower() for e in [ "bobrik", # Seasoned contributor, CloundFlare "BohuTANG", "codyrobert", # Flickerbox engineer + "cwurm", # Employee "damozhaeva", # DOCSUP "den-crane", "flickerbox-tom", # Flickerbox @@ -65,6 +67,7 @@ 
TRUSTED_CONTRIBUTORS = {e.lower() for e in [ "vzakaznikov", "YiuRULE", "zlobober", # Developer of YT + "ilejn", # Arenadata, responsible for Kerberized Kafka ]} @@ -87,6 +90,7 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): # can be skipped entirely. def should_run_checks_for_pr(pr_info): # Consider the labels and whether the user is trusted. + print("Got labels", pr_info.labels) force_labels = set(['force tests']).intersection(pr_info.labels) if force_labels: return True, "Labeled '{}'".format(', '.join(force_labels)) @@ -102,14 +106,15 @@ def should_run_checks_for_pr(pr_info): return True, "No special conditions apply" + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - pr_info = PRInfo(get_event(), need_orgs=True) + pr_info = PRInfo(need_orgs=True, labels_from_api=True) can_run, description = should_run_checks_for_pr(pr_info) gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) - url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" if not can_run: print("::notice ::Cannot run") commit.create_status(context=NAME, description=description, state="failure", target_url=url) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 7c1ee8ad9ee..753f036a8d7 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -2,10 +2,17 @@ import hashlib import logging import os +import re +import shutil +import time from multiprocessing.dummy import Pool + import boto3 + +from env_helper import S3_TEST_REPORTS_BUCKET, S3_BUILDS_BUCKET, RUNNER_TEMP, CI from compress_files import compress_file_fast + def _md5(fname): hash_md5 = hashlib.md5() with open(fname, "rb") as f: @@ -25,7 +32,7 @@ def _flatten_list(lst): return result -class S3Helper(): +class S3Helper: def __init__(self, host): self.session = boto3.session.Session(region_name='us-east-1') self.client = self.session.client('s3', endpoint_url=host) @@ -49,9 +56,7 @@ class S3Helper(): else: logging.info("No content type provied for %s", file_path) else: - is_log = s3_path.endswith("log") or ".log." in s3_path - is_text = s3_path.endswith("txt") or is_log or s3_path.endswith("err") or s3_path.endswith("out") - if not s3_path.endswith('.gz') and (is_text or is_log): + if re.search(r'\.(txt|log|err|out)$', s3_path) or re.search(r'\.log\..*(? /etc/docker/daemon.json { "ipv6": true, - "fixed-cidr-v6": "2001:db8:1::/64" + "fixed-cidr-v6": "2001:db8:1::/64", + "insecure-registries" : ["dockerhub-proxy.dockerhub-proxy-zone:5000"], + "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] } EOT diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 0c2768b9ac3..f2502f605af 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -23,7 +23,7 @@ SUSPICIOUS_PATTERNS = [ ] MAX_RETRY = 5 -MAX_WORKFLOW_RERUN = 5 +MAX_WORKFLOW_RERUN = 7 WorkflowDescription = namedtuple('WorkflowDescription', ['name', 'action', 'run_id', 'event', 'workflow_id', 'conclusion', 'status', 'api_url', @@ -44,6 +44,7 @@ NEED_RERUN_WORKFLOWS = { 15834118, # Docs 15522500, # MasterCI 15516108, # ReleaseCI + 15797242, # BackportPR } # Individual trusted contirbutors who are not in any trusted organization. @@ -61,6 +62,7 @@ TRUSTED_CONTRIBUTORS = {e.lower() for e in [ "bharatnc", # Newbie, but already with many contributions. 
"bobrik", # Seasoned contributor, CloundFlare "BohuTANG", + "cwurm", # Employee "damozhaeva", # DOCSUP "den-crane", "gyuton", # DOCSUP diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a75e0a2b4ea..cb8d5914362 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -318,6 +318,7 @@ class FailureReason(enum.Enum): EXCEPTION = "having having exception in stdout: " RESULT_DIFF = "result differs with reference: " TOO_LONG = "Test runs too long (> 60s). Make it faster." + INTERNAL_QUERY_FAIL = "Internal query (CREATE/DROP DATABASE) failed:" # SKIPPED reasons DISABLED = "disabled" @@ -355,6 +356,14 @@ class TestResult: class TestCase: + @staticmethod + def get_description_from_exception_info(exc_info): + exc_type, exc_value, tb = exc_info + exc_name = exc_type.__name__ + traceback_str = "\n".join(traceback.format_tb(tb, 10)) + description = f"\n{exc_name}\n{exc_value}\n{traceback_str}" + return description + @staticmethod def get_reference_file(suite_dir, name): """ @@ -662,13 +671,21 @@ class TestCase: return result except KeyboardInterrupt as e: raise e + except HTTPError: + return TestResult(self.name, TestStatus.FAIL, + FailureReason.INTERNAL_QUERY_FAIL, + 0., + self.get_description_from_exception_info(sys.exc_info())) + except (ConnectionRefusedError, ConnectionResetError): + return TestResult(self.name, TestStatus.FAIL, + FailureReason.SERVER_DIED, + 0., + self.get_description_from_exception_info(sys.exc_info())) except: - exc_type, exc_value, tb = sys.exc_info() - exc_name = exc_type.__name__ - traceback_str = "\n".join(traceback.format_tb(tb, 10)) - description = f"{exc_name}\n{exc_value}\n{traceback_str}" - return TestResult(self.name, TestStatus.UNKNOWN, FailureReason.INTERNAL_ERROR, 0., description) - + return TestResult(self.name, TestStatus.UNKNOWN, + FailureReason.INTERNAL_ERROR, + 0., + self.get_description_from_exception_info(sys.exc_info())) class TestSuite: @staticmethod diff --git a/tests/config/executable_pool_dictionary.xml b/tests/config/executable_pool_dictionary.xml index 13f34f0048e..212552a6776 100644 --- a/tests/config/executable_pool_dictionary.xml +++ b/tests/config/executable_pool_dictionary.xml @@ -61,10 +61,11 @@ - + TabSeparated while read read_data; do printf "$read_data\tvalue a\tvalue b\n"; done - + 5 + diff --git a/tests/config/test_function.xml b/tests/config/test_function.xml index 2e31c9677ec..928cbd75c78 100644 --- a/tests/config/test_function.xml +++ b/tests/config/test_function.xml @@ -11,6 +11,6 @@ TabSeparated cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table" - 0 + 0 diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index c8745294c5b..830b8e149f6 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -630,7 +630,7 @@ class ClickhouseIntegrationTestsRunner: random.shuffle(items_to_run) for group, tests in items_to_run: - logging.info("Running test group %s countaining %s tests", group, len(tests)) + logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group(repo_path, group, tests, MAX_RETRY, NUM_WORKERS) total_tests = 0 for counter, value in group_counters.items(): diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 9461f4a81c5..4b0a9a2835b 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -8,12 +8,12 @@ from helpers.network 
import _NetworkManager @pytest.fixture(autouse=True, scope="session") def cleanup_environment(): try: - if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS")) == 1: + if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) == 1: logging.debug(f"Cleaning all iptables rules") _NetworkManager.clean_all_user_iptables_rules() result = run_and_check(['docker ps | wc -l'], shell=True) if int(result) > 1: - if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS")) != 1: + if int(os.environ.get("PYTEST_CLEANUP_CONTAINERS", 0)) != 1: logging.warning(f"Docker containters({int(result)}) are running before tests run. They can be left from previous pytest run and cause test failures.\n"\ "You can set env PYTEST_CLEANUP_CONTAINERS=1 or use runner with --cleanup-containers argument to enable automatic containers cleanup.") else: diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 96c44a9bbf6..0c513f68c32 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1476,6 +1476,18 @@ class ClickHouseCluster: common_opts = ['--verbose', 'up', '-d'] + images_pull_cmd = self.base_cmd + ['pull'] + # sometimes dockerhub/proxy can be flaky + for i in range(5): + try: + run_and_check(images_pull_cmd) + break + except Exception as ex: + if i == 4: + raise ex + logging.info("Got exception pulling images: %s", ex) + time.sleep(i * 3) + if self.with_zookeeper_secure and self.base_zookeeper_cmd: logging.debug('Setup ZooKeeper Secure') logging.debug(f'Creating internal ZooKeeper dirs: {self.zookeeper_dirs_to_create}') @@ -1527,7 +1539,7 @@ class ClickHouseCluster: if os.path.exists(self.mysql_dir): shutil.rmtree(self.mysql_dir) os.makedirs(self.mysql_logs_dir) - os.chmod(self.mysql_logs_dir, stat.S_IRWXO) + os.chmod(self.mysql_logs_dir, stat.S_IRWXU | stat.S_IRWXO) subprocess_check_call(self.base_mysql_cmd + common_opts) self.up_called = True self.wait_mysql_to_start() @@ -1537,7 +1549,7 @@ class ClickHouseCluster: if os.path.exists(self.mysql8_dir): shutil.rmtree(self.mysql8_dir) os.makedirs(self.mysql8_logs_dir) - os.chmod(self.mysql8_logs_dir, stat.S_IRWXO) + os.chmod(self.mysql8_logs_dir, stat.S_IRWXU | stat.S_IRWXO) subprocess_check_call(self.base_mysql8_cmd + common_opts) self.wait_mysql8_to_start() @@ -1546,7 +1558,7 @@ class ClickHouseCluster: if os.path.exists(self.mysql_cluster_dir): shutil.rmtree(self.mysql_cluster_dir) os.makedirs(self.mysql_cluster_logs_dir) - os.chmod(self.mysql_cluster_logs_dir, stat.S_IRWXO) + os.chmod(self.mysql_cluster_logs_dir, stat.S_IRWXU | stat.S_IRWXO) subprocess_check_call(self.base_mysql_cluster_cmd + common_opts) self.up_called = True @@ -1557,7 +1569,7 @@ class ClickHouseCluster: if os.path.exists(self.postgres_dir): shutil.rmtree(self.postgres_dir) os.makedirs(self.postgres_logs_dir) - os.chmod(self.postgres_logs_dir, stat.S_IRWXO) + os.chmod(self.postgres_logs_dir, stat.S_IRWXU | stat.S_IRWXO) subprocess_check_call(self.base_postgres_cmd + common_opts) self.up_called = True @@ -1566,11 +1578,11 @@ class ClickHouseCluster: if self.with_postgres_cluster and self.base_postgres_cluster_cmd: print('Setup Postgres') os.makedirs(self.postgres2_logs_dir) - os.chmod(self.postgres2_logs_dir, stat.S_IRWXO) + os.chmod(self.postgres2_logs_dir, stat.S_IRWXU | stat.S_IRWXO) os.makedirs(self.postgres3_logs_dir) - os.chmod(self.postgres3_logs_dir, stat.S_IRWXO) + os.chmod(self.postgres3_logs_dir, stat.S_IRWXU | stat.S_IRWXO) os.makedirs(self.postgres4_logs_dir) - os.chmod(self.postgres4_logs_dir, stat.S_IRWXO) + 
os.chmod(self.postgres4_logs_dir, stat.S_IRWXU | stat.S_IRWXO) subprocess_check_call(self.base_postgres_cluster_cmd + common_opts) self.up_called = True self.wait_postgres_cluster_to_start() @@ -1591,7 +1603,7 @@ class ClickHouseCluster: if self.with_rabbitmq and self.base_rabbitmq_cmd: logging.debug('Setup RabbitMQ') os.makedirs(self.rabbitmq_logs_dir) - os.chmod(self.rabbitmq_logs_dir, stat.S_IRWXO) + os.chmod(self.rabbitmq_logs_dir, stat.S_IRWXU | stat.S_IRWXO) for i in range(5): subprocess_check_call(self.base_rabbitmq_cmd + common_opts + ['--renew-anon-volumes']) @@ -1604,7 +1616,7 @@ class ClickHouseCluster: if self.with_hdfs and self.base_hdfs_cmd: logging.debug('Setup HDFS') os.makedirs(self.hdfs_logs_dir) - os.chmod(self.hdfs_logs_dir, stat.S_IRWXO) + os.chmod(self.hdfs_logs_dir, stat.S_IRWXU | stat.S_IRWXO) subprocess_check_call(self.base_hdfs_cmd + common_opts) self.up_called = True self.make_hdfs_api() @@ -1613,7 +1625,7 @@ class ClickHouseCluster: if self.with_kerberized_hdfs and self.base_kerberized_hdfs_cmd: logging.debug('Setup kerberized HDFS') os.makedirs(self.hdfs_kerberized_logs_dir) - os.chmod(self.hdfs_kerberized_logs_dir, stat.S_IRWXO) + os.chmod(self.hdfs_kerberized_logs_dir, stat.S_IRWXU | stat.S_IRWXO) run_and_check(self.base_kerberized_hdfs_cmd + common_opts) self.up_called = True self.make_hdfs_api(kerberized=True) @@ -1669,7 +1681,7 @@ class ClickHouseCluster: if self.with_jdbc_bridge and self.base_jdbc_bridge_cmd: os.makedirs(self.jdbc_driver_logs_dir) - os.chmod(self.jdbc_driver_logs_dir, stat.S_IRWXO) + os.chmod(self.jdbc_driver_logs_dir, stat.S_IRWXU | stat.S_IRWXO) subprocess_check_call(self.base_jdbc_bridge_cmd + ['up', '-d']) self.up_called = True @@ -2043,7 +2055,8 @@ class ClickHouseInstance: user=user, password=password, database=database) # Connects to the instance via HTTP interface, sends a query and returns the answer - def http_query(self, sql, data=None, params=None, user=None, password=None, expect_fail_and_get_error=False): + def http_query(self, sql, data=None, params=None, user=None, password=None, expect_fail_and_get_error=False, + port=8123, timeout=None, retry_strategy=None): logging.debug(f"Executing query {sql} on {self.name} via HTTP interface") if params is None: params = {} @@ -2057,12 +2070,19 @@ class ClickHouseInstance: auth = requests.auth.HTTPBasicAuth(user, password) elif user: auth = requests.auth.HTTPBasicAuth(user, '') - url = "http://" + self.ip_address + ":8123/?" + urllib.parse.urlencode(params) + url = f"http://{self.ip_address}:{port}/?" 
+ urllib.parse.urlencode(params) - if data: - r = requests.post(url, data, auth=auth) + if retry_strategy is None: + requester = requests else: - r = requests.get(url, auth=auth) + adapter = requests.adapters.HTTPAdapter(max_retries=retry_strategy) + requester = requests.Session() + requester.mount("https://", adapter) + requester.mount("http://", adapter) + if data: + r = requester.post(url, data, auth=auth, timeout=timeout) + else: + r = requester.get(url, auth=auth, timeout=timeout) def http_code_and_message(): code = r.status_code diff --git a/tests/integration/pytest.ini b/tests/integration/pytest.ini index 2238b173227..2a57ea5a229 100644 --- a/tests/integration/pytest.ini +++ b/tests/integration/pytest.ini @@ -1,7 +1,7 @@ [pytest] python_files = test*.py norecursedirs = _instances* -timeout = 1800 +timeout = 900 junit_duration_report = call junit_suite_name = integration log_level = DEBUG diff --git a/tests/integration/test_async_drain_connection/test.py b/tests/integration/test_async_drain_connection/test.py index 21f9b142e7a..40d78ebbe7c 100644 --- a/tests/integration/test_async_drain_connection/test.py +++ b/tests/integration/test_async_drain_connection/test.py @@ -1,21 +1,21 @@ -import os -import sys -import time -from multiprocessing.dummy import Pool +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument + import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", main_configs=["configs/config.xml"]) +node = cluster.add_instance('node', main_configs=['configs/config.xml']) -@pytest.fixture(scope="module") +@pytest.fixture(scope='module') def started_cluster(): try: cluster.start() - node.query( - 'create table t (number UInt64) engine = Distributed(test_cluster_two_shards, system, numbers);' - ) + node.query(""" + create table t (number UInt64) + engine = Distributed(test_cluster_two_shards, system, numbers) + """) yield cluster finally: @@ -23,14 +23,14 @@ def started_cluster(): def test_filled_async_drain_connection_pool(started_cluster): - busy_pool = Pool(10) - - def execute_query(i): + def execute_queries(_): for _ in range(100): - node.query('select * from t where number = 0 limit 2;', - settings={ - "sleep_in_receive_cancel_ms": 10000000, - "max_execution_time": 5 - }) + node.query('select * from t where number = 0 limit 2', settings={ + 'sleep_in_receive_cancel_ms': int(10e6), + 'max_execution_time': 5, + # decrease drain_timeout to make test more stable + # (another way is to increase max_execution_time, but this will make test slower) + 'drain_timeout': 1, + }) - p = busy_pool.map(execute_query, range(10)) + any(map(execute_queries, range(10))) diff --git a/tests/integration/test_blob_storage_zero_copy_replication/__init__.py b/tests/integration/test_azure_blob_storage_zero_copy_replication/__init__.py similarity index 100% rename from tests/integration/test_blob_storage_zero_copy_replication/__init__.py rename to tests/integration/test_azure_blob_storage_zero_copy_replication/__init__.py diff --git a/tests/integration/test_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml similarity index 96% rename from tests/integration/test_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml rename to tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml index 73eae98a80d..4235083f5ca 100644 --- 
a/tests/integration/test_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml +++ b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml @@ -2,7 +2,7 @@ - blob_storage + azure_blob_storage http://azurite1:10000/devstoreaccount1 cont false diff --git a/tests/integration/test_blob_storage_zero_copy_replication/test.py b/tests/integration/test_azure_blob_storage_zero_copy_replication/test.py similarity index 100% rename from tests/integration/test_blob_storage_zero_copy_replication/test.py rename to tests/integration/test_azure_blob_storage_zero_copy_replication/test.py diff --git a/tests/integration/test_merge_tree_blob_storage/__init__.py b/tests/integration/test_cluster_discovery/__init__.py similarity index 100% rename from tests/integration/test_merge_tree_blob_storage/__init__.py rename to tests/integration/test_cluster_discovery/__init__.py diff --git a/tests/integration/test_cluster_discovery/config/config.xml b/tests/integration/test_cluster_discovery/config/config.xml new file mode 100644 index 00000000000..70cb010fe0e --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config.xml @@ -0,0 +1,23 @@ + + 1 + + + + /clickhouse/discovery/test_auto_cluster + + + + + + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_cluster_discovery/config/config_shard1.xml b/tests/integration/test_cluster_discovery/config/config_shard1.xml new file mode 100644 index 00000000000..06a77a37263 --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config_shard1.xml @@ -0,0 +1,24 @@ + + 1 + + + + /clickhouse/discovery/test_auto_cluster + 1 + + + + + + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_cluster_discovery/config/config_shard3.xml b/tests/integration/test_cluster_discovery/config/config_shard3.xml new file mode 100644 index 00000000000..ab66fdc2ab7 --- /dev/null +++ b/tests/integration/test_cluster_discovery/config/config_shard3.xml @@ -0,0 +1,24 @@ + + 1 + + + + /clickhouse/discovery/test_auto_cluster + 3 + + + + + + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_cluster_discovery/test.py b/tests/integration/test_cluster_discovery/test.py new file mode 100644 index 00000000000..acddd855040 --- /dev/null +++ b/tests/integration/test_cluster_discovery/test.py @@ -0,0 +1,81 @@ +import pytest + +import functools +import time + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +shard_configs = { + i: f'config/config_shard{i}.xml' + for i in [1, 3] +} + +nodes = [ + cluster.add_instance( + f'node{i}', + main_configs=[shard_configs.get(i, 'config/config.xml')], + stay_alive=True, + with_zookeeper=True + ) for i in range(5) +] + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def check_on_cluster(nodes, expected, *, what, cluster_name='test_auto_cluster', msg=None, retries=5): + """ + Select data from `system.clusters` on specified nodes and check the result + """ + assert 1 <= retries <= 6 + + for retry in range(1, retries + 1): + nodes_res = { + node.name: int(node.query(f"SELECT {what} FROM system.clusters WHERE cluster = '{cluster_name}'")) + for node in nodes + } + if all(actual == expected for actual in nodes_res.values()): + break + + if retry != retries: + time.sleep(2 ** retry) + else: + msg = msg or f"Wrong '{what}' result" + raise Exception(f'{msg}: {nodes_res}, expected: {expected} 
(after {retries} retries)') + + +def test_cluster_discovery_startup_and_stop(start_cluster): + """ + Start cluster, check nodes count in system.clusters, + then stop/start some nodes and check that it (dis)appeared in cluster. + """ + + check_nodes_count = functools.partial(check_on_cluster, what='count()', msg='Wrong nodes count in cluster') + check_shard_num = functools.partial(check_on_cluster, what='count(DISTINCT shard_num)', msg='Wrong shard_num count in cluster') + + total_shards = len(shard_configs) + 1 + check_nodes_count([nodes[0], nodes[2]], len(nodes)) + check_shard_num([nodes[0], nodes[2]], total_shards) + + nodes[1].stop_clickhouse(kill=True) + check_nodes_count([nodes[0], nodes[2]], len(nodes) - 1) + check_shard_num([nodes[0], nodes[2]], total_shards - 1) + + nodes[3].stop_clickhouse() + check_nodes_count([nodes[0], nodes[2]], len(nodes) - 2) + + nodes[1].start_clickhouse() + check_nodes_count([nodes[0], nodes[2]], len(nodes) - 1) + + nodes[3].start_clickhouse() + check_nodes_count([nodes[0], nodes[2]], len(nodes)) + + check_nodes_count([nodes[1], nodes[2]], 2, cluster_name='two_shards', retries=1) diff --git a/tests/integration/test_executable_dictionary/__init__.py b/tests/integration/test_executable_dictionary/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_executable_dictionary/config/dictionaries_config.xml b/tests/integration/test_executable_dictionary/config/dictionaries_config.xml new file mode 100644 index 00000000000..3cbf717bb67 --- /dev/null +++ b/tests/integration/test_executable_dictionary/config/dictionaries_config.xml @@ -0,0 +1,2 @@ + + diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_input_argument_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_input_argument_python_dictionary.xml new file mode 100644 index 00000000000..ddbb8e95abb --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_input_argument_python_dictionary.xml @@ -0,0 +1,99 @@ + + + executable_input_argument_python + + + TabSeparated + input_argument.py 1 + 1 + + + + + + + + input + + + result + String + + + + + + + executable_input_argument_pool_python + + + TabSeparated + input_argument.py 1 + 1 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_argument_python + + + TabSeparated + input_implicit_argument.py 1 + 1 + 1 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_argument_pool_python + + + TabSeparated + input_implicit_argument.py 1 + 1 + 1 + + + + + + + + input + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_input_bash_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_input_bash_dictionary.xml new file mode 100644 index 00000000000..488a12de115 --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_input_bash_dictionary.xml @@ -0,0 +1,99 @@ + + + executable_input_bash + + + TabSeparated + input.sh + 1 + + + + + + + + input + + + result + String + + + + + + + executable_input_pool_bash + + + TabSeparated + input.sh + 1 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_bash + + + TabSeparated + input_implicit.sh + 1 + 1 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_pool_bash + + + TabSeparated + input_implicit.sh + 1 + 1 + + + + + 
+ + + input + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_input_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_input_python_dictionary.xml new file mode 100644 index 00000000000..5b551e51951 --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_input_python_dictionary.xml @@ -0,0 +1,99 @@ + + + executable_input_python + + + TabSeparated + input.py + 1 + + + + + + + + input + + + result + String + + + + + + + executable_input_pool_python + + + TabSeparated + input.py + 1 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_python + + + TabSeparated + input_implicit.py + 1 + 1 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_pool_python + + + TabSeparated + input_implicit.py + 1 + 1 + + + + + + + + input + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_input_send_chunk_header_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_input_send_chunk_header_python_dictionary.xml new file mode 100644 index 00000000000..816cb0db2c5 --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_input_send_chunk_header_python_dictionary.xml @@ -0,0 +1,103 @@ + + + executable_input_send_chunk_header_python + + + TabSeparated + input_chunk_header.py + 1 + 1 + + + + + + + + input + + + result + String + + + + + + + executable_input_send_chunk_header_pool_python + + + TabSeparated + input_chunk_header.py + 1 + 1 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_send_chunk_header_python + + + TabSeparated + input_implicit_chunk_header.py + 1 + 1 + 1 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_send_chunk_header_pool_python + + + TabSeparated + input_implicit_chunk_header.py + 1 + 1 + 1 + + + + + + + + input + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_input_signalled_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_input_signalled_python_dictionary.xml new file mode 100644 index 00000000000..71f8873b20e --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_input_signalled_python_dictionary.xml @@ -0,0 +1,103 @@ + + + executable_input_signalled_python + + + TabSeparated + input_signalled.py + 1 + 1000 + + + + + + + + input + + + result + String + Default result + + + + + + executable_input_signalled_pool_python + + + TabSeparated + input_signalled.py + 1 + 1000 + + + + + + + + input + + + result + String + Default result + + + + + + executable_implicit_input_signalled_python + + + TabSeparated + input_implicit_signalled.py + 1 + 1 + 1000 + + + + + + + + input + + + result + String + Default result + + + + + + executable_implicit_input_signalled_pool_python + + + TabSeparated + input_implicit_signalled.py + 1 + 1 + 1000 + + + + + + + + input + + + result + String + Default result + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_input_slow_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_input_slow_python_dictionary.xml new file mode 100644 index 
00000000000..dee161a9b78 --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_input_slow_python_dictionary.xml @@ -0,0 +1,103 @@ + + + executable_input_slow_python + + + TabSeparated + input_slow.py + 1 + 1000 + + + + + + + + input + + + result + String + + + + + + + executable_input_slow_pool_python + + + TabSeparated + input_slow.py + 1 + 1000 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_slow_python + + + TabSeparated + input_implicit_slow.py + 1 + 1 + 1000 + + + + + + + + input + + + result + String + + + + + + + executable_implicit_input_slow_pool_python + + + TabSeparated + input_implicit_slow.py + 1 + 1 + 1000 + + + + + + + + input + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_input_sum_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_input_sum_python_dictionary.xml new file mode 100644 index 00000000000..3f63e7b8671 --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_input_sum_python_dictionary.xml @@ -0,0 +1,128 @@ + + + executable_input_sum_python + + + TabSeparated + input_sum.py + 1 + + + + + + + + + first_argument + UInt64 + + + second_argument + UInt64 + + + + result + UInt64 + + + + + + + executable_input_sum_pool_python + + + TabSeparated + input_sum.py + 1 + + + + + + + + + first_argument + UInt64 + + + second_argument + UInt64 + + + + result + UInt64 + + + + + + + + executable_implicit_input_sum_python + + + TabSeparated + input_implicit_sum.py + 1 + 1 + + + + + + + + + first_argument + UInt64 + + + second_argument + UInt64 + + + + result + UInt64 + + + + + + + executable_implicit_input_sum_pool_python + + + TabSeparated + input_implicit_sum.py + 1 + 1 + + + + + + + + + first_argument + UInt64 + + + second_argument + UInt64 + + + + result + UInt64 + + + + + diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_non_direct_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_non_direct_dictionary.xml new file mode 100644 index 00000000000..3f77dae1ac6 --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_non_direct_dictionary.xml @@ -0,0 +1,95 @@ + + + executable_input_non_direct_bash + + + TabSeparated + while read read_data; do printf "$read_data\tKey $read_data\n"; done + + + + + + + + input + + + result + String + + + + + + + executable_input_non_direct_pool_bash + + + TabSeparated + while read read_data; do printf "$read_data\tKey $read_data\n"; done + + + + + + + + input + + + result + String + + + + + + + executable_input_implicit_non_direct_bash + + + TabSeparated + while read read_data; do printf "Key $read_data\n"; done + 1 + + + + + + + + input + + + result + String + + + + + + + executable_input_implicit_non_direct_pool_bash + + + TabSeparated + while read read_data; do printf "Key $read_data\n"; done + 1 + + + + + + + + input + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_source_argument_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_source_argument_python_dictionary.xml new file mode 100644 index 00000000000..3173eb5500d --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_source_argument_python_dictionary.xml @@ -0,0 +1,54 @@ + + + 
executable_source_simple_key_argument_python + + + TabSeparated + source_argument.py 1 + 1 + + + + + + 0 + + + input + + + result + String + + + + + + + executable_source_complex_key_argument_python + + + TabSeparated + source_argument.py 1 + 1 + + + + + + 0 + + + + input + UInt64 + + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_source_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_source_python_dictionary.xml new file mode 100644 index 00000000000..a2036fc67bb --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_source_python_dictionary.xml @@ -0,0 +1,54 @@ + + + executable_source_simple_key_python + + + TabSeparated + source.py + 1 + + + + + + 0 + + + input + + + result + String + + + + + + + executable_source_complex_key_python + + + TabSeparated + source.py + 1 + + + + + + 0 + + + + input + UInt64 + + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/dictionaries/executable_source_updated_python_dictionary.xml b/tests/integration/test_executable_dictionary/dictionaries/executable_source_updated_python_dictionary.xml new file mode 100644 index 00000000000..10d1b1ca0c6 --- /dev/null +++ b/tests/integration/test_executable_dictionary/dictionaries/executable_source_updated_python_dictionary.xml @@ -0,0 +1,56 @@ + + + executable_source_simple_key_update_python + + + TabSeparated + source_update.py + 1 + 1 + + + + + + 5 + + + input + + + result + String + + + + + + + executable_source_complex_key_update_python + + + TabSeparated + source_update.py + 1 + 1 + + + + + + 5 + + + + input + UInt64 + + + + result + String + + + + + \ No newline at end of file diff --git a/tests/integration/test_executable_dictionary/test.py b/tests/integration/test_executable_dictionary/test.py new file mode 100644 index 00000000000..5e50a092a29 --- /dev/null +++ b/tests/integration/test_executable_dictionary/test.py @@ -0,0 +1,175 @@ +import os +import sys +import time + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', stay_alive=True, main_configs=[]) + + +def skip_test_msan(instance): + if instance.is_built_with_memory_sanitizer(): + pytest.skip("Memory Sanitizer cannot work with vfork") + +def copy_file_to_container(local_path, dist_path, container_id): + os.system("docker cp {local} {cont_id}:{dist}".format(local=local_path, cont_id=container_id, dist=dist_path)) + +config = ''' + /etc/clickhouse-server/dictionaries/*_dictionary.xml +''' + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node.replace_config("/etc/clickhouse-server/config.d/dictionaries_config.xml", config) + + copy_file_to_container(os.path.join(SCRIPT_DIR, 'dictionaries/.'), '/etc/clickhouse-server/dictionaries', node.docker_id) + copy_file_to_container(os.path.join(SCRIPT_DIR, 'user_scripts/.'), '/var/lib/clickhouse/user_scripts', node.docker_id) + + node.restart_clickhouse() + + yield cluster + + finally: + cluster.shutdown() + +def test_executable_input_bash(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_input_bash', 'result', toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT 
dictGet('executable_input_pool_bash', 'result', toUInt64(1))") == 'Key 1\n' + +def test_executable_implicit_input_bash(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_implicit_input_bash', 'result', toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT dictGet('executable_implicit_input_pool_bash', 'result', toUInt64(1))") == 'Key 1\n' + +def test_executable_input_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_input_python', 'result', toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT dictGet('executable_input_pool_python', 'result', toUInt64(1))") == 'Key 1\n' + +def test_executable_implicit_input_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_implicit_input_python', 'result', toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT dictGet('executable_implicit_input_pool_python', 'result', toUInt64(1))") == 'Key 1\n' + +def test_executable_input_send_chunk_header_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_input_send_chunk_header_python', 'result', toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT dictGet('executable_input_send_chunk_header_pool_python', 'result', toUInt64(1))") == 'Key 1\n' + +def test_executable_implicit_input_send_chunk_header_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_implicit_input_send_chunk_header_python', 'result', toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT dictGet('executable_implicit_input_send_chunk_header_pool_python', 'result', toUInt64(1))") == 'Key 1\n' + +def test_executable_input_sum_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_input_sum_python', 'result', tuple(toUInt64(1), toUInt64(1)))") == '2\n' + assert node.query("SELECT dictGet('executable_input_sum_pool_python', 'result', tuple(toUInt64(1), toUInt64(1)))") == '2\n' + +def test_executable_implicit_input_sum_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_implicit_input_sum_python', 'result', tuple(toUInt64(1), toUInt64(1)))") == '2\n' + assert node.query("SELECT dictGet('executable_implicit_input_sum_pool_python', 'result', tuple(toUInt64(1), toUInt64(1)))") == '2\n' + +def test_executable_input_argument_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_input_argument_python', 'result', toUInt64(1))") == 'Key 1 1\n' + assert node.query("SELECT dictGet('executable_input_argument_pool_python', 'result', toUInt64(1))") == 'Key 1 1\n' + +def test_executable_implicit_input_argument_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_implicit_input_argument_python', 'result', toUInt64(1))") == 'Key 1 1\n' + assert node.query("SELECT dictGet('executable_implicit_input_argument_pool_python', 'result', toUInt64(1))") == 'Key 1 1\n' + +def test_executable_input_signalled_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_input_signalled_python', 'result', toUInt64(1))") == 'Default result\n' + assert node.query("SELECT dictGet('executable_input_signalled_pool_python', 'result', toUInt64(1))") == 'Default result\n' + +def test_executable_implicit_input_signalled_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_implicit_input_signalled_python', 'result', toUInt64(1))") 
== 'Default result\n' + assert node.query("SELECT dictGet('executable_implicit_input_signalled_pool_python', 'result', toUInt64(1))") == 'Default result\n' + +def test_executable_input_slow_python(started_cluster): + skip_test_msan(node) + assert node.query_and_get_error("SELECT dictGet('executable_input_slow_python', 'result', toUInt64(1))") + assert node.query_and_get_error("SELECT dictGet('executable_input_slow_pool_python', 'result', toUInt64(1))") + +def test_executable_implicit_input_slow_python(started_cluster): + skip_test_msan(node) + assert node.query_and_get_error("SELECT dictGet('executable_implicit_input_slow_python', 'result', toUInt64(1))") + assert node.query_and_get_error("SELECT dictGet('executable_implicit_input_slow_pool_python', 'result', toUInt64(1))") + +def test_executable_input_slow_python(started_cluster): + skip_test_msan(node) + assert node.query_and_get_error("SELECT dictGet('executable_input_slow_python', 'result', toUInt64(1))") + assert node.query_and_get_error("SELECT dictGet('executable_input_slow_pool_python', 'result', toUInt64(1))") + +def test_executable_implicit_input_slow_python(started_cluster): + skip_test_msan(node) + assert node.query_and_get_error("SELECT dictGet('executable_implicit_input_slow_python', 'result', toUInt64(1))") + assert node.query_and_get_error("SELECT dictGet('executable_implicit_input_slow_pool_python', 'result', toUInt64(1))") + +def test_executable_non_direct_input_bash(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_input_non_direct_bash', 'result', toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT dictGet('executable_input_non_direct_pool_bash', 'result', toUInt64(1))") == 'Key 1\n' + +def test_executable_implicit_non_direct_input_bash(started_cluster): + skip_test_msan(node) + assert node.query("SELECT dictGet('executable_input_implicit_non_direct_bash', 'result', toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT dictGet('executable_input_implicit_non_direct_pool_bash', 'result', toUInt64(1))") == 'Key 1\n' + +def test_executable_source_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT * FROM dictionary(executable_source_simple_key_python) ORDER BY input") == '1\tValue 1\n2\tValue 2\n3\tValue 3\n' + assert node.query("SELECT dictGet('executable_source_simple_key_python', 'result', toUInt64(1))") == 'Value 1\n' + assert node.query("SELECT dictGet('executable_source_simple_key_python', 'result', toUInt64(2))") == 'Value 2\n' + assert node.query("SELECT dictGet('executable_source_simple_key_python', 'result', toUInt64(3))") == 'Value 3\n' + + assert node.query("SELECT * FROM dictionary('executable_source_complex_key_python') ORDER BY input") == '1\tValue 1\n2\tValue 2\n3\tValue 3\n' + assert node.query("SELECT dictGet('executable_source_complex_key_python', 'result', tuple(toUInt64(1)))") == 'Value 1\n' + assert node.query("SELECT dictGet('executable_source_complex_key_python', 'result', tuple(toUInt64(2)))") == 'Value 2\n' + assert node.query("SELECT dictGet('executable_source_complex_key_python', 'result', tuple(toUInt64(3)))") == 'Value 3\n' + +def test_executable_source_argument_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT * FROM dictionary(executable_source_simple_key_argument_python) ORDER BY input") == '1\tValue 1 1\n2\tValue 1 2\n3\tValue 1 3\n' + assert node.query("SELECT dictGet('executable_source_simple_key_argument_python', 'result', toUInt64(1))") == 'Value 1 1\n' + assert node.query("SELECT 
dictGet('executable_source_simple_key_argument_python', 'result', toUInt64(2))") == 'Value 1 2\n' + assert node.query("SELECT dictGet('executable_source_simple_key_argument_python', 'result', toUInt64(3))") == 'Value 1 3\n' + + assert node.query("SELECT * FROM dictionary(executable_source_complex_key_argument_python) ORDER BY input") == '1\tValue 1 1\n2\tValue 1 2\n3\tValue 1 3\n' + assert node.query("SELECT dictGet('executable_source_complex_key_argument_python', 'result', toUInt64(1))") == 'Value 1 1\n' + assert node.query("SELECT dictGet('executable_source_complex_key_argument_python', 'result', toUInt64(2))") == 'Value 1 2\n' + assert node.query("SELECT dictGet('executable_source_complex_key_argument_python', 'result', toUInt64(3))") == 'Value 1 3\n' + +def test_executable_source_updated_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT * FROM dictionary(executable_source_simple_key_update_python) ORDER BY input") == '1\tValue 0 1\n' + assert node.query("SELECT dictGet('executable_source_simple_key_update_python', 'result', toUInt64(1))") == 'Value 0 1\n' + + time.sleep(10) + + assert node.query("SELECT * FROM dictionary(executable_source_simple_key_update_python) ORDER BY input") == '1\tValue 1 1\n' + assert node.query("SELECT dictGet('executable_source_simple_key_update_python', 'result', toUInt64(1))") == 'Value 1 1\n' + + assert node.query("SELECT * FROM dictionary(executable_source_complex_key_update_python) ORDER BY input") == '1\tValue 0 1\n' + assert node.query("SELECT dictGet('executable_source_complex_key_update_python', 'result', toUInt64(1))") == 'Value 0 1\n' + + time.sleep(10) + + assert node.query("SELECT * FROM dictionary(executable_source_complex_key_update_python) ORDER BY input") == '1\tValue 1 1\n' + assert node.query("SELECT dictGet('executable_source_complex_key_update_python', 'result', toUInt64(1))") == 'Value 1 1\n' + diff --git a/tests/integration/test_executable_dictionary/user_scripts/input.py b/tests/integration/test_executable_dictionary/user_scripts/input.py new file mode 100755 index 00000000000..e711dd8e306 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input.py @@ -0,0 +1,11 @@ +#!/usr/bin/python3 + +import sys +import os +import signal + +if __name__ == '__main__': + for line in sys.stdin: + updated_line = line.replace('\n', '') + print(updated_line + '\t' + "Key " + updated_line, end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input.sh b/tests/integration/test_executable_dictionary/user_scripts/input.sh new file mode 100755 index 00000000000..7712c392951 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +while read read_data; + do printf "$read_data\tKey $read_data\n"; +done diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_argument.py b/tests/integration/test_executable_dictionary/user_scripts/input_argument.py new file mode 100755 index 00000000000..163f9c4183f --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_argument.py @@ -0,0 +1,11 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + arg = int(sys.argv[1]) + + for line in sys.stdin: + updated_line = line.replace('\n', '') + print(updated_line + '\t' + "Key " + str(arg) + " " + updated_line, end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_chunk_header.py 
b/tests/integration/test_executable_dictionary/user_scripts/input_chunk_header.py new file mode 100755 index 00000000000..4eb00f64eb3 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_chunk_header.py @@ -0,0 +1,15 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + + while chunk_length != 0: + line = sys.stdin.readline() + updated_line = line.replace('\n', '') + chunk_length -= 1 + print(updated_line + '\t' + "Key " + updated_line, end='\n') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_implicit.py b/tests/integration/test_executable_dictionary/user_scripts/input_implicit.py new file mode 100755 index 00000000000..835ab1f441a --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_implicit.py @@ -0,0 +1,8 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for line in sys.stdin: + print("Key " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_implicit.sh b/tests/integration/test_executable_dictionary/user_scripts/input_implicit.sh new file mode 100755 index 00000000000..aea51b82b1f --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_implicit.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +while read read_data; + do printf "Key $read_data\n"; +done diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_implicit_argument.py b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_argument.py new file mode 100755 index 00000000000..c1b2e5966d7 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_argument.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + arg = int(sys.argv[1]) + + for line in sys.stdin: + print("Key " + str(arg) + " " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_implicit_chunk_header.py b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_chunk_header.py new file mode 100755 index 00000000000..5dc03e1c507 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_chunk_header.py @@ -0,0 +1,14 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + print("Key " + line, end='') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_implicit_signalled.py b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_signalled.py new file mode 100755 index 00000000000..27c8bc4840e --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_signalled.py @@ -0,0 +1,13 @@ +#!/usr/bin/python3 + +import sys +import os +import signal +import time + +if __name__ == '__main__': + for line in sys.stdin: + os.signal(os.getpid(), signal.SIGTERM) + + print("Key " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_implicit_slow.py b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_slow.py new file mode 100755 index 00000000000..648a9eac918 --- /dev/null +++ 
b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_slow.py @@ -0,0 +1,12 @@ +#!/usr/bin/python3 + +import sys +import os +import signal +import time + +if __name__ == '__main__': + for line in sys.stdin: + time.sleep(5) + print("Key " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_implicit_sum.py b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_sum.py new file mode 100755 index 00000000000..432d7a13a2f --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_implicit_sum.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys +import re + +if __name__ == '__main__': + for line in sys.stdin: + line_split = re.split(r'\t+', line) + print(int(line_split[0]) + int(line_split[1]), end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_signalled.py b/tests/integration/test_executable_dictionary/user_scripts/input_signalled.py new file mode 100755 index 00000000000..a3a99f1e71e --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_signalled.py @@ -0,0 +1,13 @@ +#!/usr/bin/python3 + +import sys +import os +import signal +import time + +if __name__ == '__main__': + for line in sys.stdin: + os.signal(os.getpid(), signal.SIGTERM) + updated_line = line.replace('\n', '') + print(updated_line + '\t' + "Key " + updated_line, end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_slow.py b/tests/integration/test_executable_dictionary/user_scripts/input_slow.py new file mode 100755 index 00000000000..a3b8c484b29 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_slow.py @@ -0,0 +1,13 @@ +#!/usr/bin/python3 + +import sys +import os +import signal +import time + +if __name__ == '__main__': + for line in sys.stdin: + time.sleep(5) + updated_line = line.replace('\n', '') + print(updated_line + '\t' + "Key " + updated_line, end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/input_sum.py b/tests/integration/test_executable_dictionary/user_scripts/input_sum.py new file mode 100755 index 00000000000..e9ec5028701 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/input_sum.py @@ -0,0 +1,12 @@ +#!/usr/bin/python3 + +import sys +import re + +if __name__ == '__main__': + for line in sys.stdin: + updated_line = line.replace('\n', '') + line_split = re.split(r'\t+', line) + sum = int(line_split[0]) + int(line_split[1]) + print(updated_line + '\t' + str(sum), end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/source.py b/tests/integration/test_executable_dictionary/user_scripts/source.py new file mode 100755 index 00000000000..e105773c467 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/source.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + print('1' + '\t' + 'Value 1', end='\n') + print('2' + '\t' + 'Value 2', end='\n') + print('3' + '\t' + 'Value 3', end='\n') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/source_argument.py b/tests/integration/test_executable_dictionary/user_scripts/source_argument.py new file mode 100755 index 00000000000..881e73adc97 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/source_argument.py @@ -0,0 +1,12 @@ 
+#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + arg = int(sys.argv[1]) + + print('1' + '\t' + 'Value ' + str(arg) + ' 1', end='\n') + print('2' + '\t' + 'Value ' + str(arg) + ' 2', end='\n') + print('3' + '\t' + 'Value ' + str(arg) + ' 3', end='\n') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_dictionary/user_scripts/source_update.py b/tests/integration/test_executable_dictionary/user_scripts/source_update.py new file mode 100755 index 00000000000..99388f9ada3 --- /dev/null +++ b/tests/integration/test_executable_dictionary/user_scripts/source_update.py @@ -0,0 +1,12 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + update_field_value = 0 + + if len(sys.argv) >= 2: + update_field_value = int(sys.argv[1]) + + print('1' + '\t' + 'Value ' + str(update_field_value) + ' 1', end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/test.py b/tests/integration/test_executable_table_function/test.py index f5537e26b94..7820396d20f 100644 --- a/tests/integration/test_executable_table_function/test.py +++ b/tests/integration/test_executable_table_function/test.py @@ -1,6 +1,5 @@ import os import sys -import time import pytest @@ -30,69 +29,353 @@ def started_cluster(): copy_file_to_container(os.path.join(SCRIPT_DIR, 'user_scripts/.'), '/var/lib/clickhouse/user_scripts', node.docker_id) node.restart_clickhouse() + node.query("CREATE TABLE test_data_table (id UInt64) ENGINE=TinyLog;") + node.query("INSERT INTO test_data_table VALUES (0), (1), (2);") + yield cluster finally: cluster.shutdown() -def test_executable_function_no_input(started_cluster): +def test_executable_function_no_input_bash(started_cluster): skip_test_msan(node) - assert node.query("SELECT * FROM executable('test_no_input.sh', 'TabSeparated', 'value UInt64')") == '1\n' + assert node.query("SELECT * FROM executable('no_input.sh', 'TabSeparated', 'value String')") == 'Key 0\nKey 1\nKey 2\n' -def test_executable_function_input(started_cluster): +def test_executable_function_no_input_python(started_cluster): skip_test_msan(node) - assert node.query("SELECT * FROM executable('test_input.sh', 'TabSeparated', 'value String', (SELECT 1))") == 'Key 1\n' + assert node.query("SELECT * FROM executable('no_input.py', 'TabSeparated', 'value String')") == 'Key 0\nKey 1\nKey 2\n' -def test_executable_function_input_multiple_pipes(started_cluster): +def test_executable_function_input_bash(started_cluster): skip_test_msan(node) - actual = node.query("SELECT * FROM executable('test_input_multiple_pipes.sh', 'TabSeparated', 'value String', (SELECT 1), (SELECT 2), (SELECT 3))") + + query = "SELECT * FROM executable('input.sh', 'TabSeparated', 'value String', {source})" + assert node.query(query.format(source='(SELECT 1)')) == 'Key 1\n' + assert node.query(query.format(source='(SELECT id FROM test_data_table)')) == 'Key 0\nKey 1\nKey 2\n' + +def test_executable_function_input_python(started_cluster): + skip_test_msan(node) + + query = "SELECT * FROM executable('input.py', 'TabSeparated', 'value String', {source})" + assert node.query(query.format(source='(SELECT 1)')) == 'Key 1\n' + assert node.query(query.format(source='(SELECT id FROM test_data_table)')) == 'Key 0\nKey 1\nKey 2\n' + +def test_executable_function_input_sum_python(started_cluster): + skip_test_msan(node) + + query = "SELECT * FROM executable('input_sum.py', 'TabSeparated', 'value UInt64', {source})" + assert node.query(query.format(source='(SELECT 1, 1)')) == '2\n' + assert 
node.query(query.format(source='(SELECT id, id FROM test_data_table)')) == '0\n2\n4\n' + +def test_executable_function_input_argument_python(started_cluster): + skip_test_msan(node) + + query = "SELECT * FROM executable('input_argument.py 1', 'TabSeparated', 'value String', {source})" + assert node.query(query.format(source='(SELECT 1)')) == 'Key 1 1\n' + assert node.query(query.format(source='(SELECT id FROM test_data_table)')) == 'Key 1 0\nKey 1 1\nKey 1 2\n' + +def test_executable_function_input_signalled_python(started_cluster): + skip_test_msan(node) + + query = "SELECT * FROM executable('input_signalled.py', 'TabSeparated', 'value String', {source})" + assert node.query(query.format(source='(SELECT 1)')) == '' + assert node.query(query.format(source='(SELECT id FROM test_data_table)')) == '' + +def test_executable_function_input_slow_python(started_cluster): + skip_test_msan(node) + + query = "SELECT * FROM executable('input_slow.py', 'TabSeparated', 'value String', {source})" + assert node.query_and_get_error(query.format(source='(SELECT 1)')) + assert node.query_and_get_error(query.format(source='(SELECT id FROM test_data_table)')) + +def test_executable_function_input_multiple_pipes_python(started_cluster): + skip_test_msan(node) + query = "SELECT * FROM executable('input_multiple_pipes.py', 'TabSeparated', 'value String', {source})" + actual = node.query(query.format(source='(SELECT 1), (SELECT 2), (SELECT 3)')) expected = 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 1\n' assert actual == expected -def test_executable_function_argument(started_cluster): - skip_test_msan(node) - assert node.query("SELECT * FROM executable('test_argument.sh 1', 'TabSeparated', 'value String')") == 'Key 1\n' - -def test_executable_storage_no_input(started_cluster): - skip_test_msan(node) - node.query("DROP TABLE IF EXISTS test_table") - node.query("CREATE TABLE test_table (value UInt64) ENGINE=Executable('test_no_input.sh', 'TabSeparated')") - assert node.query("SELECT * FROM test_table") == '1\n' - node.query("DROP TABLE test_table") - -def test_executable_storage_input(started_cluster): - skip_test_msan(node) - node.query("DROP TABLE IF EXISTS test_table") - node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_input.sh', 'TabSeparated', (SELECT 1))") - assert node.query("SELECT * FROM test_table") == 'Key 1\n' - node.query("DROP TABLE test_table") - -def test_executable_storage_input_multiple_pipes(started_cluster): - skip_test_msan(node) - node.query("DROP TABLE IF EXISTS test_table") - node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_input_multiple_pipes.sh', 'TabSeparated', (SELECT 1), (SELECT 2), (SELECT 3))") - actual = node.query("SELECT * FROM test_table") - expected = 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 1\n' + actual = node.query(query.format(source='(SELECT id FROM test_data_table), (SELECT 2), (SELECT 3)')) + expected = 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 0\nKey from 0 fd 1\nKey from 0 fd 2\n' assert actual == expected - node.query("DROP TABLE test_table") -def test_executable_storage_argument(started_cluster): +def test_executable_storage_no_input_bash(started_cluster): skip_test_msan(node) node.query("DROP TABLE IF EXISTS test_table") - node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_argument.sh 1', 'TabSeparated')") + node.query("CREATE TABLE test_table (value String) ENGINE=Executable('no_input.sh', 'TabSeparated')") + assert node.query("SELECT * FROM test_table") == 'Key 0\nKey 
1\nKey 2\n' + node.query("DROP TABLE test_table") + +def test_executable_storage_no_input_python(started_cluster): + skip_test_msan(node) + node.query("DROP TABLE IF EXISTS test_table") + node.query("CREATE TABLE test_table (value String) ENGINE=Executable('no_input.py', 'TabSeparated')") + assert node.query("SELECT * FROM test_table") == 'Key 0\nKey 1\nKey 2\n' + node.query("DROP TABLE test_table") + +def test_executable_storage_input_bash(started_cluster): + skip_test_msan(node) + + query = "CREATE TABLE test_table (value String) ENGINE=Executable('input.sh', 'TabSeparated', {source})" + + node.query("DROP TABLE IF EXISTS test_table") + node.query(query.format(source='(SELECT 1)')) assert node.query("SELECT * FROM test_table") == 'Key 1\n' node.query("DROP TABLE test_table") -def test_executable_pool_storage(started_cluster): + node.query(query.format(source='(SELECT id FROM test_data_table)')) + assert node.query("SELECT * FROM test_table") == 'Key 0\nKey 1\nKey 2\n' + node.query("DROP TABLE test_table") + +def test_executable_storage_input_python(started_cluster): skip_test_msan(node) + + query = "CREATE TABLE test_table (value String) ENGINE=Executable('input.py', 'TabSeparated', {source})" + node.query("DROP TABLE IF EXISTS test_table") - node.query("CREATE TABLE test_table (value String) ENGINE=ExecutablePool('test_input_process_pool.sh', 'TabSeparated', (SELECT 1))") + node.query(query.format(source='(SELECT 1)')) assert node.query("SELECT * FROM test_table") == 'Key 1\n' node.query("DROP TABLE test_table") -def test_executable_pool_storage_multiple_pipes(started_cluster): + node.query(query.format(source='(SELECT id FROM test_data_table)')) + assert node.query("SELECT * FROM test_table") == 'Key 0\nKey 1\nKey 2\n' + node.query("DROP TABLE test_table") + +def test_executable_storage_input_send_chunk_header_python(started_cluster): skip_test_msan(node) + + query = "CREATE TABLE test_table (value String) ENGINE=Executable('input_chunk_header.py', 'TabSeparated', {source}) SETTINGS send_chunk_header=1" + node.query("DROP TABLE IF EXISTS test_table") - node.query("CREATE TABLE test_table (value String) ENGINE=ExecutablePool('test_input_process_pool_multiple_pipes.sh', 'TabSeparated', (SELECT 1), (SELECT 2), (SELECT 3))") + node.query(query.format(source='(SELECT 1)')) + assert node.query("SELECT * FROM test_table") == 'Key 1\n' + node.query("DROP TABLE test_table") + + node.query(query.format(source='(SELECT id FROM test_data_table)')) + assert node.query("SELECT * FROM test_table") == 'Key 0\nKey 1\nKey 2\n' + node.query("DROP TABLE test_table") + +def test_executable_storage_input_sum_python(started_cluster): + skip_test_msan(node) + + query = "CREATE TABLE test_table (value UInt64) ENGINE=Executable('input_sum.py', 'TabSeparated', {source})" + + node.query("DROP TABLE IF EXISTS test_table") + node.query(query.format(source='(SELECT 1, 1)')) + assert node.query("SELECT * FROM test_table") == '2\n' + node.query("DROP TABLE test_table") + + node.query(query.format(source='(SELECT id, id FROM test_data_table)')) + assert node.query("SELECT * FROM test_table") == '0\n2\n4\n' + node.query("DROP TABLE test_table") + +def test_executable_storage_input_argument_python(started_cluster): + skip_test_msan(node) + + query = "CREATE TABLE test_table (value String) ENGINE=Executable('input_argument.py 1', 'TabSeparated', {source})" + + node.query("DROP TABLE IF EXISTS test_table") + node.query(query.format(source='(SELECT 1)')) + assert node.query("SELECT * FROM test_table") == 'Key 1 1\n' + 
node.query("DROP TABLE test_table") + +    node.query(query.format(source='(SELECT id FROM test_data_table)')) +    assert node.query("SELECT * FROM test_table") == 'Key 1 0\nKey 1 1\nKey 1 2\n' +    node.query("DROP TABLE test_table") + +def test_executable_storage_input_signalled_python(started_cluster): +    skip_test_msan(node) + +    query = "CREATE TABLE test_table (value String) ENGINE=Executable('input_signalled.py', 'TabSeparated', {source})" + +    node.query("DROP TABLE IF EXISTS test_table") +    node.query(query.format(source='(SELECT 1)')) +    assert node.query("SELECT * FROM test_table") == '' +    node.query("DROP TABLE test_table") + +    node.query(query.format(source='(SELECT id FROM test_data_table)')) +    assert node.query("SELECT * FROM test_table") == '' +    node.query("DROP TABLE test_table") + +def test_executable_storage_input_slow_python(started_cluster): +    skip_test_msan(node) + +    query = "CREATE TABLE test_table (value String) ENGINE=Executable('input_slow.py', 'TabSeparated', {source}) SETTINGS command_read_timeout=2500" + +    node.query("DROP TABLE IF EXISTS test_table") +    node.query(query.format(source='(SELECT 1)')) +    assert node.query_and_get_error("SELECT * FROM test_table") +    node.query("DROP TABLE test_table") + +    node.query(query.format(source='(SELECT id FROM test_data_table)')) +    assert node.query_and_get_error("SELECT * FROM test_table") +    node.query("DROP TABLE test_table") + +def test_executable_storage_input_multiple_pipes_python(started_cluster): +    skip_test_msan(node) + +    query = "CREATE TABLE test_table (value String) ENGINE=Executable('input_multiple_pipes.py', 'TabSeparated', {source})" + +    node.query("DROP TABLE IF EXISTS test_table") +    node.query(query.format(source='(SELECT 1), (SELECT 2), (SELECT 3)'))     assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 1\n'     node.query("DROP TABLE test_table") + +    node.query(query.format(source='(SELECT id FROM test_data_table), (SELECT 2), (SELECT 3)')) +    assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 0\nKey from 0 fd 1\nKey from 0 fd 2\n' +    node.query("DROP TABLE test_table") + +def test_executable_pool_storage_input_python(started_cluster): +    skip_test_msan(node) + +    query = "CREATE TABLE test_table (value String) ENGINE=ExecutablePool('input_pool.py', 'TabSeparated', {source}) SETTINGS send_chunk_header=1, pool_size=1" + +    node.query("DROP TABLE IF EXISTS test_table") +    node.query(query.format(source='(SELECT 1)')) + +    assert node.query("SELECT * FROM test_table") == 'Key 1\n' +    assert node.query("SELECT * FROM test_table") == 'Key 1\n' +    assert node.query("SELECT * FROM test_table") == 'Key 1\n' + +    node.query("DROP TABLE test_table") + +    node.query(query.format(source='(SELECT id FROM test_data_table)')) + +    assert node.query("SELECT * FROM test_table") == 'Key 0\nKey 1\nKey 2\n' +    assert node.query("SELECT * FROM test_table") == 'Key 0\nKey 1\nKey 2\n' +    assert node.query("SELECT * FROM test_table") == 'Key 0\nKey 1\nKey 2\n' + +    node.query("DROP TABLE test_table") + +def test_executable_pool_storage_input_sum_python(started_cluster): +    skip_test_msan(node) + +    query = "CREATE TABLE test_table (value UInt64) ENGINE=ExecutablePool('input_sum_pool.py', 'TabSeparated', {source}) SETTINGS send_chunk_header=1, pool_size=1" + +    node.query("DROP TABLE IF EXISTS test_table") +    node.query(query.format(source='(SELECT 1, 1)')) + +    assert node.query("SELECT * FROM test_table") == '2\n' +    assert node.query("SELECT * FROM test_table") == '2\n' + 
assert node.query("SELECT * FROM test_table") == '2\n' + + node.query("DROP TABLE test_table") + + node.query(query.format(source='(SELECT id, id FROM test_data_table)')) + + assert node.query("SELECT * FROM test_table") == '0\n2\n4\n' + assert node.query("SELECT * FROM test_table") == '0\n2\n4\n' + assert node.query("SELECT * FROM test_table") == '0\n2\n4\n' + + node.query("DROP TABLE test_table") + +def test_executable_pool_storage_input_argument_python(started_cluster): + skip_test_msan(node) + + query = "CREATE TABLE test_table (value String) ENGINE=ExecutablePool('input_argument_pool.py 1', 'TabSeparated', {source}) SETTINGS send_chunk_header=1, pool_size=1" + + node.query("DROP TABLE IF EXISTS test_table") + node.query(query.format(source='(SELECT 1)')) + + assert node.query("SELECT * FROM test_table") == 'Key 1 1\n' + assert node.query("SELECT * FROM test_table") == 'Key 1 1\n' + assert node.query("SELECT * FROM test_table") == 'Key 1 1\n' + + node.query("DROP TABLE test_table") + + node.query(query.format(source='(SELECT id FROM test_data_table)')) + + assert node.query("SELECT * FROM test_table") == 'Key 1 0\nKey 1 1\nKey 1 2\n' + assert node.query("SELECT * FROM test_table") == 'Key 1 0\nKey 1 1\nKey 1 2\n' + assert node.query("SELECT * FROM test_table") == 'Key 1 0\nKey 1 1\nKey 1 2\n' + + node.query("DROP TABLE test_table") + +def test_executable_pool_storage_input_signalled_python(started_cluster): + skip_test_msan(node) + + query = "CREATE TABLE test_table (value String) ENGINE=ExecutablePool('input_signalled_pool.py', 'TabSeparated', {source}) SETTINGS send_chunk_header=1, pool_size=1" + + node.query("DROP TABLE IF EXISTS test_table") + node.query(query.format(source='(SELECT 1)')) + + assert node.query_and_get_error("SELECT * FROM test_table") + assert node.query_and_get_error("SELECT * FROM test_table") + assert node.query_and_get_error("SELECT * FROM test_table") + + node.query("DROP TABLE test_table") + + node.query(query.format(source='(SELECT id FROM test_data_table)')) + + assert node.query_and_get_error("SELECT * FROM test_table") + assert node.query_and_get_error("SELECT * FROM test_table") + assert node.query_and_get_error("SELECT * FROM test_table") + + node.query("DROP TABLE test_table") + +def test_executable_pool_storage_input_slow_python(started_cluster): + skip_test_msan(node) + + query = """CREATE TABLE test_table (value String) + ENGINE=ExecutablePool('input_slow_pool.py', 'TabSeparated', {source}) + SETTINGS send_chunk_header=1, pool_size=1, command_read_timeout=2500""" + + node.query("DROP TABLE IF EXISTS test_table") + node.query(query.format(source='(SELECT 1)')) + + assert node.query_and_get_error("SELECT * FROM test_table") + assert node.query_and_get_error("SELECT * FROM test_table") + assert node.query_and_get_error("SELECT * FROM test_table") + + node.query("DROP TABLE test_table") + + node.query(query.format(source='(SELECT id FROM test_data_table)')) + + assert node.query_and_get_error("SELECT * FROM test_table") + assert node.query_and_get_error("SELECT * FROM test_table") + assert node.query_and_get_error("SELECT * FROM test_table") + + node.query("DROP TABLE test_table") + +def test_executable_pool_storage_input_multiple_pipes_python(started_cluster): + skip_test_msan(node) + + query = "CREATE TABLE test_table (value String) ENGINE=ExecutablePool('input_multiple_pipes_pool.py', 'TabSeparated', {source}) SETTINGS send_chunk_header=1, pool_size=1" + + node.query("DROP TABLE IF EXISTS test_table") + node.query(query.format(source='(SELECT 1), 
(SELECT 2), (SELECT 3)')) + + assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 1\n' + assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 1\n' + assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 1\n' + + node.query("DROP TABLE test_table") + + node.query(query.format(source='(SELECT id FROM test_data_table), (SELECT 2), (SELECT 3)')) + + assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 0\nKey from 0 fd 1\nKey from 0 fd 2\n' + assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 0\nKey from 0 fd 1\nKey from 0 fd 2\n' + assert node.query("SELECT * FROM test_table") == 'Key from 4 fd 3\nKey from 3 fd 2\nKey from 0 fd 0\nKey from 0 fd 1\nKey from 0 fd 2\n' + + node.query("DROP TABLE test_table") + +def test_executable_pool_storage_input_count_python(started_cluster): + skip_test_msan(node) + + query = "CREATE TABLE test_table (value String) ENGINE=ExecutablePool('input_count_pool.py', 'TabSeparated', {source}) SETTINGS send_chunk_header=1, pool_size=1" + + node.query("DROP TABLE IF EXISTS test_table") + node.query(query.format(source='(SELECT 1)')) + + assert node.query("SELECT * FROM test_table") == '1\n' + assert node.query("SELECT * FROM test_table") == '1\n' + assert node.query("SELECT * FROM test_table") == '1\n' + + node.query("DROP TABLE test_table") + + node.query(query.format(source='(SELECT number FROM system.numbers LIMIT 250000)')) + + assert node.query("SELECT * FROM test_table") == '250000\n' + assert node.query("SELECT * FROM test_table") == '250000\n' + assert node.query("SELECT * FROM test_table") == '250000\n' + + node.query("DROP TABLE test_table") diff --git a/tests/integration/test_executable_table_function/user_scripts/input.py b/tests/integration/test_executable_table_function/user_scripts/input.py new file mode 100755 index 00000000000..835ab1f441a --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input.py @@ -0,0 +1,8 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for line in sys.stdin: + print("Key " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/test_input.sh b/tests/integration/test_executable_table_function/user_scripts/input.sh similarity index 100% rename from tests/integration/test_executable_table_function/user_scripts/test_input.sh rename to tests/integration/test_executable_table_function/user_scripts/input.sh diff --git a/tests/integration/test_executable_table_function/user_scripts/input_argument.py b/tests/integration/test_executable_table_function/user_scripts/input_argument.py new file mode 100755 index 00000000000..c1b2e5966d7 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_argument.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + arg = int(sys.argv[1]) + + for line in sys.stdin: + print("Key " + str(arg) + " " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_argument_pool.py b/tests/integration/test_executable_table_function/user_scripts/input_argument_pool.py new file mode 100755 index 00000000000..378a6ef4391 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_argument_pool.py @@ -0,0 +1,17 @@ +#!/usr/bin/python3 + +import 
sys + +if __name__ == '__main__': + arg = int(sys.argv[1]) + + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + print(str(chunk_length), end='\n') + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + print("Key " + str(arg) + " " + line, end='') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_chunk_header.py b/tests/integration/test_executable_table_function/user_scripts/input_chunk_header.py new file mode 100755 index 00000000000..5dc03e1c507 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_chunk_header.py @@ -0,0 +1,14 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + print("Key " + line, end='') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_count_pool.py b/tests/integration/test_executable_table_function/user_scripts/input_count_pool.py new file mode 100755 index 00000000000..8b744168a82 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_count_pool.py @@ -0,0 +1,15 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + print(1, end='\n') + print(str(chunk_length), end='\n') + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_multiple_pipes.py b/tests/integration/test_executable_table_function/user_scripts/input_multiple_pipes.py new file mode 100755 index 00000000000..64590cbc16a --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_multiple_pipes.py @@ -0,0 +1,19 @@ +#!/usr/bin/python3 + +import sys +import os + +if __name__ == '__main__': + fd3 = os.fdopen(3) + fd4 = os.fdopen(4) + + for line in fd4: + print("Key from 4 fd " + line, end='') + + for line in fd3: + print("Key from 3 fd " + line, end='') + + for line in sys.stdin: + print("Key from 0 fd " + line, end='') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_multiple_pipes_pool.py b/tests/integration/test_executable_table_function/user_scripts/input_multiple_pipes_pool.py new file mode 100755 index 00000000000..a3a515899f9 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_multiple_pipes_pool.py @@ -0,0 +1,45 @@ +#!/usr/bin/python3 + +import sys +import os + +if __name__ == '__main__': + fd3 = os.fdopen(3) + fd4 = os.fdopen(4) + + lines = [] + + for chunk_header_fd4 in fd4: + fd4_chunk_length = int(chunk_header_fd4) + + while fd4_chunk_length != 0: + line = fd4.readline() + fd4_chunk_length -= 1 + lines.append("Key from 4 fd " + line) + + for chunk_header_fd3 in fd3: + fd3_chunk_length = int(chunk_header_fd3) + + while fd3_chunk_length != 0: + line = fd3.readline() + fd3_chunk_length -= 1 + lines.append("Key from 3 fd " + line) + + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + lines.append("Key from 0 fd " + line) + + break + break + + print(str(len(lines)), end='\n') + + for line in lines: + print(line, end='') + lines.clear() + + sys.stdout.flush() \ No newline at end of file diff --git 
a/tests/integration/test_executable_table_function/user_scripts/input_pool.py b/tests/integration/test_executable_table_function/user_scripts/input_pool.py new file mode 100755 index 00000000000..ec4e9af23cd --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_pool.py @@ -0,0 +1,15 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + print(str(chunk_length), end='\n') + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + print("Key " + line, end='') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_signalled.py b/tests/integration/test_executable_table_function/user_scripts/input_signalled.py new file mode 100755 index 00000000000..93ce20fa8e7 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_signalled.py @@ -0,0 +1,12 @@ +#!/usr/bin/python3 + +import sys +import os +import signal + +if __name__ == '__main__': + for line in sys.stdin: + os.signal(os.getpid(), signal.SIGTERM) + + print("Key " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_signalled_pool.py b/tests/integration/test_executable_table_function/user_scripts/input_signalled_pool.py new file mode 100755 index 00000000000..1ea0eddbd8d --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_signalled_pool.py @@ -0,0 +1,19 @@ +#!/usr/bin/python3 + +import sys +import os +import signal + +if __name__ == '__main__': + for chunk_header in sys.stdin: + os.signal(os.getpid(), signal.SIGTERM) + + chunk_length = int(chunk_header) + print(str(chunk_length), end='\n') + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + print("Key " + line, end='') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_slow.py b/tests/integration/test_executable_table_function/user_scripts/input_slow.py new file mode 100755 index 00000000000..4c2abe89e33 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_slow.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys +import time + +if __name__ == '__main__': + for line in sys.stdin: + time.sleep(25) + print("Key " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_slow_pool.py b/tests/integration/test_executable_table_function/user_scripts/input_slow_pool.py new file mode 100755 index 00000000000..c8df7e18c4c --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_slow_pool.py @@ -0,0 +1,18 @@ +#!/usr/bin/python3 + +import sys +import time + +if __name__ == '__main__': + for chunk_header in sys.stdin: + time.sleep(25) + + chunk_length = int(chunk_header) + print(str(chunk_length), end='\n') + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + print("Key " + line, end='') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_sum.py b/tests/integration/test_executable_table_function/user_scripts/input_sum.py new file mode 100755 index 00000000000..432d7a13a2f --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_sum.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys +import re + +if __name__ == '__main__': + for line in sys.stdin: + line_split = 
re.split(r'\t+', line) + print(int(line_split[0]) + int(line_split[1]), end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/input_sum_pool.py b/tests/integration/test_executable_table_function/user_scripts/input_sum_pool.py new file mode 100755 index 00000000000..cd0de25fe87 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/input_sum_pool.py @@ -0,0 +1,17 @@ +#!/usr/bin/python3 + +import sys +import re + +if __name__ == '__main__': + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + print(str(chunk_length), end='\n') + + while chunk_length != 0: + line = sys.stdin.readline() + line_split = re.split(r'\t+', line) + print(int(line_split[0]) + int(line_split[1]), end='\n') + chunk_length -= 1 + + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/no_input.py b/tests/integration/test_executable_table_function/user_scripts/no_input.py new file mode 100755 index 00000000000..65b78f3d755 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/no_input.py @@ -0,0 +1,9 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + print("Key 0") + print("Key 1") + print("Key 2") + sys.stdout.flush() diff --git a/tests/integration/test_executable_table_function/user_scripts/no_input.sh b/tests/integration/test_executable_table_function/user_scripts/no_input.sh new file mode 100755 index 00000000000..13d172a5be4 --- /dev/null +++ b/tests/integration/test_executable_table_function/user_scripts/no_input.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +printf "Key 0\n"; +printf "Key 1\n"; +printf "Key 2\n"; diff --git a/tests/integration/test_executable_table_function/user_scripts/test_argument.sh b/tests/integration/test_executable_table_function/user_scripts/test_argument.sh deleted file mode 100755 index 89634031d2b..00000000000 --- a/tests/integration/test_executable_table_function/user_scripts/test_argument.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -echo "Key $1" diff --git a/tests/integration/test_executable_table_function/user_scripts/test_input_multiple_pipes.sh b/tests/integration/test_executable_table_function/user_scripts/test_input_multiple_pipes.sh deleted file mode 100755 index 1e53e3211dc..00000000000 --- a/tests/integration/test_executable_table_function/user_scripts/test_input_multiple_pipes.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -while read -t 250 -u 4 read_data; do printf "Key from 4 fd $read_data\n"; done -while read -t 250 -u 3 read_data; do printf "Key from 3 fd $read_data\n"; done -while read -t 250 read_data; do printf "Key from 0 fd $read_data\n"; done diff --git a/tests/integration/test_executable_table_function/user_scripts/test_no_input.sh b/tests/integration/test_executable_table_function/user_scripts/test_no_input.sh deleted file mode 100755 index 9e8b3be63d6..00000000000 --- a/tests/integration/test_executable_table_function/user_scripts/test_no_input.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -echo "1" diff --git a/tests/integration/test_executable_user_defined_function/__init__.py b/tests/integration/test_executable_user_defined_function/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_executable_user_defined_function/config/executable_user_defined_functions_config.xml b/tests/integration/test_executable_user_defined_function/config/executable_user_defined_functions_config.xml new file mode 100644 index 00000000000..3cbf717bb67 --- 
/dev/null +++ b/tests/integration/test_executable_user_defined_function/config/executable_user_defined_functions_config.xml @@ -0,0 +1,2 @@ + + diff --git a/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml b/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml new file mode 100644 index 00000000000..d8f81a588a2 --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/functions/test_function_config.xml @@ -0,0 +1,196 @@ + + + executable + test_function_bash + String + + UInt64 + + TabSeparated + input.sh + + + + executable_pool + test_function_pool_bash + String + + UInt64 + + TabSeparated + input.sh + + + + executable + test_function_python + String + + UInt64 + + TabSeparated + input.py + + + + executable_pool + test_function_pool_python + String + + UInt64 + + TabSeparated + input.py + + + + executable + test_function_send_chunk_header_python + String + + UInt64 + + TabSeparated + 1 + input_chunk_header.py + + + + executable_pool + test_function_send_chunk_header_pool_python + String + + UInt64 + + TabSeparated + 1 + input_chunk_header.py + + + + executable + test_function_sum_python + String + + UInt64 + + + UInt64 + + TabSeparated + input_sum.py + + + + executable_pool + test_function_sum_pool_python + String + + UInt64 + + + UInt64 + + TabSeparated + input_sum.py + + + + executable + test_function_argument_python + String + + UInt64 + + TabSeparated + input_argument.py 1 + + + + executable_pool + test_function_argument_pool_python + String + + UInt64 + + TabSeparated + input_argument.py 1 + + + + executable + test_function_slow_python + String + + UInt64 + + TabSeparated + input_slow.py + 1 + 1000 + + + + executable_pool + test_function_slow_pool_python + String + + UInt64 + + TabSeparated + input_slow.py + 1 + 1000 + + + + executable + test_function_signalled_python + String + + UInt64 + + TabSeparated + input_signalled.py + 1 + 1000 + + + + executable_pool + test_function_signalled_pool_python + String + + UInt64 + + TabSeparated + input_signalled.py + 1 + 1000 + + + + executable + test_function_non_direct_bash + String + + UInt64 + + TabSeparated + while read read_data; do printf "Key $read_data\n"; done + 0 + + + + executable_pool + test_function_non_direct_pool_bash + String + + UInt64 + + TabSeparated + while read read_data; do printf "Key $read_data\n"; done + 0 + + + diff --git a/tests/integration/test_executable_user_defined_function/test.py b/tests/integration/test_executable_user_defined_function/test.py new file mode 100644 index 00000000000..94afdf8d8a9 --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/test.py @@ -0,0 +1,106 @@ +import os +import sys +import time + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', stay_alive=True, main_configs=[]) + + +def skip_test_msan(instance): + if instance.is_built_with_memory_sanitizer(): + pytest.skip("Memory Sanitizer cannot work with vfork") + +def copy_file_to_container(local_path, dist_path, container_id): + os.system("docker cp {local} {cont_id}:{dist}".format(local=local_path, cont_id=container_id, dist=dist_path)) + +config = ''' + /etc/clickhouse-server/functions/test_function_config.xml +''' + +@pytest.fixture(scope="module") +def started_cluster(): + try: + 
cluster.start() + + node.replace_config("/etc/clickhouse-server/config.d/executable_user_defined_functions_config.xml", config) + + copy_file_to_container(os.path.join(SCRIPT_DIR, 'functions/.'), '/etc/clickhouse-server/functions', node.docker_id) + copy_file_to_container(os.path.join(SCRIPT_DIR, 'user_scripts/.'), '/var/lib/clickhouse/user_scripts', node.docker_id) + + node.restart_clickhouse() + + yield cluster + + finally: + cluster.shutdown() + +def test_executable_function_bash(started_cluster): + skip_test_msan(node) + assert node.query("SELECT test_function_bash(toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT test_function_bash(1)") == 'Key 1\n' + + assert node.query("SELECT test_function_pool_bash(toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT test_function_pool_bash(1)") == 'Key 1\n' + +def test_executable_function_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT test_function_python(toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT test_function_python(1)") == 'Key 1\n' + + assert node.query("SELECT test_function_pool_python(toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT test_function_pool_python(1)") == 'Key 1\n' + +def test_executable_function_send_chunk_header_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT test_function_send_chunk_header_python(toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT test_function_send_chunk_header_python(1)") == 'Key 1\n' + + assert node.query("SELECT test_function_send_chunk_header_pool_python(toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT test_function_send_chunk_header_pool_python(1)") == 'Key 1\n' + +def test_executable_function_sum_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT test_function_sum_python(toUInt64(1), toUInt64(1))") == '2\n' + assert node.query("SELECT test_function_sum_python(1, 1)") == '2\n' + + assert node.query("SELECT test_function_sum_pool_python(toUInt64(1), toUInt64(1))") == '2\n' + assert node.query("SELECT test_function_sum_pool_python(1, 1)") == '2\n' + +def test_executable_function_argument_python(started_cluster): + skip_test_msan(node) + assert node.query("SELECT test_function_argument_python(toUInt64(1))") == 'Key 1 1\n' + assert node.query("SELECT test_function_argument_python(1)") == 'Key 1 1\n' + + assert node.query("SELECT test_function_argument_pool_python(toUInt64(1))") == 'Key 1 1\n' + assert node.query("SELECT test_function_argument_pool_python(1)") == 'Key 1 1\n' + +def test_executable_function_signalled_python(started_cluster): + skip_test_msan(node) + assert node.query_and_get_error("SELECT test_function_signalled_python(toUInt64(1))") + assert node.query_and_get_error("SELECT test_function_signalled_python(1)") + + assert node.query_and_get_error("SELECT test_function_signalled_pool_python(toUInt64(1))") + assert node.query_and_get_error("SELECT test_function_signalled_pool_python(1)") + +def test_executable_function_slow_python(started_cluster): + skip_test_msan(node) + assert node.query_and_get_error("SELECT test_function_slow_python(toUInt64(1))") + assert node.query_and_get_error("SELECT test_function_slow_python(1)") + + assert node.query_and_get_error("SELECT test_function_slow_pool_python(toUInt64(1))") + assert node.query_and_get_error("SELECT test_function_slow_pool_python(1)") + +def test_executable_function_non_direct_bash(started_cluster): + skip_test_msan(node) + assert node.query("SELECT test_function_non_direct_bash(toUInt64(1))") == 'Key 1\n' + assert 
node.query("SELECT test_function_non_direct_bash(1)") == 'Key 1\n' + + assert node.query("SELECT test_function_non_direct_pool_bash(toUInt64(1))") == 'Key 1\n' + assert node.query("SELECT test_function_non_direct_pool_bash(1)") == 'Key 1\n' diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input.py b/tests/integration/test_executable_user_defined_function/user_scripts/input.py new file mode 100755 index 00000000000..835ab1f441a --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input.py @@ -0,0 +1,8 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for line in sys.stdin: + print("Key " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input.sh b/tests/integration/test_executable_user_defined_function/user_scripts/input.sh new file mode 100755 index 00000000000..aea51b82b1f --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +while read read_data; + do printf "Key $read_data\n"; +done diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_argument.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_argument.py new file mode 100755 index 00000000000..c1b2e5966d7 --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_argument.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + arg = int(sys.argv[1]) + + for line in sys.stdin: + print("Key " + str(arg) + " " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_chunk_header.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_chunk_header.py new file mode 100755 index 00000000000..5dc03e1c507 --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_chunk_header.py @@ -0,0 +1,14 @@ +#!/usr/bin/python3 + +import sys + +if __name__ == '__main__': + for chunk_header in sys.stdin: + chunk_length = int(chunk_header) + + while chunk_length != 0: + line = sys.stdin.readline() + chunk_length -= 1 + print("Key " + line, end='') + + sys.stdout.flush() diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_signalled.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_signalled.py new file mode 100755 index 00000000000..27c8bc4840e --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_signalled.py @@ -0,0 +1,13 @@ +#!/usr/bin/python3 + +import sys +import os +import signal +import time + +if __name__ == '__main__': + for line in sys.stdin: + os.signal(os.getpid(), signal.SIGTERM) + + print("Key " + line, end='') + sys.stdout.flush() diff --git a/tests/integration/test_executable_user_defined_function/user_scripts/input_slow.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_slow.py new file mode 100755 index 00000000000..648a9eac918 --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_slow.py @@ -0,0 +1,12 @@ +#!/usr/bin/python3 + +import sys +import os +import signal +import time + +if __name__ == '__main__': + for line in sys.stdin: + time.sleep(5) + print("Key " + line, end='') + sys.stdout.flush() diff --git 
a/tests/integration/test_executable_user_defined_function/user_scripts/input_sum.py b/tests/integration/test_executable_user_defined_function/user_scripts/input_sum.py new file mode 100755 index 00000000000..432d7a13a2f --- /dev/null +++ b/tests/integration/test_executable_user_defined_function/user_scripts/input_sum.py @@ -0,0 +1,10 @@ +#!/usr/bin/python3 + +import sys +import re + +if __name__ == '__main__': + for line in sys.stdin: + line_split = re.split(r'\t+', line) + print(int(line_split[0]) + int(line_split[1]), end='\n') + sys.stdout.flush() diff --git a/tests/integration/test_executable_user_defined_functions_config_reload/functions/test_function_config.xml b/tests/integration/test_executable_user_defined_functions_config_reload/functions/test_function_config.xml index f2a7d6e67b1..d0bd6e5ab88 100644 --- a/tests/integration/test_executable_user_defined_functions_config_reload/functions/test_function_config.xml +++ b/tests/integration/test_executable_user_defined_functions_config_reload/functions/test_function_config.xml @@ -7,8 +7,7 @@ UInt64 TabSeparated - while read read_data; do printf "Key_1 $read_data\n"; done - 0 + test_input_1.sh diff --git a/tests/integration/test_executable_user_defined_functions_config_reload/functions/test_function_config2.xml b/tests/integration/test_executable_user_defined_functions_config_reload/functions/test_function_config2.xml index fe02146a6b8..80ae21a086d 100644 --- a/tests/integration/test_executable_user_defined_functions_config_reload/functions/test_function_config2.xml +++ b/tests/integration/test_executable_user_defined_functions_config_reload/functions/test_function_config2.xml @@ -7,8 +7,7 @@ UInt64 TabSeparated - while read read_data; do printf "Key_2 $read_data\n"; done - 0 + test_input_2.sh diff --git a/tests/integration/test_executable_user_defined_functions_config_reload/test.py b/tests/integration/test_executable_user_defined_functions_config_reload/test.py index 3117b3e72b1..629c426a28c 100644 --- a/tests/integration/test_executable_user_defined_functions_config_reload/test.py +++ b/tests/integration/test_executable_user_defined_functions_config_reload/test.py @@ -28,6 +28,8 @@ def started_cluster(): cluster.start() copy_file_to_container(os.path.join(SCRIPT_DIR, 'functions/.'), '/etc/clickhouse-server/functions', node.docker_id) + copy_file_to_container(os.path.join(SCRIPT_DIR, 'user_scripts/.'), '/var/lib/clickhouse/user_scripts', node.docker_id) + node.restart_clickhouse() yield cluster diff --git a/tests/integration/test_executable_user_defined_functions_config_reload/user_scripts/test_input_1.sh b/tests/integration/test_executable_user_defined_functions_config_reload/user_scripts/test_input_1.sh new file mode 100755 index 00000000000..a6cffe83bba --- /dev/null +++ b/tests/integration/test_executable_user_defined_functions_config_reload/user_scripts/test_input_1.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +while read read_data; + do printf "Key_1 $read_data\n"; +done diff --git a/tests/integration/test_executable_user_defined_functions_config_reload/user_scripts/test_input_2.sh b/tests/integration/test_executable_user_defined_functions_config_reload/user_scripts/test_input_2.sh new file mode 100755 index 00000000000..a673cfd18fb --- /dev/null +++ b/tests/integration/test_executable_user_defined_functions_config_reload/user_scripts/test_input_2.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +while read read_data; + do printf "Key_2 $read_data\n"; +done diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/__init__.py 
b/tests/integration/test_groupBitmapAnd_on_distributed/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/configs/clusters.xml b/tests/integration/test_groupBitmapAnd_on_distributed/configs/clusters.xml new file mode 100644 index 00000000000..5ac57bed6a6 --- /dev/null +++ b/tests/integration/test_groupBitmapAnd_on_distributed/configs/clusters.xml @@ -0,0 +1,32 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + + + node3 + 9000 + + + + + node4 + 9000 + + + + + diff --git a/tests/integration/test_groupBitmapAnd_on_distributed/test.py b/tests/integration/test_groupBitmapAnd_on_distributed/test.py new file mode 100644 index 00000000000..b0fb55b13ff --- /dev/null +++ b/tests/integration/test_groupBitmapAnd_on_distributed/test.py @@ -0,0 +1,82 @@ +import pytest + +from helpers.cluster import ClickHouseCluster +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', main_configs=["configs/clusters.xml"], with_zookeeper=True) +node2 = cluster.add_instance('node2', main_configs=["configs/clusters.xml"], with_zookeeper=True) +node3 = cluster.add_instance('node3', main_configs=["configs/clusters.xml"], with_zookeeper=True) +node4 = cluster.add_instance('node4', main_configs=["configs/clusters.xml"], image='yandex/clickhouse-server', tag='21.5', with_zookeeper=True) + +def insert_data(node, table_name): + node.query("""INSERT INTO {} + VALUES (bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));""".format(table_name)) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_groupBitmapAnd_on_distributed_table(start_cluster): + local_table_name = "bitmap_column_expr_test" + distributed_table_name = "bitmap_column_expr_test_dst" + cluster_name = "awesome_cluster" + + for node in (node1, node2): + node.query("""CREATE TABLE {} + ( + z AggregateFunction(groupBitmap, UInt32) + ) + ENGINE = MergeTree() + ORDER BY tuple()""".format(local_table_name)) + + node.query("""CREATE TABLE {} + ( + z AggregateFunction(groupBitmap, UInt32) + ) + ENGINE = Distributed('{}', 'default', '{}')""".format(distributed_table_name, cluster_name, local_table_name)) + insert_data(node1, local_table_name) + + expected = "10" + + for node in (node1, node2): + result = node.query("select groupBitmapAnd(z) FROM {};".format(distributed_table_name)).strip() + assert(result == expected) + +def test_groupBitmapAnd_function_versioning(start_cluster): + local_table_name = "bitmap_column_expr_versioning_test" + distributed_table_name = "bitmap_column_expr_versioning_test_dst" + cluster_name = "test_version_cluster" + + for node in (node3, node4): + node.query("""CREATE TABLE {} + ( + z AggregateFunction(groupBitmap, UInt32) + ) + ENGINE = MergeTree() + ORDER BY tuple()""".format(local_table_name)) + + node.query("""CREATE TABLE {} + ( + z AggregateFunction(groupBitmap, UInt32) + ) + ENGINE = Distributed('{}', 'default', '{}')""".format(distributed_table_name, cluster_name, local_table_name)) + + node.query("""INSERT INTO {} VALUES + (bitmapBuild(cast([1,2,3,4,5,6,7,8,9,10] as Array(UInt32))));""".format(local_table_name)) + + expected = "10" + new_version_distributed_result = node3.query("select groupBitmapAnd(z) FROM {};".format(distributed_table_name)).strip() + old_version_distributed_result = node4.query("select groupBitmapAnd(z) FROM {};".format(distributed_table_name)).strip() + assert(new_version_distributed_result == expected) + 
assert(old_version_distributed_result == expected) + + result_from_old_to_new_version = node3.query("select groupBitmapAnd(z) FROM remote('node4', default.{})".format(local_table_name)).strip() + assert(result_from_old_to_new_version == expected) + + result_from_new_to_old_version = node4.query("select groupBitmapAnd(z) FROM remote('node3', default.{})".format(local_table_name)).strip() + assert(result_from_new_to_old_version == expected) diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index c892fc94712..e17ed0d9c8e 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -5,6 +5,8 @@ import time import grpc from helpers.cluster import ClickHouseCluster, run_and_check from threading import Thread +import gzip +import lz4.frame GRPC_PORT = 9100 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -365,3 +367,67 @@ def test_result_compression(): stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) result = stub.ExecuteQuery(query_info) assert result.output == (b'0\n')*1000000 + +def test_compressed_output(): + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", compression_type="lz4") + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + result = stub.ExecuteQuery(query_info) + assert lz4.frame.decompress(result.output) == (b'0\n')*1000 + +def test_compressed_output_streaming(): + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(100000)", compression_type="lz4") + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + d_context = lz4.frame.create_decompression_context() + data = b'' + for result in stub.ExecuteQueryWithStreamOutput(query_info): + d1, _, _ = lz4.frame.decompress_chunk(d_context, result.output) + data += d1 + assert data == (b'0\n')*100000 + +def test_compressed_output_gzip(): + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", compression_type="gzip", compression_level=6) + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + result = stub.ExecuteQuery(query_info) + assert gzip.decompress(result.output) == (b'0\n')*1000 + +def test_compressed_totals_and_extremes(): + query("CREATE TABLE t (x UInt8, y UInt8) ENGINE = Memory") + query("INSERT INTO t VALUES (1, 2), (2, 4), (3, 2), (3, 3), (3, 4)") + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT sum(x), y FROM t GROUP BY y WITH TOTALS", compression_type="lz4") + result = stub.ExecuteQuery(query_info) + assert lz4.frame.decompress(result.totals) == b'12\t0\n' + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT x, y FROM t", settings={"extremes": "1"}, compression_type="lz4") + result = stub.ExecuteQuery(query_info) + assert lz4.frame.decompress(result.extremes) == b'1\t2\n3\t4\n' + +def test_compressed_insert_query_streaming(): + query("CREATE TABLE t (a UInt8) ENGINE = Memory") + data = lz4.frame.compress(b'(1),(2),(3),(5),(4),(6),(7),(8),(9)') + sz1 = len(data) // 3 + sz2 = len(data) // 3 + d1 = data[:sz1] + d2 = data[sz1:sz1+sz2] + d3 = data[sz1+sz2:] + def send_query_info(): + yield clickhouse_grpc_pb2.QueryInfo(query="INSERT INTO t VALUES", input_data=d1, compression_type="lz4", next_query_info=True) + yield clickhouse_grpc_pb2.QueryInfo(input_data=d2, next_query_info=True) + yield clickhouse_grpc_pb2.QueryInfo(input_data=d3) + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + 
stub.ExecuteQueryWithStreamInput(send_query_info()) + assert query("SELECT a FROM t ORDER BY a") == "1\n2\n3\n4\n5\n6\n7\n8\n9\n" + +def test_compressed_external_table(): + columns = [clickhouse_grpc_pb2.NameAndType(name='UserID', type='UInt64'), clickhouse_grpc_pb2.NameAndType(name='UserName', type='String')] + d1 = lz4.frame.compress(b'1\tAlex\n2\tBen\n3\tCarl\n') + d2 = gzip.compress(b'4,Daniel\n5,Ethan\n') + ext1 = clickhouse_grpc_pb2.ExternalTable(name='ext1', columns=columns, data=d1, format='TabSeparated', compression_type="lz4") + ext2 = clickhouse_grpc_pb2.ExternalTable(name='ext2', columns=columns, data=d2, format='CSV', compression_type="gzip") + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT * FROM (SELECT * FROM ext1 UNION ALL SELECT * FROM ext2) ORDER BY UserID", external_tables=[ext1, ext2]) + result = stub.ExecuteQuery(query_info) + assert result.output == b"1\tAlex\n"\ + b"2\tBen\n"\ + b"3\tCarl\n"\ + b"4\tDaniel\n"\ + b"5\tEthan\n" diff --git a/tests/integration/test_http_handlers_config/test.py b/tests/integration/test_http_handlers_config/test.py index 818a1e54640..01872a1d0c3 100644 --- a/tests/integration/test_http_handlers_config/test.py +++ b/tests/integration/test_http_handlers_config/test.py @@ -58,9 +58,9 @@ def test_predefined_query_handler(): 'test_predefined_handler_get?max_threads=1&setting_name=max_threads', method='GET', headers={'XXX': 'xxx'}).content - assert b'max_threads\t1\nmax_alter_threads\t1\n' == cluster.instance.http_request( - 'query_param_with_url/max_threads?max_threads=1&max_alter_threads=1', - headers={'XXX': 'max_alter_threads'}).content + assert b'max_final_threads\t1\nmax_threads\t1\n' == cluster.instance.http_request( + 'query_param_with_url/max_threads?max_threads=1&max_final_threads=1', + headers={'XXX': 'max_final_threads'}).content def test_fixed_static_handler(): diff --git a/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/conf.xml b/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/conf.xml index 3e4c885d1f6..3adba1d402a 100644 --- a/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/conf.xml +++ b/tests/integration/test_input_format_parallel_parsing_memory_tracking/configs/conf.xml @@ -1,4 +1,23 @@ - - 3000000000 + + 4000000000 + + + + + + + + + + + + + + diff --git a/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py b/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py index bc7f32bf544..1c686c7982e 100644 --- a/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py +++ b/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py @@ -24,16 +24,13 @@ def start_cluster(): # max_memory_usage_for_user cannot be used, since the memory for user accounted -# correctly, only total is not +# correctly, only total is not (it is set via conf.xml) def test_memory_tracking_total(): - instance.query(''' - CREATE TABLE null (row String) ENGINE=Null; - ''') + instance.query('CREATE TABLE null (row String) ENGINE=Null') instance.exec_in_container(['bash', '-c', 'clickhouse local -q "SELECT arrayStringConcat(arrayMap(x->toString(cityHash64(x)), range(1000)), \' \') from numbers(10000)" > data.json']) for it in range(0, 20): # the problem can be triggered only via HTTP, # since clickhouse-client parses the data by itself. 
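+        # Each iteration pushes the generated rows through the HTTP interface so that only the server-wide (total) memory tracker accounts for the parsing.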
         assert instance.exec_in_container(['curl', '--silent', '--show-error', '--data-binary', '@data.json',
-                                           'http://127.1:8123/?query=INSERT%20INTO%20null%20FORMAT%20TSV']) == '', 'Failed on {} iteration'.format(
-            it)
+                                           'http://127.1:8123/?query=INSERT%20INTO%20null%20FORMAT%20TSV']) == '', f'Failed on {it} iteration'
diff --git a/tests/integration/test_jemalloc_percpu_arena/__init__.py b/tests/integration/test_jemalloc_percpu_arena/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_jemalloc_percpu_arena/test.py b/tests/integration/test_jemalloc_percpu_arena/test.py
new file mode 100755
index 00000000000..bdd0ada966f
--- /dev/null
+++ b/tests/integration/test_jemalloc_percpu_arena/test.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# pylint: disable=line-too-long
+
+import os
+import subprocess
+import multiprocessing
+from tempfile import NamedTemporaryFile
+import pytest
+
+
+CPU_ID = 4
+
+
+def run_command_in_container(cmd, *args):
+    # /clickhouse is mounted by the integration tests runner
+    alternative_binary = os.getenv('CLICKHOUSE_BINARY', '/clickhouse')
+    if alternative_binary:
+        args += (
+            '--volume', f'{alternative_binary}:/usr/bin/clickhouse',
+        )
+
+    return subprocess.check_output(['docker', 'run', '--rm',
+                                    *args,
+                                    'ubuntu:20.04',
+                                    'sh', '-c', cmd,
+                                    ])
+
+
+def run_with_cpu_limit(cmd, *args):
+    with NamedTemporaryFile() as online_cpu:
+        # NOTE: this is not the number of CPUs, but a specific CPU ID
+        online_cpu.write(f'{CPU_ID}'.encode())
+        online_cpu.flush()
+
+        # Override /sys/devices/system/cpu/online so that _SC_NPROCESSORS_ONLN
+        # reports fewer CPUs than _SC_NPROCESSORS_CONF, like LXD/LXC from [1] do.
+        #
+        # [1]: https://github.com/ClickHouse/ClickHouse/issues/32806
+        args += (
+            '--volume', f'{online_cpu.name}:/sys/devices/system/cpu/online',
+        )
+
+        return run_command_in_container(cmd, *args)
+
+
+def skip_if_jemalloc_disabled():
+    output = run_command_in_container("""clickhouse local -q "
+        SELECT value FROM system.build_options WHERE name = 'USE_JEMALLOC'"
+    """).strip()
+    if output != b'ON' and output != b'1':
+        pytest.skip(f'Compiled w/o jemalloc (USE_JEMALLOC={output})')
+
+# Ensure that clickhouse works even when the number of online CPUs
+# (_SC_NPROCESSORS_ONLN) is smaller than the number available (_SC_NPROCESSORS_CONF).
+#
+# Refs: https://github.com/jemalloc/jemalloc/pull/2181
+def test_jemalloc_percpu_arena():
+    skip_if_jemalloc_disabled()
+
+    assert multiprocessing.cpu_count() > CPU_ID
+
+    online_cpus = int(run_with_cpu_limit('getconf _NPROCESSORS_ONLN'))
+    assert online_cpus == 1, online_cpus
+
+    all_cpus = int(run_with_cpu_limit('getconf _NPROCESSORS_CONF'))
+    assert all_cpus == multiprocessing.cpu_count(), all_cpus
+
+    # implicitly disable percpu arena
+    result = run_with_cpu_limit('clickhouse local -q "select 1"',
+                                # NOTE: explicitly disable, since it is enabled by default in debug builds
+                                # (and even though debug builds are not in CI, let's state this).
+                                '--env', 'MALLOC_CONF=abort_conf:false')
+    assert int(result) == int(1), result
+
+    # should fail because of abort_conf:true
+    with pytest.raises(subprocess.CalledProcessError):
+        run_with_cpu_limit('clickhouse local -q "select 1"',
+                           '--env', 'MALLOC_CONF=abort_conf:true')
+
+    # should not fail even with abort_conf:true, due to the explicit narenas
+    # NOTE: abort:false to make it compatible with debug builds
+    run_with_cpu_limit('clickhouse local -q "select 1"',
+                       '--env', f'MALLOC_CONF=abort_conf:true,abort:false,narenas:{all_cpus}')
+
+# For manual run.
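+# For example (this assumes docker is available on the host and the ClickHouse binary is mounted at /clickhouse):
+#   python3 tests/integration/test_jemalloc_percpu_arena/test.py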
+if __name__ == '__main__': + test_jemalloc_percpu_arena() diff --git a/tests/integration/test_keeper_auth/test.py b/tests/integration/test_keeper_auth/test.py index 276fe3d8518..2df08cc94b7 100644 --- a/tests/integration/test_keeper_auth/test.py +++ b/tests/integration/test_keeper_auth/test.py @@ -43,12 +43,11 @@ def test_digest_auth_basic(started_cluster, get_zk): auth_connection.create("/test_no_acl", b"") auth_connection.create("/test_all_acl", b"data", acl=[make_acl("auth", "", all=True)]) - # for some reason original zookeeper accepts this ACL, but doesn't allow to do anything with this node - # even with correct credentials. - auth_connection.create("/test_all_digest_acl", b"dataX", acl=[make_acl("digest", "user1:password1", all=True)]) + # Consistent with zookeeper, accept generated digest + auth_connection.create("/test_all_digest_acl", b"dataX", acl=[make_acl("digest", "user1:XDkd2dsEuhc9ImU3q8pa8UOdtpI=", all=True)]) assert auth_connection.get("/test_all_acl")[0] == b"data" - #assert auth_connection.get("/test_all_digest_acl")[0] == b"dataX" + assert auth_connection.get("/test_all_digest_acl")[0] == b"dataX" no_auth_connection = get_zk() no_auth_connection.set("/test_no_acl", b"hello") diff --git a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py index 43fab165c53..7265105c8df 100644 --- a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py @@ -624,7 +624,7 @@ def err_sync_user_privs_with_materialized_mysql_database(clickhouse_node, mysql_ service_name)) assert "priv_err_db" in clickhouse_node.query("SHOW DATABASES") assert "test_table_1" not in clickhouse_node.query("SHOW TABLES FROM priv_err_db") - clickhouse_node.query("DETACH DATABASE priv_err_db") + clickhouse_node.query_with_retry("DETACH DATABASE priv_err_db") mysql_node.query("REVOKE SELECT ON priv_err_db.* FROM 'test'@'%'") time.sleep(3) @@ -743,7 +743,7 @@ def mysql_kill_sync_thread_restore_test(clickhouse_node, mysql_node, service_nam time.sleep(sleep_time) clickhouse_node.query("SELECT * FROM test_database.test_table") - clickhouse_node.query("DETACH DATABASE test_database") + clickhouse_node.query_with_retry("DETACH DATABASE test_database") clickhouse_node.query("ATTACH DATABASE test_database") check_query(clickhouse_node, "SELECT * FROM test_database.test_table ORDER BY id FORMAT TSV", '1\n2\n') @@ -784,7 +784,7 @@ def mysql_killed_while_insert(clickhouse_node, mysql_node, service_name): mysql_node.alloc_connection() - clickhouse_node.query("DETACH DATABASE kill_mysql_while_insert") + clickhouse_node.query_with_retry("DETACH DATABASE kill_mysql_while_insert") clickhouse_node.query("ATTACH DATABASE kill_mysql_while_insert") result = mysql_node.query_and_get_data("SELECT COUNT(1) FROM kill_mysql_while_insert.test") @@ -1066,9 +1066,83 @@ def table_overrides(clickhouse_node, mysql_node, service_name): mysql_node.query("COMMIT") clickhouse_node.query(f""" CREATE DATABASE table_overrides ENGINE=MaterializeMySQL('{service_name}:3306', 'table_overrides', 'root', 'clickhouse') - TABLE OVERRIDE t1 (COLUMNS (sensor_id UInt64)) + TABLE OVERRIDE t1 (COLUMNS (sensor_id UInt64, temp_f Nullable(Float32) ALIAS if(isNull(temperature), NULL, (temperature * 9 / 5) + 32))) """) + check_query( + clickhouse_node, + "SELECT type FROM system.columns WHERE database = 'table_overrides' AND table = 't1' AND name = 'sensor_id'", + 
"UInt64\n") + check_query( + clickhouse_node, + "SELECT type, default_kind FROM system.columns WHERE database = 'table_overrides' AND table = 't1' AND name = 'temp_f'", + "Nullable(Float32)\tALIAS\n") check_query(clickhouse_node, "SELECT count() FROM table_overrides.t1", "1000\n") - check_query(clickhouse_node, "SELECT type FROM system.columns WHERE database = 'table_overrides' AND table = 't1' AND name = 'sensor_id'", "UInt64\n") + mysql_node.query("INSERT INTO table_overrides.t1 VALUES(1001, '2021-10-01 00:00:00', 42.0)") + check_query(clickhouse_node, "SELECT count() FROM table_overrides.t1", "1001\n") clickhouse_node.query("DROP DATABASE IF EXISTS table_overrides") mysql_node.query("DROP DATABASE IF EXISTS table_overrides") + +def materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS test_database_datatype") + clickhouse_node.query("DROP DATABASE IF EXISTS test_database_datatype") + mysql_node.query("CREATE DATABASE test_database_datatype DEFAULT CHARACTER SET 'utf8'") + mysql_node.query(""" + CREATE TABLE test_database_datatype.t1 ( + `v1` int(10) unsigned AUTO_INCREMENT, + `v2` TINYINT, + `v3` SMALLINT, + `v4` BIGINT, + `v5` int, + `v6` TINYINT unsigned, + `v7` SMALLINT unsigned, + `v8` BIGINT unsigned, + `v9` FLOAT, + `v10` FLOAT unsigned, + `v11` DOUBLE, + `v12` DOUBLE unsigned, + `v13` DECIMAL(5,4), + `v14` date, + `v15` TEXT, + `v16` varchar(100) , + `v17` BLOB, + `v18` datetime DEFAULT CURRENT_TIMESTAMP, + `v19` datetime(6) DEFAULT CURRENT_TIMESTAMP(6), + `v20` TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + `v21` TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP(6), + /* todo support */ + # `v22` YEAR, + # `v23` TIME, + # `v24` TIME(3), + # `v25` GEOMETRY, + `v26` bit(4), + # `v27` JSON DEFAULT NULL, + # `v28` set('a', 'c', 'f', 'd', 'e', 'b'), + `v29` mediumint(4) unsigned NOT NULL DEFAULT '0', + `v30` varbinary(255) DEFAULT NULL COMMENT 'varbinary support', + `v31` binary(200) DEFAULT NULL, + `v32` ENUM('RED','GREEN','BLUE'), + PRIMARY KEY (`v1`) + ) ENGINE=InnoDB; + """) + + mysql_node.query(""" + INSERT INTO test_database_datatype.t1 (v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v26, v29, v30, v31, v32) values + (1, 11, 9223372036854775807, -1, 1, 11, 18446744073709551615, -1.1, 1.1, -1.111, 1.111, 1.1111, '2021-10-06', 'text', 'varchar', 'BLOB', '2021-10-06 18:32:57', '2021-10-06 18:32:57.482786', '2021-10-06 18:32:57', '2021-10-06 18:32:57.482786', b'1010', 11, 'varbinary', 'binary', 'RED'); + """) + clickhouse_node.query( + "CREATE DATABASE test_database_datatype ENGINE = MaterializeMySQL('{}:3306', 'test_database_datatype', 'root', 'clickhouse')".format( + service_name)) + + check_query(clickhouse_node, "SELECT name FROM system.tables WHERE database = 'test_database_datatype'", "t1\n") + # full synchronization check + check_query(clickhouse_node, "SELECT v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v26, v29, v30, v32 FROM test_database_datatype.t1 FORMAT TSV", + "1\t1\t11\t9223372036854775807\t-1\t1\t11\t18446744073709551615\t-1.1\t1.1\t-1.111\t1.111\t1.1111\t2021-10-06\ttext\tvarchar\tBLOB\t2021-10-06 18:32:57\t2021-10-06 18:32:57.482786\t2021-10-06 18:32:57\t2021-10-06 18:32:57.482786\t10\t11\tvarbinary\tRED\n") + + mysql_node.query(""" + INSERT INTO test_database_datatype.t1 (v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v26, v29, v30, v31, v32) values + 
(2, 22, 9223372036854775807, -2, 2, 22, 18446744073709551615, -2.2, 2.2, -2.22, 2.222, 2.2222, '2021-10-07', 'text', 'varchar', 'BLOB', '2021-10-07 18:32:57', '2021-10-07 18:32:57.482786', '2021-10-07 18:32:57', '2021-10-07 18:32:57.482786', b'1011', 22, 'varbinary', 'binary', 'GREEN' ); + """) + # increment synchronization check + check_query(clickhouse_node, "SELECT v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v26, v29, v30, v32 FROM test_database_datatype.t1 ORDER BY v1 FORMAT TSV", + "1\t1\t11\t9223372036854775807\t-1\t1\t11\t18446744073709551615\t-1.1\t1.1\t-1.111\t1.111\t1.1111\t2021-10-06\ttext\tvarchar\tBLOB\t2021-10-06 18:32:57\t2021-10-06 18:32:57.482786\t2021-10-06 18:32:57\t2021-10-06 18:32:57.482786\t10\t11\tvarbinary\tRED\n" + + "2\t2\t22\t9223372036854775807\t-2\t2\t22\t18446744073709551615\t-2.2\t2.2\t-2.22\t2.222\t2.2222\t2021-10-07\ttext\tvarchar\tBLOB\t2021-10-07 18:32:57\t2021-10-07 18:32:57.482786\t2021-10-07 18:32:57\t2021-10-07 18:32:57.482786\t11\t22\tvarbinary\tGREEN\n") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 5142a613799..501c0cd78fa 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -253,3 +253,7 @@ def test_table_table(started_cluster, started_mysql_8_0, started_mysql_5_7, clic def test_table_overrides(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): materialize_with_ddl.table_overrides(clickhouse_node, started_mysql_5_7, "mysql57") materialize_with_ddl.table_overrides(clickhouse_node, started_mysql_8_0, "mysql80") + +def test_materialized_database_support_all_kinds_of_mysql_datatype(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, started_mysql_5_7, "mysql57") diff --git a/tests/integration/test_merge_tree_azure_blob_storage/__init__.py b/tests/integration/test_merge_tree_azure_blob_storage/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_merge_tree_blob_storage/configs/config.d/bg_processing_pool_conf.xml b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/bg_processing_pool_conf.xml similarity index 100% rename from tests/integration/test_merge_tree_blob_storage/configs/config.d/bg_processing_pool_conf.xml rename to tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/bg_processing_pool_conf.xml diff --git a/tests/integration/test_merge_tree_blob_storage/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml similarity index 96% rename from tests/integration/test_merge_tree_blob_storage/configs/config.d/storage_conf.xml rename to tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml index c4f381db941..09fa0d6c767 100644 --- a/tests/integration/test_merge_tree_blob_storage/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml @@ -2,7 +2,7 @@ - blob_storage + azure_blob_storage http://azurite1:10000/devstoreaccount1 cont false diff --git 
a/tests/integration/test_merge_tree_blob_storage/configs/config.xml b/tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml similarity index 100% rename from tests/integration/test_merge_tree_blob_storage/configs/config.xml rename to tests/integration/test_merge_tree_azure_blob_storage/configs/config.xml diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py new file mode 100644 index 00000000000..92b9d52cf86 --- /dev/null +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -0,0 +1,372 @@ +import logging +import time +import os + +import pytest +from helpers.cluster import ClickHouseCluster, get_instances_dir +from helpers.utility import generate_values, replace_config, SafeThread + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +CONFIG_PATH = os.path.join(SCRIPT_DIR, './{}/node/configs/config.d/storage_conf.xml'.format(get_instances_dir())) + +NODE_NAME = "node" +TABLE_NAME = "blob_storage_table" +AZURE_BLOB_STORAGE_DISK = "blob_storage_disk" +LOCAL_DISK = "hdd" +CONTAINER_NAME = "cont" + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance(NODE_NAME, + main_configs=["configs/config.d/storage_conf.xml", "configs/config.d/bg_processing_pool_conf.xml"], + with_azurite=True) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + +# Note: use this for selects and inserts and create table queries. +# For inserts there is no guarantee that retries will not result in duplicates. +# But it is better to retry anyway because 'Connection was closed by the server' error +# happens in fact only for inserts because reads already have build-in retries in code. +def azure_query(node, query, try_num=3): + for i in range(try_num): + try: + return node.query(query) + except Exception as ex: + retriable_errors = [ + 'DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response', + ] + retry = False + for error in retriable_errors: + if error in str(ex): + retry = True + logging.info(f"Try num: {i}. 
Having retriable error: {ex}") + break + if not retry or i == try_num - 1: + raise Exception(ex) + continue + +def create_table(node, table_name, **additional_settings): + settings = { + "storage_policy": "blob_storage_policy", + "old_parts_lifetime": 1, + "index_granularity": 512 + } + settings.update(additional_settings) + + create_table_statement = f""" + CREATE TABLE {table_name} ( + dt Date, + id Int64, + data String, + INDEX min_max (id) TYPE minmax GRANULARITY 3 + ) ENGINE=MergeTree() + PARTITION BY dt + ORDER BY (dt, id) + SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}""" + + node.query(f"DROP TABLE IF EXISTS {table_name}") + azure_query(node, create_table_statement) + assert azure_query(node, f"SELECT COUNT(*) FROM {table_name} FORMAT Values") == "(0)" + + +def test_create_table(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + +def test_read_after_cache_is_wiped(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + values = "('2021-11-13',3,'hello'),('2021-11-14',4,'heyo')" + + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {values}") + + # Wipe cache + cluster.exec_in_container(cluster.get_container_id(NODE_NAME), ["rm", "-rf", "/var/lib/clickhouse/disks/blob_storage_disk/cache/"]) + + # After cache is populated again, only .bin files should be accessed from Blob Storage. + assert azure_query(node, f"SELECT * FROM {TABLE_NAME} order by dt, id FORMAT Values") == values + + +def test_simple_insert_select(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + values = "('2021-11-13',3,'hello')" + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {values}") + assert azure_query(node, f"SELECT dt, id, data FROM {TABLE_NAME} FORMAT Values") == values + blob_container_client = cluster.blob_service_client.get_container_client(CONTAINER_NAME) + assert len(list(blob_container_client.list_blobs())) >= 12 # 1 format file + 2 skip index files + 9 regular MergeTree files + leftovers from other tests + + +def test_inserts_selects(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + values1 = generate_values('2020-01-03', 4096) + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {values1}") + assert azure_query(node, f"SELECT * FROM {TABLE_NAME} order by dt, id FORMAT Values") == values1 + + values2 = generate_values('2020-01-04', 4096) + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {values2}") + assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY dt, id FORMAT Values") == values1 + "," + values2 + + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} where id = 1 FORMAT Values") == "(2)" + + +@pytest.mark.parametrize( + "merge_vertical", [ + (True), + (False), +]) +def test_insert_same_partition_and_merge(cluster, merge_vertical): + settings = {} + if merge_vertical: + settings['vertical_merge_algorithm_min_rows_to_activate'] = 0 + settings['vertical_merge_algorithm_min_columns_to_activate'] = 0 + + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME, **settings) + + node.query(f"SYSTEM STOP MERGES {TABLE_NAME}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 1024)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 2048)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 1024, -1)}") + 
azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 2048, -1)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}") + assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(distinct(id)) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + + node.query(f"SYSTEM START MERGES {TABLE_NAME}") + + # Wait for merges and old parts deletion + for attempt in range(0, 10): + parts_count = azure_query(node, f"SELECT COUNT(*) FROM system.parts WHERE table = '{TABLE_NAME}' FORMAT Values") + if parts_count == "(1)": + break + + if attempt == 9: + assert parts_count == "(1)" + + time.sleep(1) + + assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(distinct(id)) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + + +def test_alter_table_columns(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}") + + node.query(f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1") + # To ensure parts have been merged + node.query(f"OPTIMIZE TABLE {TABLE_NAME}") + + assert azure_query(node, f"SELECT sum(col1) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + assert azure_query(node, f"SELECT sum(col1) FROM {TABLE_NAME} WHERE id > 0 FORMAT Values") == "(4096)" + + node.query(f"ALTER TABLE {TABLE_NAME} MODIFY COLUMN col1 String", settings={"mutations_sync": 2}) + + assert azure_query(node, f"SELECT distinct(col1) FROM {TABLE_NAME} FORMAT Values") == "('1')" + + +def test_attach_detach_partition(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + + node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" + + node.query(f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + + node.query(f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" + + node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'") + node.query(f"ALTER TABLE {TABLE_NAME} DROP DETACHED PARTITION '2020-01-04'", settings={"allow_drop_detached": 1}) + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(0)" + + +def test_move_partition_to_another_disk(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + + node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + 
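+    # Moving the partition back to the Azure disk must keep all 8192 rows readable.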
+ node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{AZURE_BLOB_STORAGE_DISK}'") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + + +def test_table_manipulations(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + renamed_table = TABLE_NAME + "_renamed" + + node.query_with_retry(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") + node.query_with_retry(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") + + node.query(f"RENAME TABLE {TABLE_NAME} TO {renamed_table}") + assert azure_query(node, f"SELECT count(*) FROM {renamed_table} FORMAT Values") == "(8192)" + + node.query(f"RENAME TABLE {renamed_table} TO {TABLE_NAME}") + assert node.query(f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)" + + node.query(f"DETACH TABLE {TABLE_NAME}") + node.query(f"ATTACH TABLE {TABLE_NAME}") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" + + node.query(f"TRUNCATE TABLE {TABLE_NAME}") + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(0)" + + +@pytest.mark.long_run +def test_move_replace_partition_to_another_table(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + table_clone_name = TABLE_NAME + "_clone" + + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 256)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 256)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 256, -1)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-06', 256, -1)}") + assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" + + create_table(node, table_clone_name) + + node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}") + node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}") + assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(512)" + assert azure_query(node, f"SELECT sum(id) FROM {table_clone_name} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(*) FROM {table_clone_name} FORMAT Values") == "(512)" + + # Add new partitions to source table, but with different values and replace them from copied table. 
+ azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 256, -1)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 256)}") + assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" + + node.query(f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}") + node.query(f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}") + assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" + assert azure_query(node, f"SELECT sum(id) FROM {table_clone_name} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(*) FROM {table_clone_name} FORMAT Values") == "(512)" + + node.query(f"DROP TABLE {table_clone_name} NO DELAY") + assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" + assert azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" + + node.query(f"ALTER TABLE {TABLE_NAME} FREEZE") + + node.query(f"DROP TABLE {TABLE_NAME} NO DELAY") + + +def test_freeze_unfreeze(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + backup1 = 'backup1' + backup2 = 'backup2' + + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") + node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") + node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'") + + azure_query(node, f"TRUNCATE TABLE {TABLE_NAME}") + + # Unfreeze single partition from backup1. + node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'") + # Unfreeze all partitions from backup2. + node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'") + + +def test_apply_new_settings(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") + + # Force multi-part upload mode. + replace_config( + CONFIG_PATH, + "33554432", + "4096") + + node.query("SYSTEM RELOAD CONFIG") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096, -1)}") + + +# NOTE: this test takes a couple of minutes when run together with other tests +@pytest.mark.long_run +def test_restart_during_load(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + + # Force multi-part upload mode. 
+ replace_config(CONFIG_PATH, "false", "") + + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") + azure_query(node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 4096, -1)}") + + + def read(): + for ii in range(0, 5): + logging.info(f"Executing {ii} query") + assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" + logging.info(f"Query {ii} executed") + time.sleep(0.2) + + def restart_disk(): + for iii in range(0, 2): + logging.info(f"Restarting disk, attempt {iii}") + node.query(f"SYSTEM RESTART DISK {AZURE_BLOB_STORAGE_DISK}") + logging.info(f"Disk restarted, attempt {iii}") + time.sleep(0.5) + + threads = [] + for _ in range(0, 4): + threads.append(SafeThread(target=read)) + + threads.append(SafeThread(target=restart_disk)) + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + +def test_big_insert(cluster): + node = cluster.instances[NODE_NAME] + create_table(node, TABLE_NAME) + azure_query(node, f"INSERT INTO {TABLE_NAME} select '2020-01-03', number, toString(number) from numbers(5000000)") + assert int(azure_query(node, f"SELECT count() FROM {TABLE_NAME}")) == 5000000 diff --git a/tests/integration/test_merge_tree_blob_storage/test.py b/tests/integration/test_merge_tree_blob_storage/test.py deleted file mode 100644 index 2a38ac128ac..00000000000 --- a/tests/integration/test_merge_tree_blob_storage/test.py +++ /dev/null @@ -1,344 +0,0 @@ -import logging -import time -import os - -import pytest -from helpers.cluster import ClickHouseCluster, get_instances_dir -from helpers.utility import generate_values, replace_config, SafeThread - - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) -CONFIG_PATH = os.path.join(SCRIPT_DIR, './{}/node/configs/config.d/storage_conf.xml'.format(get_instances_dir())) - -NODE_NAME = "node" -TABLE_NAME = "blob_storage_table" -BLOB_STORAGE_DISK = "blob_storage_disk" -LOCAL_DISK = "hdd" -CONTAINER_NAME = "cont" - - -@pytest.fixture(scope="module") -def cluster(): - try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance(NODE_NAME, - main_configs=["configs/config.d/storage_conf.xml", "configs/config.d/bg_processing_pool_conf.xml"], - with_azurite=True) - logging.info("Starting cluster...") - cluster.start() - logging.info("Cluster started") - - yield cluster - finally: - cluster.shutdown() - - -def create_table(node, table_name, **additional_settings): - settings = { - "storage_policy": "blob_storage_policy", - "old_parts_lifetime": 1, - "index_granularity": 512 - } - settings.update(additional_settings) - - create_table_statement = f""" - CREATE TABLE {table_name} ( - dt Date, - id Int64, - data String, - INDEX min_max (id) TYPE minmax GRANULARITY 3 - ) ENGINE=MergeTree() - PARTITION BY dt - ORDER BY (dt, id) - SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}""" - - node.query(f"DROP TABLE IF EXISTS {table_name}") - node.query(create_table_statement) - assert node.query(f"SELECT COUNT(*) FROM {table_name} FORMAT Values") == "(0)" - - -def test_create_table(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - -def test_read_after_cache_is_wiped(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - values = "('2021-11-13',3,'hello'),('2021-11-14',4,'heyo')" - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {values}") - - # Wipe cache - cluster.exec_in_container(cluster.get_container_id(NODE_NAME), ["rm", "-rf", 
"/var/lib/clickhouse/disks/blob_storage_disk/cache/"]) - - # After cache is populated again, only .bin files should be accessed from Blob Storage. - assert node.query(f"SELECT * FROM {TABLE_NAME} order by dt, id FORMAT Values") == values - - -def test_simple_insert_select(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - values = "('2021-11-13',3,'hello')" - node.query(f"INSERT INTO {TABLE_NAME} VALUES {values}") - assert node.query(f"SELECT dt, id, data FROM {TABLE_NAME} FORMAT Values") == values - blob_container_client = cluster.blob_service_client.get_container_client(CONTAINER_NAME) - assert len(list(blob_container_client.list_blobs())) >= 12 # 1 format file + 2 skip index files + 9 regular MergeTree files + leftovers from other tests - - -def test_inserts_selects(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - values1 = generate_values('2020-01-03', 4096) - node.query(f"INSERT INTO {TABLE_NAME} VALUES {values1}") - assert node.query(f"SELECT * FROM {TABLE_NAME} order by dt, id FORMAT Values") == values1 - - values2 = generate_values('2020-01-04', 4096) - node.query(f"INSERT INTO {TABLE_NAME} VALUES {values2}") - assert node.query(f"SELECT * FROM {TABLE_NAME} ORDER BY dt, id FORMAT Values") == values1 + "," + values2 - - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} where id = 1 FORMAT Values") == "(2)" - - -@pytest.mark.parametrize( - "merge_vertical", [ - (True), - (False), -]) -def test_insert_same_partition_and_merge(cluster, merge_vertical): - settings = {} - if merge_vertical: - settings['vertical_merge_algorithm_min_rows_to_activate'] = 0 - settings['vertical_merge_algorithm_min_columns_to_activate'] = 0 - - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME, **settings) - - node.query(f"SYSTEM STOP MERGES {TABLE_NAME}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 1024)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 2048)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 1024, -1)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 2048, -1)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}") - assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(distinct(id)) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - - node.query(f"SYSTEM START MERGES {TABLE_NAME}") - - # Wait for merges and old parts deletion - for attempt in range(0, 10): - parts_count = node.query(f"SELECT COUNT(*) FROM system.parts WHERE table = '{TABLE_NAME}' FORMAT Values") - if parts_count == "(1)": - break - - if attempt == 9: - assert parts_count == "(1)" - - time.sleep(1) - - assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(distinct(id)) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - - -def test_alter_table_columns(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}") - - node.query(f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1") - # To ensure parts have been merged - node.query(f"OPTIMIZE TABLE {TABLE_NAME}") - - assert 
node.query(f"SELECT sum(col1) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - assert node.query(f"SELECT sum(col1) FROM {TABLE_NAME} WHERE id > 0 FORMAT Values") == "(4096)" - - node.query(f"ALTER TABLE {TABLE_NAME} MODIFY COLUMN col1 String", settings={"mutations_sync": 2}) - - assert node.query(f"SELECT distinct(col1) FROM {TABLE_NAME} FORMAT Values") == "('1')" - - -def test_attach_detach_partition(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - - node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" - - node.query(f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - - node.query(f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" - - node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'") - node.query(f"ALTER TABLE {TABLE_NAME} DROP DETACHED PARTITION '2020-01-04'", settings={"allow_drop_detached": 1}) - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(0)" - - -def test_move_partition_to_another_disk(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - - node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - - node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{BLOB_STORAGE_DISK}'") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - - -def test_table_manipulations(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - renamed_table = TABLE_NAME + "_renamed" - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") - - node.query(f"RENAME TABLE {TABLE_NAME} TO {renamed_table}") - assert node.query(f"SELECT count(*) FROM {renamed_table} FORMAT Values") == "(8192)" - - node.query(f"RENAME TABLE {renamed_table} TO {TABLE_NAME}") - assert node.query(f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)" - - node.query(f"DETACH TABLE {TABLE_NAME}") - node.query(f"ATTACH TABLE {TABLE_NAME}") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" - - node.query(f"TRUNCATE TABLE {TABLE_NAME}") - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(0)" - - -@pytest.mark.long_run -def test_move_replace_partition_to_another_table(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - table_clone_name = TABLE_NAME + "_clone" - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 256)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 256)}") - 
node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 256, -1)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-06', 256, -1)}") - assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" - - create_table(node, table_clone_name) - - node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}") - node.query(f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}") - assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(512)" - assert node.query(f"SELECT sum(id) FROM {table_clone_name} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(*) FROM {table_clone_name} FORMAT Values") == "(512)" - - # Add new partitions to source table, but with different values and replace them from copied table. - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 256, -1)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 256)}") - assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" - - node.query(f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}") - node.query(f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}") - assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" - assert node.query(f"SELECT sum(id) FROM {table_clone_name} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(*) FROM {table_clone_name} FORMAT Values") == "(512)" - - node.query(f"DROP TABLE {table_clone_name} NO DELAY") - assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" - assert node.query(f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" - - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE") - - node.query(f"DROP TABLE {TABLE_NAME} NO DELAY") - - -def test_freeze_unfreeze(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - backup1 = 'backup1' - backup2 = 'backup2' - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'") - - node.query(f"TRUNCATE TABLE {TABLE_NAME}") - - # Unfreeze single partition from backup1. - node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'") - # Unfreeze all partitions from backup2. - node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'") - - -def test_apply_new_settings(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}") - - # Force multi-part upload mode. 
- replace_config( - CONFIG_PATH, - "33554432", - "4096") - - node.query("SYSTEM RELOAD CONFIG") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096, -1)}") - - -# NOTE: this test takes a couple of minutes when run together with other tests -@pytest.mark.long_run -def test_restart_during_load(cluster): - node = cluster.instances[NODE_NAME] - create_table(node, TABLE_NAME) - - # Force multi-part upload mode. - replace_config(CONFIG_PATH, "false", "") - - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}") - node.query(f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-05', 4096, -1)}") - - - def read(): - for ii in range(0, 5): - logging.info(f"Executing {ii} query") - assert node.query(f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" - logging.info(f"Query {ii} executed") - time.sleep(0.2) - - def restart_disk(): - for iii in range(0, 2): - logging.info(f"Restarting disk, attempt {iii}") - node.query(f"SYSTEM RESTART DISK {BLOB_STORAGE_DISK}") - logging.info(f"Disk restarted, attempt {iii}") - time.sleep(0.5) - - threads = [] - for _ in range(0, 4): - threads.append(SafeThread(target=read)) - - threads.append(SafeThread(target=restart_disk)) - - for thread in threads: - thread.start() - - for thread in threads: - thread.join() diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 1e607e94119..04981523432 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -456,3 +456,16 @@ def test_s3_disk_reads_on_unstable_connection(cluster, node_name): for i in range(30): print(f"Read sequence {i}") assert node.query("SELECT sum(id) FROM s3_test").splitlines() == ["40499995500000"] + + +@pytest.mark.parametrize("node_name", ["node"]) +def test_lazy_seek_optimization_for_async_read(cluster, node_name): + node = cluster.instances[node_name] + node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3';") + node.query("INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000000") + node.query("SELECT * FROM s3_test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10") + node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + minio = cluster.minio_client + for obj in list(minio.list_objects(cluster.minio_bucket, 'data/')): + minio.remove_object(cluster.minio_bucket, obj.object_name) diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index b6b47417523..44e7e0ae5ad 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -37,7 +37,6 @@ def fail_request(cluster, request): ["curl", "-s", "http://resolver:8080/fail_request/{}".format(request)]) assert response == 'OK', 'Expected "OK", but got "{}"'.format(response) - def throttle_request(cluster, request): response = cluster.exec_in_container(cluster.get_container_id('resolver'), ["curl", "-s", "http://resolver:8080/throttle_request/{}".format(request)]) diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py index babbea2beba..e12b69cdf17 100644 --- a/tests/integration/test_merge_tree_s3_restore/test.py +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -7,6 +7,7 @@ import time import pytest from 
helpers.cluster import ClickHouseCluster, get_instances_dir + SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) NOT_RESTORABLE_CONFIG_PATH = os.path.join(SCRIPT_DIR, './{}/node_not_restorable/configs/config.d/storage_conf_not_restorable.xml'.format(get_instances_dir())) COMMON_CONFIGS = ["configs/config.d/bg_processing_pool_conf.xml", "configs/config.d/clusters.xml"] diff --git a/tests/integration/test_merge_tree_s3_with_cache/test.py b/tests/integration/test_merge_tree_s3_with_cache/test.py index e15eaf61812..be3d2709873 100644 --- a/tests/integration/test_merge_tree_s3_with_cache/test.py +++ b/tests/integration/test_merge_tree_s3_with_cache/test.py @@ -36,7 +36,6 @@ def get_query_stat(instance, hint): result[ev[0]] = int(ev[1]) return result - @pytest.mark.parametrize("min_rows_for_wide_part,read_requests", [(0, 2), (8192, 1)]) def test_write_is_cached(cluster, min_rows_for_wide_part, read_requests): node = cluster.instances["node"] diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 49206ab1abe..ff1c955d78b 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -6,9 +6,10 @@ import pymysql.cursors import pytest from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager cluster = ClickHouseCluster(__file__) -clickhouse_node = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml', 'configs/named_collections.xml'], with_mysql=True) +clickhouse_node = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml', 'configs/named_collections.xml'], with_mysql=True, stay_alive=True) @pytest.fixture(scope="module") @@ -27,6 +28,7 @@ class MySQLNodeInstance: self.hostname = hostname self.password = password self.mysql_connection = None # lazy init + self.ip_address = hostname def query(self, execution_query): if self.mysql_connection is None: @@ -424,3 +426,24 @@ def test_predefined_connection_configuration(started_cluster): clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL(mysql1, port=3306)") assert clickhouse_node.query("SELECT count() FROM `test_database`.`test_table`").rstrip() == '100' + + +def test_restart_server(started_cluster): + with contextlib.closing(MySQLNodeInstance('root', 'clickhouse', started_cluster.mysql_ip, started_cluster.mysql_port)) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_restart") + clickhouse_node.query("DROP DATABASE IF EXISTS test_restart") + clickhouse_node.query_and_get_error("CREATE DATABASE test_restart ENGINE = MySQL('mysql57:3306', 'test_restart', 'root', 'clickhouse')") + assert 'test_restart' not in clickhouse_node.query('SHOW DATABASES') + + mysql_node.query("CREATE DATABASE test_restart DEFAULT CHARACTER SET 'utf8'") + mysql_node.query("CREATE TABLE `test_restart`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;") + clickhouse_node.query("CREATE DATABASE test_restart ENGINE = MySQL('mysql57:3306', 'test_restart', 'root', 'clickhouse')") + + assert 'test_restart' in clickhouse_node.query('SHOW DATABASES') + assert 'test_table' in clickhouse_node.query('SHOW TABLES FROM test_restart') + + with PartitionManager() as pm: + pm.partition_instances(clickhouse_node, mysql_node, action='REJECT --reject-with tcp-reset') + clickhouse_node.restart_clickhouse() + clickhouse_node.query_and_get_error('SHOW TABLES FROM test_restart') + assert 
'test_table' in clickhouse_node.query('SHOW TABLES FROM test_restart') diff --git a/tests/integration/test_parts_delete_zookeeper/test.py b/tests/integration/test_parts_delete_zookeeper/test.py index 8a4aafaa55c..62e14b68bd1 100644 --- a/tests/integration/test_parts_delete_zookeeper/test.py +++ b/tests/integration/test_parts_delete_zookeeper/test.py @@ -49,12 +49,16 @@ def test_merge_doesnt_work_without_zookeeper(start_cluster): node1.query("INSERT INTO test_table VALUES ('2018-10-01', 1), ('2018-10-02', 2), ('2018-10-03', 3)") node1.query("INSERT INTO test_table VALUES ('2018-10-01', 4), ('2018-10-02', 5), ('2018-10-03', 6)") - assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "2\n" + assert node1.query("SELECT count(*) from system.parts where table = 'test_table' and active") == "2\n" with PartitionManager() as pm: node1.query("OPTIMIZE TABLE test_table FINAL") pm.drop_instance_zk_connections(node1) - time.sleep(10) # > old_parts_lifetime - assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "3\n" + # unfortunately we can be too fast and delete node before partition with ZK + if node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "1\n": + print("We were too fast and deleted parts before partition with ZK") + else: + time.sleep(10) # > old_parts_lifetime + assert node1.query("SELECT count(*) from system.parts where table = 'test_table'") == "3\n" assert_eq_with_retry(node1, "SELECT count(*) from system.parts where table = 'test_table' and active = 1", "1") diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 99f2facbaf6..7aee454c4a9 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -34,6 +34,10 @@ postgres_table_template_4 = """ CREATE TABLE IF NOT EXISTS "{}"."{}" ( key Integer NOT NULL, value Integer, PRIMARY KEY(key)) """ +postgres_table_template_5 = """ + CREATE TABLE IF NOT EXISTS "{}" ( + key Integer NOT NULL, value UUID, PRIMARY KEY(key)) + """ def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False): if database == True: @@ -93,7 +97,7 @@ def drop_clickhouse_postgres_db(name='postgres_database'): def create_materialized_db(ip, port, materialized_database='test_database', postgres_database='postgres_database', - settings=[]): + settings=[], table_overrides=''): instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") create_query = f"CREATE DATABASE {materialized_database} ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', 'postgres', 'mysecretpassword')" if len(settings) > 0: @@ -102,6 +106,7 @@ def create_materialized_db(ip, port, if i != 0: create_query += ', ' create_query += settings[i] + create_query += table_overrides instance.query(create_query) assert materialized_database in instance.query('SHOW DATABASES') @@ -173,7 +178,7 @@ def assert_number_of_columns(expected, table_name, database_name='test_database' def check_tables_are_synchronized(table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''): assert_nested_table_is_created(table_name, materialized_database, schema_name) - print("Checking table is synchronized:", table_name) + print(f"Checking table is synchronized. 
Table name: {table_name}, table schema: {schema_name}") expected = instance.query('select * from {}.{} order by {};'.format(postgres_database, table_name, order_by)) if len(schema_name) == 0: result = instance.query('select * from {}.{} order by {};'.format(materialized_database, table_name, order_by)) @@ -351,6 +356,11 @@ def test_remove_table_from_replication(started_cluster): for i in range(NUM_TABLES): cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + # Removing from replication table which does not exist in PostgreSQL must be ok. + instance.query('DETACH TABLE test_database.postgresql_replica_0'); + assert instance.contains_in_log("from publication, because table does not exist in PostgreSQL") + drop_materialized_db() + def test_predefined_connection_configuration(started_cluster): drop_materialized_db() @@ -374,6 +384,7 @@ def test_database_with_single_non_default_schema(started_cluster): NUM_TABLES=5 schema_name = 'test_schema' + materialized_db = 'test_database' clickhouse_postgres_db = 'postgres_database_with_schema' global insert_counter insert_counter = 0 @@ -425,6 +436,14 @@ def test_database_with_single_non_default_schema(started_cluster): instance.query(f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)") assert_number_of_columns(3, f'postgresql_replica_{altered_table}') check_tables_are_synchronized(f"postgresql_replica_{altered_table}", postgres_database=clickhouse_postgres_db); + + print('DETACH-ATTACH') + detached_table_name = "postgresql_replica_1" + instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name}") + assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") + instance.query(f"ATTACH TABLE {materialized_db}.{detached_table_name}") + check_tables_are_synchronized(detached_table_name, postgres_database=clickhouse_postgres_db); + drop_materialized_db() @@ -435,6 +454,7 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): NUM_TABLES = 5 schema_name = 'test_schema' clickhouse_postgres_db = 'postgres_database_with_schema' + materialized_db = 'test_database' publication_tables = '' global insert_counter insert_counter = 0 @@ -489,6 +509,15 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): instance.query(f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)") assert_number_of_columns(3, f'{schema_name}.postgresql_replica_{altered_table}') check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=schema_name, postgres_database=clickhouse_postgres_db); + + print('DETACH-ATTACH') + detached_table_name = "postgresql_replica_1" + instance.query(f"DETACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`") + assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") + instance.query(f"ATTACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`") + assert_show_tables("test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n") + check_tables_are_synchronized(detached_table_name, schema_name=schema_name, postgres_database=clickhouse_postgres_db); + drop_materialized_db() @@ -499,6 +528,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): NUM_TABLES = 2 schemas_num = 2 schema_list = 
'schema0, schema1' + materialized_db = 'test_database' global insert_counter insert_counter = 0 @@ -552,14 +582,50 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): print('ALTER') altered_schema = random.randint(0, schemas_num-1) altered_table = random.randint(0, NUM_TABLES-1) + clickhouse_postgres_db = f'clickhouse_postgres_db{altered_schema}' cursor.execute(f"ALTER TABLE schema{altered_schema}.postgresql_replica_{altered_table} ADD COLUMN value2 integer") instance.query(f"INSERT INTO clickhouse_postgres_db{altered_schema}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(1000 * {insert_counter}, 1000)") assert_number_of_columns(3, f'schema{altered_schema}.postgresql_replica_{altered_table}') - check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=schema_name, postgres_database=clickhouse_postgres_db); + check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=f"schema{altered_schema}", postgres_database=clickhouse_postgres_db); + + print('DETACH-ATTACH') + detached_table_name = "postgresql_replica_1" + detached_table_schema = "schema0" + clickhouse_postgres_db = f'clickhouse_postgres_db0' + instance.query(f"DETACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`") + assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") + instance.query(f"ATTACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`") + assert_show_tables("schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n") + check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=detached_table_schema, postgres_database=clickhouse_postgres_db); + drop_materialized_db() +def test_table_override(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) + cursor = conn.cursor() + table_name = 'table_override' + materialized_database = 'test_database' + create_postgres_table(cursor, table_name, template=postgres_table_template_5); + instance.query(f"create table {table_name}(key Int32, value UUID) engine = PostgreSQL (postgres1, table={table_name})") + instance.query(f"insert into {table_name} select number, generateUUIDv4() from numbers(10)") + table_overrides = f" TABLE OVERRIDE {table_name} (COLUMNS (key Int32, value UUID))" + create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=[f"materialized_postgresql_tables_list = '{table_name}'"], table_overrides=table_overrides) + assert_nested_table_is_created(table_name, materialized_database) + result = instance.query(f"show create table {materialized_database}.{table_name}") + print(result) + expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` UUID,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nORDER BY tuple(key)" + assert(result.strip() == expected) + time.sleep(5) + query = f"select * from {materialized_database}.{table_name} order by key" + expected = instance.query(f"select * from {table_name} order by key") + assert_eq_with_retry(instance, query, expected) + drop_materialized_db() + drop_postgres_table(cursor, table_name) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git 
a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py index 793abc53566..edf39969b47 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py @@ -65,7 +65,6 @@ def create_table(cluster, additional_settings=None): list(cluster.instances.values())[0].query(create_table_statement) - @pytest.fixture(autouse=True) def drop_table(cluster): yield diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index 5be98ab5188..1c3713c02a2 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -60,6 +60,8 @@ def wait_for_active_parts(node, num_expected_parts, table_name, timeout=30): assert num_parts == num_expected_parts +# Result of `get_large_objects_count` can be changed in other tests, so run this case at the beginning +@pytest.mark.order(0) @pytest.mark.parametrize( "policy", ["s3"] ) @@ -82,7 +84,7 @@ def test_s3_zero_copy_replication(cluster, policy): assert node1.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data')" assert node2.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data')" - # Based on version 20.x - should be only one file with size 100+ (checksums.txt), used by both nodes + # Based on version 21.x - should be only 1 file with size 100+ (checksums.txt), used by both nodes assert get_large_objects_count(cluster) == 1 node2.query("INSERT INTO s3_test VALUES (2,'data'),(3,'data')") @@ -91,15 +93,15 @@ def test_s3_zero_copy_replication(cluster, policy): assert node2.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data'),(2,'data'),(3,'data')" assert node1.query("SELECT * FROM s3_test order by id FORMAT Values") == "(0,'data'),(1,'data'),(2,'data'),(3,'data')" - # Based on version 20.x - two parts + # Based on version 21.x - two parts wait_for_large_objects_count(cluster, 2) node1.query("OPTIMIZE TABLE s3_test FINAL") - # Based on version 20.x - after merge, two old parts and one merged + # Based on version 21.x - after merge, two old parts and one merged wait_for_large_objects_count(cluster, 3) - # Based on version 20.x - after cleanup - only one merged part + # Based on version 21.x - after cleanup - only one merged part wait_for_large_objects_count(cluster, 1, timeout=60) node1.query("DROP TABLE IF EXISTS s3_test NO DELAY") diff --git a/tests/integration/test_server_reload/.gitignore b/tests/integration/test_server_reload/.gitignore new file mode 100644 index 00000000000..edf565ec632 --- /dev/null +++ b/tests/integration/test_server_reload/.gitignore @@ -0,0 +1 @@ +_gen diff --git a/tests/integration/test_server_reload/__init__.py b/tests/integration/test_server_reload/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_server_reload/configs/default_passwd.xml b/tests/integration/test_server_reload/configs/default_passwd.xml new file mode 100644 index 00000000000..5c23be0dcb0 --- /dev/null +++ b/tests/integration/test_server_reload/configs/default_passwd.xml @@ -0,0 +1,13 @@ + + + + + + + + + + 123 + + + diff --git a/tests/integration/test_server_reload/configs/dhparam.pem b/tests/integration/test_server_reload/configs/dhparam.pem new file mode 100644 index 00000000000..fb935b9c898 --- /dev/null +++ 
b/tests/integration/test_server_reload/configs/dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEAkPGhfLY5nppeQkFBKYRpiisxzrRQfyyTUu6aabZP2CbAMAuoYzaC +Z+iqeWSQZKRYeA21SZXkC9xE1e5FJsc5IWzCRiMNZeLuj4ApUNysMu89DpX8/b91 ++Ka6wRJnaO43ZqHj/9FpU4JiYtxoIpXDC9HeiSAnwLwJc3L+nkYfnSGgvzWIxhGV +gCoVmVBoTe7wrqCyVlM5nrNZSjhlSugvXmu2bSK3MwYF08QLKvlF68eedbs0PMWh +WC0bFM/X7gMBEqL4DiINufAShbZPKxD6eL2APiHPUo6xun3ed/Po/5j8QBmiku0c +5Jb12ZhOTRTQjaRg2aFF8LPdW2tDE7HmewIBAg== +-----END DH PARAMETERS----- diff --git a/tests/integration/test_server_reload/configs/ports_from_zk.xml b/tests/integration/test_server_reload/configs/ports_from_zk.xml new file mode 100644 index 00000000000..ae3435a3d3c --- /dev/null +++ b/tests/integration/test_server_reload/configs/ports_from_zk.xml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/tests/integration/test_server_reload/configs/server.crt b/tests/integration/test_server_reload/configs/server.crt new file mode 100644 index 00000000000..6f4deca038f --- /dev/null +++ b/tests/integration/test_server_reload/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJAIhI9ozZJ+TWMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0xOTA0MjIwNDMyNTJaFw0yMDA0MjEwNDMyNTJaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAK+wVUEdqF2uXvN0MJBgnAHyXi6JTi4p/F6igsrCjSNjJWzHH0vQmK8ujfcF +CkifW88i+W5eHctuEtQqNHK+t9x9YiZtXrj6m/XkOXs20mYgENSmbbbHbriTPnZB +zZrq6UqMlwIHNNAa+I3NMORQxVRaI0ybXnGVO5elr70xHpk03xL0JWKHpEqYp4db +2aBQgF6y3Ww4khxjIYqpUYXWXGFnVIRU7FKVEAM1xyKqvQzXjQ5sVM/wyHknveEF +3b/X4ggN+KNl5KOc0cWDh1/XaatJAPaUUPqZcq76tynLbP64Xm3dxHcj+gtRkO67 +ef6MSg6l63m3XQP6Qb+MIkd06OsCAwEAAaNQME4wHQYDVR0OBBYEFDmODTO8QLDN +ykR3x0LIOnjNhrKhMB8GA1UdIwQYMBaAFDmODTO8QLDNykR3x0LIOnjNhrKhMAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAAwaiJc7uqEpnH3aukbftDwX +m8GfEnj1HVdgg+9GGNq+9rvUYBF6gdPmjRCX9dO0cclLFx8jc2org0rTSq9WoOhX +E6qL4Eqrmc5SE3Y9jZM0h6GRD4oXK014FmtZ3T6ddZU3dQLj3BS2r1XrvmubTvGN +ZuTJNY8nx8Hh6H5XINmsEjUF9E5hog+PwCE03xt2adIdYL+gsbxASeNYyeUFpZv5 +zcXR3VoakBWnAaOVgCHq2qh96QAnL7ZKzFkGf/MdwV10KU3dmb+ICbQUUdf9Gc17 +aaDCIRws312F433FdXBkGs2UkB7ZZme9dfn6O1QbeTNvex2VLMqYx/CTkfFbOQA= +-----END CERTIFICATE----- diff --git a/tests/integration/test_server_reload/configs/server.key b/tests/integration/test_server_reload/configs/server.key new file mode 100644 index 00000000000..6eddb3295db --- /dev/null +++ b/tests/integration/test_server_reload/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCvsFVBHahdrl7z +dDCQYJwB8l4uiU4uKfxeooLKwo0jYyVsxx9L0JivLo33BQpIn1vPIvluXh3LbhLU +KjRyvrfcfWImbV64+pv15Dl7NtJmIBDUpm22x264kz52Qc2a6ulKjJcCBzTQGviN +zTDkUMVUWiNMm15xlTuXpa+9MR6ZNN8S9CVih6RKmKeHW9mgUIBest1sOJIcYyGK +qVGF1lxhZ1SEVOxSlRADNcciqr0M140ObFTP8Mh5J73hBd2/1+IIDfijZeSjnNHF +g4df12mrSQD2lFD6mXKu+rcpy2z+uF5t3cR3I/oLUZDuu3n+jEoOpet5t10D+kG/ +jCJHdOjrAgMBAAECggEARF66zrxb6RkSmmt8+rKeA6PuQu3sHsr4C1vyyjUr97l9 +tvdGlpp20LWtSZQMjHZ3pARYTTsTHTeY3DgQcRcHNicVKx8k3ZepWeeW9vw+pL+V +zSt3RsoVrH6gsCSrfr4sS3aqzX9AbjwQvh48CJ3mLQ1m70kHV+xbZIh1+4pB/hyP +1wKyUE18ZkOptXvO/TtoHzLQCecpkXtWzmry1Eh2isvXA+NMrAtLibGsyM1mtm7i +5ozevzHabvvCDBEe+KgZdONgVhhhvm2eOd+/s4w3rw4ETud4fI/ZAJyWXhiIKFnA +VJbElWruSAoVBW7p2bsF5PbmVzvo8vXL+VylxYD+AQKBgQDhLoRKTVhNkn/QjKxq +sdOh+QZra0LzjVpAmkQzu7wZMSHEz9qePQciDQQrYKrmRF1vNcIRCVUTqWYheJ/1 +lKRrCGa0ab6k96zkWMqLHD5u+UeJV7r1dJIx08ME9kNJ+x/XtB8klRIji16NiQUS +qc6p8z0M2AnbJzsRfWZRH8FeYwKBgQDHu8dzdtVGI7MtxfPOE/bfajiopDg8BdTC 
+pdug2T8XofRHRq7Q+0vYjTAZFT/slib91Pk6VvvPdo9VBZiL4omv4dAq6mOOdX/c +U14mJe1X5GCrr8ExZ8BfNJ3t/6sV1fcxyJwAw7iBguqxA2JqdM/wFk10K8XqvzVn +CD6O9yGt2QKBgFX1BMi8N538809vs41S7l9hCQNOQZNo/O+2M5yv6ECRkbtoQKKw +1x03bMUGNJaLuELweXE5Z8GGo5bZTe5X3F+DKHlr+DtO1C+ieUaa9HY2MAmMdLCn +2/qrREGLo+oEs4YKmuzC/taUp/ZNPKOAMISNdluFyFVg51pozPrgrVbTAoGBAKkE +LBl3O67o0t0vH8sJdeVFG8EJhlS0koBMnfgVHqC++dm+5HwPyvTrNQJkyv1HaqNt +r6FArkG3ED9gRuBIyT6+lctbIPgSUip9mbQqcBfqOCvQxGksZMur2ODncz09HLtS +CUFUXjOqNzOnq4ZuZu/Bz7U4vXiSaXxQq6+LTUKxAoGAFZU/qrI06XxnrE9A1X0W +l7DSkpZaDcu11NrZ473yONih/xOZNh4SSBpX8a7F6Pmh9BdtGqphML8NFPvQKcfP +b9H2iid2tc292uyrUEb5uTMmv61zoTwtitqLzO0+tS6PT3fXobX+eyeEWKzPBljL +HFtxG5CCXpkdnWRmaJnhTzA= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_server_reload/configs/ssl_conf.xml b/tests/integration/test_server_reload/configs/ssl_conf.xml new file mode 100644 index 00000000000..43b25032059 --- /dev/null +++ b/tests/integration/test_server_reload/configs/ssl_conf.xml @@ -0,0 +1,18 @@ + + + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + + /etc/clickhouse-server/config.d/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + diff --git a/tests/integration/test_server_reload/protos/clickhouse_grpc.proto b/tests/integration/test_server_reload/protos/clickhouse_grpc.proto new file mode 100644 index 00000000000..c6cafaf6e40 --- /dev/null +++ b/tests/integration/test_server_reload/protos/clickhouse_grpc.proto @@ -0,0 +1,174 @@ +/* This file describes gRPC protocol supported in ClickHouse. + * + * To use this protocol a client should send one or more messages of the QueryInfo type + * and then receive one or more messages of the Result type. + * According to that the service provides four methods for that: + * ExecuteQuery(QueryInfo) returns (Result) + * ExecuteQueryWithStreamInput(stream QueryInfo) returns (Result) + * ExecuteQueryWithStreamOutput(QueryInfo) returns (stream Result) + * ExecuteQueryWithStreamIO(stream QueryInfo) returns (stream Result) + * It's up to the client to choose which method to use. + * For example, ExecuteQueryWithStreamInput() allows the client to add data multiple times + * while executing a query, which is suitable for inserting many rows. + */ + +syntax = "proto3"; + +package clickhouse.grpc; + +message NameAndType { + string name = 1; + string type = 2; +} + +// Describes an external table - a table which will exists only while a query is executing. +message ExternalTable { + // Name of the table. If omitted, "_data" is used. + string name = 1; + + // Columns of the table. Types are required, names can be omitted. If the names are omitted, "_1", "_2", ... is used. + repeated NameAndType columns = 2; + + // Data to insert to the external table. + // If a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used, + // then data for insertion to the same external table can be split between multiple QueryInfos. + bytes data = 3; + + // Format of the data to insert to the external table. + string format = 4; + + // Settings for executing that insertion, applied after QueryInfo.settings. 
+ map<string, string> settings = 5; +} + +enum CompressionAlgorithm { + NO_COMPRESSION = 0; + DEFLATE = 1; + GZIP = 2; + STREAM_GZIP = 3; +} + +enum CompressionLevel { + COMPRESSION_NONE = 0; + COMPRESSION_LOW = 1; + COMPRESSION_MEDIUM = 2; + COMPRESSION_HIGH = 3; +} + +message Compression { + CompressionAlgorithm algorithm = 1; + CompressionLevel level = 2; +} + +// Information about a query which a client sends to a ClickHouse server. +// The first QueryInfo can set any of the following fields. Extra QueryInfos only add extra data. +// In extra QueryInfos only `input_data`, `external_tables`, `next_query_info` and `cancel` fields can be set. +message QueryInfo { + string query = 1; + string query_id = 2; + map<string, string> settings = 3; + + // Default database. + string database = 4; + + // Input data, used both as data for INSERT query and as data for the input() function. + bytes input_data = 5; + + // Delimiter for input_data, inserted between input_data from adjacent QueryInfos. + bytes input_data_delimiter = 6; + + // Default output format. If not specified, 'TabSeparated' is used. + string output_format = 7; + + repeated ExternalTable external_tables = 8; + + string user_name = 9; + string password = 10; + string quota = 11; + + // Works exactly like sessions in the HTTP protocol. + string session_id = 12; + bool session_check = 13; + uint32 session_timeout = 14; + + // Set `cancel` to true to stop executing the query. + bool cancel = 15; + + // If true there will be at least one more QueryInfo in the input stream. + // `next_query_info` is allowed to be set only if a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used. + bool next_query_info = 16; + + /// Controls how a ClickHouse server will compress query execution results before sending back to the client. + /// If not set the compression settings from the configuration file will be used. + Compression result_compression = 17; +} + +enum LogsLevel { + LOG_NONE = 0; + LOG_FATAL = 1; + LOG_CRITICAL = 2; + LOG_ERROR = 3; + LOG_WARNING = 4; + LOG_NOTICE = 5; + LOG_INFORMATION = 6; + LOG_DEBUG = 7; + LOG_TRACE = 8; +} + +message LogEntry { + uint32 time = 1; + uint32 time_microseconds = 2; + uint64 thread_id = 3; + string query_id = 4; + LogsLevel level = 5; + string source = 6; + string text = 7; +} + +message Progress { + uint64 read_rows = 1; + uint64 read_bytes = 2; + uint64 total_rows_to_read = 3; + uint64 written_rows = 4; + uint64 written_bytes = 5; +} + +message Stats { + uint64 rows = 1; + uint64 blocks = 2; + uint64 allocated_bytes = 3; + bool applied_limit = 4; + uint64 rows_before_limit = 5; +} + +message Exception { + int32 code = 1; + string name = 2; + string display_text = 3; + string stack_trace = 4; +} + +// Result of execution of a query which is sent back by the ClickHouse server to the client. +message Result { + // Output of the query, represented in the `output_format` or in a format specified in `query`. + bytes output = 1; + bytes totals = 2; + bytes extremes = 3; + + repeated LogEntry logs = 4; + Progress progress = 5; + Stats stats = 6; + + // Set by the ClickHouse server if there was an exception thrown while executing. + Exception exception = 7; + + // Set by the ClickHouse server if executing was cancelled by the `cancel` field in QueryInfo.
+ bool cancelled = 8; +} + +service ClickHouse { + rpc ExecuteQuery(QueryInfo) returns (Result) {} + rpc ExecuteQueryWithStreamInput(stream QueryInfo) returns (Result) {} + rpc ExecuteQueryWithStreamOutput(QueryInfo) returns (stream Result) {} + rpc ExecuteQueryWithStreamIO(stream QueryInfo) returns (stream Result) {} +} diff --git a/tests/integration/test_server_reload/test.py b/tests/integration/test_server_reload/test.py new file mode 100644 index 00000000000..3c22b476f64 --- /dev/null +++ b/tests/integration/test_server_reload/test.py @@ -0,0 +1,284 @@ +import contextlib +import grpc +import psycopg2 +import pymysql.connections +import pymysql.err +import pytest +import sys +import time +from helpers.cluster import ClickHouseCluster, run_and_check +from helpers.client import Client, QueryRuntimeException +from kazoo.exceptions import NodeExistsError +from pathlib import Path +from requests.exceptions import ConnectionError +from urllib3.util.retry import Retry + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=[ + "configs/ports_from_zk.xml", "configs/ssl_conf.xml", "configs/dhparam.pem", "configs/server.crt", "configs/server.key" + ], + user_configs=["configs/default_passwd.xml"], + with_zookeeper=True) + + +LOADS_QUERY = "SELECT value FROM system.events WHERE event = 'MainConfigLoads'" + + +# Use grpcio-tools to generate *pb2.py files from *.proto. + +proto_dir = Path(__file__).parent / "protos" +gen_dir = Path(__file__).parent / "_gen" +gen_dir.mkdir(exist_ok=True) +run_and_check( + f"python3 -m grpc_tools.protoc -I{proto_dir!s} --python_out={gen_dir!s} --grpc_python_out={gen_dir!s} \ + {proto_dir!s}/clickhouse_grpc.proto", shell=True) + +sys.path.append(str(gen_dir)) +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + + +@pytest.fixture(name="cluster", scope="module") +def fixture_cluster(): + try: + cluster.add_zookeeper_startup_command(configure_ports_from_zk) + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(name="zk", scope="module") +def fixture_zk(cluster): + return cluster.get_kazoo_client("zoo1") + + +def get_client(cluster, port): + return Client(host=cluster.get_instance_ip("instance"), port=port, command=cluster.client_bin_path) + + +def get_mysql_client(cluster, port): + start_time = time.monotonic() + while True: + try: + return pymysql.connections.Connection( + host=cluster.get_instance_ip("instance"), user="default", password="", database="default", port=port) + except pymysql.err.OperationalError: + if time.monotonic() - start_time > 10: + raise + time.sleep(0.1) + + +def get_pgsql_client(cluster, port): + start_time = time.monotonic() + while True: + try: + return psycopg2.connect( + host=cluster.get_instance_ip("instance"), user="postgresql", password="123", database="default", port=port) + except psycopg2.OperationalError: + if time.monotonic() - start_time > 10: + raise + time.sleep(0.1) + + +def get_grpc_channel(cluster, port): + host_port = cluster.get_instance_ip("instance") + f":{port}" + channel = grpc.insecure_channel(host_port) + grpc.channel_ready_future(channel).result(timeout=10) + return channel + + +def grpc_query(channel, query_text): + query_info = clickhouse_grpc_pb2.QueryInfo(query=query_text) + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + return result.output.decode() + + +def 
configure_ports_from_zk(zk, querier=None): + default_config = [ + ("/clickhouse/listen_hosts", b"0.0.0.0"), + ("/clickhouse/ports/tcp", b"9000"), + ("/clickhouse/ports/http", b"8123"), + ("/clickhouse/ports/mysql", b"9004"), + ("/clickhouse/ports/postgresql", b"9005"), + ("/clickhouse/ports/grpc", b"9100"), + ] + for path, value in default_config: + if querier is not None: + loads_before = querier(LOADS_QUERY) + has_changed = False + try: + zk.create(path=path, value=value, makepath=True) + has_changed = True + except NodeExistsError: + if zk.get(path)[0] != value: # zk.get() returns (data, stat), compare the data only + zk.set(path=path, value=value) + has_changed = True + if has_changed and querier is not None: + wait_loaded_config_changed(loads_before, querier) + + +@contextlib.contextmanager +def sync_loaded_config(querier): + # Depending on whether we test a change on the tcp or the http port, + # we monitor changes using the other, untouched protocol + loads_before = querier(LOADS_QUERY) + yield + wait_loaded_config_changed(loads_before, querier) + + +def wait_loaded_config_changed(loads_before, querier): + loads_after = None + start_time = time.monotonic() + while time.monotonic() - start_time < 10: + try: + loads_after = querier(LOADS_QUERY) + if loads_after != loads_before: + return + except (QueryRuntimeException, ConnectionError): + pass + time.sleep(0.1) + assert loads_after is not None and loads_after != loads_before + + +@contextlib.contextmanager +def default_client(cluster, zk, restore_via_http=False): + client = get_client(cluster, port=9000) + try: + yield client + finally: + querier = instance.http_query if restore_via_http else client.query + configure_ports_from_zk(zk, querier) + + +def test_change_tcp_port(cluster, zk): + with default_client(cluster, zk, restore_via_http=True) as client: + assert client.query("SELECT 1") == "1\n" + with sync_loaded_config(instance.http_query): + zk.set("/clickhouse/ports/tcp", b"9090") + with pytest.raises(QueryRuntimeException, match="Connection refused"): + client.query("SELECT 1") + client_on_new_port = get_client(cluster, port=9090) + assert client_on_new_port.query("SELECT 1") == "1\n" + + +def test_change_http_port(cluster, zk): + with default_client(cluster, zk) as client: + retry_strategy = Retry(total=10, backoff_factor=0.1) + assert instance.http_query("SELECT 1", retry_strategy=retry_strategy) == "1\n" + with sync_loaded_config(client.query): + zk.set("/clickhouse/ports/http", b"9090") + with pytest.raises(ConnectionError, match="Connection refused"): + instance.http_query("SELECT 1") + assert instance.http_query("SELECT 1", port=9090) == "1\n" + + +def test_change_mysql_port(cluster, zk): + with default_client(cluster, zk) as client: + mysql_client = get_mysql_client(cluster, port=9004) + assert mysql_client.query("SELECT 1") == 1 + with sync_loaded_config(client.query): + zk.set("/clickhouse/ports/mysql", b"9090") + with pytest.raises(pymysql.err.OperationalError, match="Lost connection"): + mysql_client.query("SELECT 1") + mysql_client_on_new_port = get_mysql_client(cluster, port=9090) + assert mysql_client_on_new_port.query("SELECT 1") == 1 + + +def test_change_postgresql_port(cluster, zk): + with default_client(cluster, zk) as client: + pgsql_client = get_pgsql_client(cluster, port=9005) + cursor = pgsql_client.cursor() + cursor.execute("SELECT 1") + assert cursor.fetchall() == [(1,)] + with sync_loaded_config(client.query): + zk.set("/clickhouse/ports/postgresql", b"9090") + with pytest.raises(psycopg2.OperationalError, match="closed"): + cursor.execute("SELECT 1") + pgsql_client_on_new_port = 
get_pgsql_client(cluster, port=9090) + cursor = pgsql_client_on_new_port.cursor() + cursor.execute("SELECT 1") + assert cursor.fetchall() == [(1,)] + + +def test_change_grpc_port(cluster, zk): + with default_client(cluster, zk) as client: + grpc_channel = get_grpc_channel(cluster, port=9100) + assert grpc_query(grpc_channel, "SELECT 1") == "1\n" + with sync_loaded_config(client.query): + zk.set("/clickhouse/ports/grpc", b"9090") + with pytest.raises(grpc._channel._InactiveRpcError, match="StatusCode.UNAVAILABLE"): + grpc_query(grpc_channel, "SELECT 1") + grpc_channel_on_new_port = get_grpc_channel(cluster, port=9090) + assert grpc_query(grpc_channel_on_new_port, "SELECT 1") == "1\n" + + +def test_remove_tcp_port(cluster, zk): + with default_client(cluster, zk, restore_via_http=True) as client: + assert client.query("SELECT 1") == "1\n" + with sync_loaded_config(instance.http_query): + zk.delete("/clickhouse/ports/tcp") + with pytest.raises(QueryRuntimeException, match="Connection refused"): + client.query("SELECT 1") + + +def test_remove_http_port(cluster, zk): + with default_client(cluster, zk) as client: + assert instance.http_query("SELECT 1") == "1\n" + with sync_loaded_config(client.query): + zk.delete("/clickhouse/ports/http") + with pytest.raises(ConnectionError, match="Connection refused"): + instance.http_query("SELECT 1") + + +def test_remove_mysql_port(cluster, zk): + with default_client(cluster, zk) as client: + mysql_client = get_mysql_client(cluster, port=9004) + assert mysql_client.query("SELECT 1") == 1 + with sync_loaded_config(client.query): + zk.delete("/clickhouse/ports/mysql") + with pytest.raises(pymysql.err.OperationalError, match="Lost connection"): + mysql_client.query("SELECT 1") + + +def test_remove_postgresql_port(cluster, zk): + with default_client(cluster, zk) as client: + pgsql_client = get_pgsql_client(cluster, port=9005) + cursor = pgsql_client.cursor() + cursor.execute("SELECT 1") + assert cursor.fetchall() == [(1,)] + with sync_loaded_config(client.query): + zk.delete("/clickhouse/ports/postgresql") + with pytest.raises(psycopg2.OperationalError, match="closed"): + cursor.execute("SELECT 1") + + +def test_remove_grpc_port(cluster, zk): + with default_client(cluster, zk) as client: + grpc_channel = get_grpc_channel(cluster, port=9100) + assert grpc_query(grpc_channel, "SELECT 1") == "1\n" + with sync_loaded_config(client.query): + zk.delete("/clickhouse/ports/grpc") + with pytest.raises(grpc._channel._InactiveRpcError, match="StatusCode.UNAVAILABLE"): + grpc_query(grpc_channel, "SELECT 1") + + +def test_change_listen_host(cluster, zk): + localhost_client = Client(host="127.0.0.1", port=9000, command="/usr/bin/clickhouse") + localhost_client.command = ["docker", "exec", "-i", instance.docker_id] + localhost_client.command + try: + client = get_client(cluster, port=9000) + with sync_loaded_config(localhost_client.query): + zk.set("/clickhouse/listen_hosts", b"127.0.0.1") + with pytest.raises(QueryRuntimeException, match="Connection refused"): + client.query("SELECT 1") + assert localhost_client.query("SELECT 1") == "1\n" + finally: + with sync_loaded_config(localhost_client.query): + configure_ports_from_zk(zk) + diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ede1dafefb1..33ce94a7a29 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -323,6 +323,28 @@ def test_read_table_with_default(started_cluster): +def test_hdfsCluster(started_cluster): + 
hdfs_api = started_cluster.hdfs_api + fs = HdfsClient(hosts=started_cluster.hdfs_ip) + dir = '/test_hdfsCluster' + exists = fs.exists(dir) + if exists: + fs.delete(dir, recursive=True) + fs.mkdirs(dir) + hdfs_api.write_data("/test_hdfsCluster/file1", "1\n") + hdfs_api.write_data("/test_hdfsCluster/file2", "2\n") + hdfs_api.write_data("/test_hdfsCluster/file3", "3\n") + + actual = node1.query("select id, _file as file_name, _path as file_path from hdfs('hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id") + expected = "1\tfile1\thdfs://hdfs1:9000/test_hdfsCluster/file1\n2\tfile2\thdfs://hdfs1:9000/test_hdfsCluster/file2\n3\tfile3\thdfs://hdfs1:9000/test_hdfsCluster/file3\n" + assert actual == expected + + actual = node1.query("select id, _file as file_name, _path as file_path from hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id") + expected = "1\tfile1\thdfs://hdfs1:9000/test_hdfsCluster/file1\n2\tfile2\thdfs://hdfs1:9000/test_hdfsCluster/file2\n3\tfile3\thdfs://hdfs1:9000/test_hdfsCluster/file3\n" + assert actual == expected + fs.delete(dir, recursive=True) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index 5f5063a879f..567a9b7184d 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -97,6 +97,22 @@ def test_kafka_json_as_string(kafka_cluster): assert instance.contains_in_log("Parsing of message (topic: kafka_json_as_string, partition: 0, offset: 1) return no rows") def test_kafka_json_as_string_no_kdc(kafka_cluster): + # When the test is run alone (not preceded by any other kerberized kafka test), + # we need a ticket to + # assert instance.contains_in_log("Ticket expired") + instance.query(''' + CREATE TABLE test.kafka_no_kdc_warm_up (field String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kerberized_kafka1:19092', + kafka_topic_list = 'kafka_json_as_string_no_kdc_warm_up', + kafka_group_name = 'kafka_json_as_string_no_kdc_warm_up', + kafka_commit_on_select = 1, + kafka_format = 'JSONAsString', + kafka_flush_interval_ms=1000; + ''') + + instance.query('SELECT * FROM test.kafka_no_kdc_warm_up;') + kafka_produce(kafka_cluster, 'kafka_json_as_string_no_kdc', ['{"t": 123, "e": {"x": "woof"} }', '', '{"t": 124, "e": {"x": "test"} }', '{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"}']) kafka_cluster.pause_container('kafka_kerberos') diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 6f43036e64d..b6ac121cd0c 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -424,6 +424,21 @@ def test_predefined_connection_configuration(started_cluster): cursor.execute(f'DROP TABLE test_table ') +def test_where_false(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("DROP TABLE IF EXISTS test") + cursor.execute('CREATE TABLE test (a Integer)') + cursor.execute("INSERT INTO test SELECT 1") + + result = node1.query("SELECT count() FROM postgresql('postgres1:5432', 'postgres', 'test', 'postgres', 'mysecretpassword') WHERE 1=0") + assert(int(result) == 0) + result = node1.query("SELECT count() FROM postgresql('postgres1:5432', 'postgres', 'test', 
'postgres', 'mysecretpassword') WHERE 0") + assert(int(result) == 0) + result = node1.query("SELECT count() FROM postgresql('postgres1:5432', 'postgres', 'test', 'postgres', 'mysecretpassword') WHERE 1=1") + assert(int(result) == 1) + cursor.execute("DROP TABLE test") + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 66ec97ac027..2c2a9e41509 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -67,8 +67,8 @@ def rabbitmq_cluster(): def rabbitmq_setup_teardown(): print("RabbitMQ is available - running test") yield # run test - for table_name in ['view', 'consumer', 'rabbitmq']: - instance.query(f'DROP TABLE IF EXISTS test.{table_name}') + instance.query('DROP DATABASE test NO DELAY') + instance.query('CREATE DATABASE test') # Tests @@ -284,6 +284,12 @@ def test_rabbitmq_materialized_view(rabbitmq_cluster): ORDER BY key; CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.rabbitmq; + + CREATE TABLE test.view2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS + SELECT * FROM test.rabbitmq group by (key, value); ''') credentials = pika.PlainCredentials('root', 'clickhouse') @@ -297,14 +303,26 @@ def test_rabbitmq_materialized_view(rabbitmq_cluster): for message in messages: channel.basic_publish(exchange='mv', routing_key='', body=message) - while True: + time_limit_sec = 60 + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: result = instance.query('SELECT * FROM test.view ORDER BY key') if (rabbitmq_check_result(result)): break - connection.close() rabbitmq_check_result(result, True) + deadline = time.monotonic() + time_limit_sec + + while time.monotonic() < deadline: + result = instance.query('SELECT * FROM test.view2 ORDER BY key') + if (rabbitmq_check_result(result)): + break + + rabbitmq_check_result(result, True) + connection.close() + def test_rabbitmq_materialized_view_with_subquery(rabbitmq_cluster): instance.query(''' diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index a4ba7a95dc7..f3c4b1dd0cf 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -11,6 +11,7 @@ import helpers.client import pytest from helpers.cluster import ClickHouseCluster, ClickHouseInstance, get_instances_dir from helpers.network import PartitionManager +from helpers.test_tools import exec_query_with_retry MINIO_INTERNAL_PORT = 9001 @@ -809,11 +810,12 @@ def test_seekable_formats(started_cluster): assert(int(result) == 5000000) table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" - instance.query(f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000)") + exec_query_with_retry(instance, f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000)") result = instance.query(f"SELECT count() FROM {table_function}") assert(int(result) == 5000000) + instance.query("SYSTEM FLUSH LOGS") result = instance.query(f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM s3') AND memory_usage > 0 ORDER BY event_time desc") print(result[:3]) assert(int(result[:3]) < 200) @@ -831,12 +833,35 @@ def 
test_seekable_formats_url(started_cluster): assert(int(result) == 5000000) table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" - instance.query(f"insert into table function {table_function} select number, randomString(100) from numbers(5000000)") + exec_query_with_retry(instance, f"insert into table function {table_function} select number, randomString(100) from numbers(5000000)") table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_orc', 'ORC', 'a Int32, b String')" result = instance.query(f"SELECT count() FROM {table_function}") assert(int(result) == 5000000) + instance.query("SYSTEM FLUSH LOGS") result = instance.query(f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM url') AND memory_usage > 0 ORDER BY event_time desc") print(result[:3]) assert(int(result[:3]) < 200) + + +def test_empty_file(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + name = "empty" + url = f'http://{started_cluster.minio_ip}:{MINIO_INTERNAL_PORT}/{bucket}/{name}' + + minio = started_cluster.minio_client + minio.put_object(bucket, name, io.BytesIO(b""), 0) + + table_function = f"s3('{url}', 'CSV', 'id Int32')" + result = instance.query(f"SELECT count() FROM {table_function}") + assert(int(result) == 0) + + +def test_insert_with_path_with_globs(started_cluster): + instance = started_cluster.instances["dummy"] + + table_function_3 = f"s3('http://minio1:9001/root/test_parquet*', 'minio', 'minio123', 'Parquet', 'a Int32, b String')" + instance.query_and_get_error(f"insert into table function {table_function_3} SELECT number, randomString(100) FROM numbers(500)") diff --git a/tests/integration/test_system_metrics/test.py b/tests/integration/test_system_metrics/test.py index 9e8eac162f6..efcc6f88a24 100644 --- a/tests/integration/test_system_metrics/test.py +++ b/tests/integration/test_system_metrics/test.py @@ -59,3 +59,32 @@ def test_readonly_metrics(start_cluster): node1.query("ATTACH TABLE test.test_table") assert_eq_with_retry(node1, "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'", "0\n", retry_count=300, sleep_time=1) +#For LowCardinality-columns, the bytes for N rows is not N*size of 1 row. 
+def test_metrics_storage_buffer_size(start_cluster): + node1.query(''' + CREATE TABLE test.test_mem_table + ( + `str` LowCardinality(String) + ) + ENGINE = Memory; + + CREATE TABLE test.buffer_table + ( + `str` LowCardinality(String) + ) + ENGINE = Buffer('test', 'test_mem_table', 1, 600, 600, 1000, 100000, 100000, 10000000); + ''') + + #before flush + node1.query("INSERT INTO test.buffer_table VALUES('hello');") + assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferRows'") == "1\n" + assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferBytes'") == "24\n" + + node1.query("INSERT INTO test.buffer_table VALUES('hello');") + assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferRows'") == "2\n" + assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferBytes'") == "25\n" + + #flush + node1.query("OPTIMIZE TABLE test.buffer_table") + assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferRows'") == "0\n" + assert node1.query("SELECT value FROM system.metrics WHERE metric = 'StorageBufferBytes'") == "0\n" diff --git a/tests/integration/test_table_functions_access_rights/test.py b/tests/integration/test_table_functions_access_rights/test.py index 16f18407960..90106303315 100644 --- a/tests/integration/test_table_functions_access_rights/test.py +++ b/tests/integration/test_table_functions_access_rights/test.py @@ -39,7 +39,7 @@ def test_merge(): instance.query("GRANT CREATE TEMPORARY TABLE ON *.* TO A") assert "no tables in database matches" in instance.query_and_get_error(select_query, user = 'A') - + instance.query("GRANT SELECT ON default.table1 TO A") assert instance.query(select_query, user = 'A') == "1\n" diff --git a/tests/performance/sparse_column.xml b/tests/performance/sparse_column.xml new file mode 100644 index 00000000000..6523d37df44 --- /dev/null +++ b/tests/performance/sparse_column.xml @@ -0,0 +1,58 @@ + + + + serialization + + sparse + full + + + + ratio + + 10 + 100 + 1000 + + + + + + CREATE TABLE test_full_{ratio} (id UInt64, u8 UInt8, u64 UInt64, str String) + ENGINE = MergeTree ORDER BY id + + + + CREATE TABLE test_sparse_{ratio} (id UInt64, u8 UInt8, u64 UInt64, str String) + ENGINE = MergeTree ORDER BY id + SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9 + + + SYSTEM STOP MERGES test_{serialization}_{ratio} + + + INSERT INTO test_{serialization}_{ratio} SELECT + number, + number % {ratio} = 0 ? rand(1) : 0, + number % {ratio} = 0 ? rand(2) : 0, + number % {ratio} = 0 ? 
randomPrintableASCII(64, 3) : '' + FROM numbers(100000000) + + + SELECT u8 FROM test_{serialization}_{ratio} FORMAT Null + SELECT u64 FROM test_{serialization}_{ratio} FORMAT Null + SELECT str FROM test_{serialization}_{ratio} FORMAT Null + + SELECT erf(u64) FROM test_{serialization}_{ratio} FORMAT Null + SELECT lower(str) FROM test_{serialization}_{ratio} FORMAT Null + + SELECT id FROM test_{serialization}_{ratio} ORDER BY u64 DESC LIMIT 100 FORMAT Null + SELECT id FROM test_{serialization}_{ratio} ORDER BY str DESC LIMIT 100 FORMAT Null + SELECT id FROM test_{serialization}_{ratio} ORDER BY u8, u64 DESC LIMIT 100 FORMAT Null + SELECT * FROM test_{serialization}_{ratio} ORDER BY u8, u64 DESC LIMIT 100 FORMAT Null + + SELECT sum(u64) FROM test_{serialization}_{ratio} GROUP BY id % 11 FORMAT Null + SELECT uniq(str) FROM test_{serialization}_{ratio} GROUP BY id % 11 FORMAT Null + + + diff --git a/tests/queries/0_stateless/00700_decimal_math.reference b/tests/queries/0_stateless/00700_decimal_math.reference index eb556ac49b8..389b428e27b 100644 --- a/tests/queries/0_stateless/00700_decimal_math.reference +++ b/tests/queries/0_stateless/00700_decimal_math.reference @@ -4,7 +4,7 @@ 42.42 6.513 42.419169 42.42 3.4875 42.417263671875 1 0.8427007929497149 0.15729920705028513 -42.42 115.60113124678627 1.6029995567009473e50 +42.42 115.601131 1.603 0 0 1 0 3.14159265 0 -1 -0 1 1.5707963267948966 0 0.7853981633974483 @@ -14,7 +14,7 @@ 42.42 6.513 42.419169 42.42 3.4875 42.417263671875 1 0.8427007929497149 0.15729920705028513 -42.42 115.60113124678627 1.6029995567009473e50 +42.42 115.601131 1.603 0 0 1 0 3.14159265358979328 0 -1 -0 1 1.5707963267948966 0 0.7853981633974483 @@ -24,7 +24,7 @@ 42.42 6.513 42.419169 42.42 3.4875 42.417263671875 1 0.8427007929497149 0.15729920705028513 -42.42 115.60113124678627 1.6029995567009473e50 +42.42 115.601131 1.603 0 0 1 0 3.14159265358979 0 -1 -0 1 1.5707963267948966 0 0.7853981633974483 diff --git a/tests/queries/0_stateless/00700_decimal_math.sql b/tests/queries/0_stateless/00700_decimal_math.sql index 237bee1c691..cefbf2fd604 100644 --- a/tests/queries/0_stateless/00700_decimal_math.sql +++ b/tests/queries/0_stateless/00700_decimal_math.sql @@ -5,7 +5,7 @@ SELECT toDecimal32('42.42', 4) AS x, toDecimal32(log10(x), 4) AS y, round(exp10( SELECT toDecimal32('42.42', 4) AS x, toDecimal32(sqrt(x), 3) AS y, y * y; SELECT toDecimal32('42.42', 4) AS x, toDecimal32(cbrt(x), 4) AS y, toDecimal64(y, 4) * y * y; SELECT toDecimal32('1.0', 5) AS x, erf(x), erfc(x); -SELECT toDecimal32('42.42', 4) AS x, lgamma(x), tgamma(x); +SELECT toDecimal32('42.42', 4) AS x, round(lgamma(x), 6), round(tgamma(x) / 1e50, 6); SELECT toDecimal32('0.0', 2) AS x, round(sin(x), 8), round(cos(x), 8), round(tan(x), 8); SELECT toDecimal32(pi(), 8) AS x, round(sin(x), 8), round(cos(x), 8), round(tan(x), 8); @@ -19,7 +19,7 @@ SELECT toDecimal64('42.42', 4) AS x, toDecimal32(log10(x), 4) AS y, round(exp10( SELECT toDecimal64('42.42', 4) AS x, toDecimal32(sqrt(x), 3) AS y, y * y; SELECT toDecimal64('42.42', 4) AS x, toDecimal32(cbrt(x), 4) AS y, toDecimal64(y, 4) * y * y; SELECT toDecimal64('1.0', 5) AS x, erf(x), erfc(x); -SELECT toDecimal64('42.42', 4) AS x, lgamma(x), tgamma(x); +SELECT toDecimal64('42.42', 4) AS x, round(lgamma(x), 6), round(tgamma(x) / 1e50, 6); SELECT toDecimal64('0.0', 2) AS x, round(sin(x), 8), round(cos(x), 8), round(tan(x), 8); SELECT toDecimal64(pi(), 17) AS x, round(sin(x), 8), round(cos(x), 8), round(tan(x), 8); @@ -33,7 +33,7 @@ SELECT toDecimal128('42.42', 4) AS x, 
toDecimal32(log10(x), 4) AS y, round(exp10 SELECT toDecimal128('42.42', 4) AS x, toDecimal32(sqrt(x), 3) AS y, y * y; SELECT toDecimal128('42.42', 4) AS x, toDecimal32(cbrt(x), 4) AS y, toDecimal64(y, 4) * y * y; SELECT toDecimal128('1.0', 5) AS x, erf(x), erfc(x); -SELECT toDecimal128('42.42', 4) AS x, lgamma(x), tgamma(x); +SELECT toDecimal128('42.42', 4) AS x, round(lgamma(x), 6), round(tgamma(x) / 1e50, 6); SELECT toDecimal128('0.0', 2) AS x, round(sin(x), 8), round(cos(x), 8), round(tan(x), 8); SELECT toDecimal128(pi(), 14) AS x, round(sin(x), 8), round(cos(x), 8), round(tan(x), 8); diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference index 421def88e41..89da3c6fa43 100644 --- a/tests/queries/0_stateless/00900_long_parquet_load.reference +++ b/tests/queries/0_stateless/00900_long_parquet_load.reference @@ -89,7 +89,7 @@ idx10 ['This','is','a','test'] 23 24 === Try load data from datapage_v2.snappy.parquet -Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin: (in query: INSERT INTO parquet_load FORMAT Parquet). (CANNOT_READ_ALL_DATA) +Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unknown encoding type.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin: (in query: INSERT INTO parquet_load FORMAT Parquet). (CANNOT_READ_ALL_DATA) === Try load data from datatype-date32.parquet 1925-01-01 diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index adab2988e39..eb2b7835483 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +#Tags: no-parallel + import os import sys import signal @@ -37,11 +39,11 @@ with client(name='client1>', log=log) as client1, client(name='client2>', log=lo client1.send('WATCH 01069_window_view_proc_tumble_watch.wv') client1.expect('Query id' + end_of_block) - client2.send("INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 1)") + client2.send("INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 3)") client2.expect("Ok.") client1.expect('1' + end_of_block) client1.expect('Progress: 1.00 rows.*\)') - client2.send("INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 1)") + client2.send("INSERT INTO 01069_window_view_proc_tumble_watch.mt VALUES (1, now('US/Samoa') + 3)") client2.expect("Ok.") client1.expect('1' + end_of_block) client1.expect('Progress: 2.00 rows.*\)') diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect index 37087dd85f7..5bbc77ccf14 100755 --- a/tests/queries/0_stateless/01176_mysql_client_interactive.expect +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -5,11 +5,12 @@ log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail + expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git 
a/tests/queries/0_stateless/01179_insert_values_semicolon.expect b/tests/queries/0_stateless/01179_insert_values_semicolon.expect index 0e65e5c4cbf..bf937c3a6a4 100755 --- a/tests/queries/0_stateless/01179_insert_values_semicolon.expect +++ b/tests/queries/0_stateless/01179_insert_values_semicolon.expect @@ -1,13 +1,14 @@ #!/usr/bin/expect -f +# Tags: long log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.expect b/tests/queries/0_stateless/01180_client_syntax_errors.expect index e4b108fc9a9..6e4e975988e 100755 --- a/tests/queries/0_stateless/01180_client_syntax_errors.expect +++ b/tests/queries/0_stateless/01180_client_syntax_errors.expect @@ -1,14 +1,13 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect index 14ca38093bf..e4442047c87 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect @@ -1,15 +1,14 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect index 3abed6cae03..2f871ab46d8 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect @@ -1,14 +1,13 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index aa27ef83f52..75e0ec5ffec 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -2,6 +2,7 @@ send_timeout Seconds 300 connect_timeout Seconds 10 connect_timeout_with_failover_ms Milliseconds 2000 connect_timeout_with_failover_secure_ms Milliseconds 3000 +external_storage_connect_timeout_sec UInt64 10 max_memory_usage UInt64 10000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git 
a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect index 62a3df95abc..ad5b7625929 100755 --- a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect +++ b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect @@ -1,14 +1,14 @@ #!/usr/bin/expect -f -# Tags: long, no-fasttest +# Tags: long log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01305_polygons_union.reference b/tests/queries/0_stateless/01305_polygons_union.reference index f87d03c151c..6deb8dce7ce 100644 --- a/tests/queries/0_stateless/01305_polygons_union.reference +++ b/tests/queries/0_stateless/01305_polygons_union.reference @@ -1,8 +1,8 @@ [[[(1,2.9),(1,4),(4,4),(4,1),(2.9,1),(3,0),(0,0),(0,3),(1,2.9)]]] -[[[(4.3666052904432435,50.84337386140151),(4.366227,50.840809),(4.344961,50.833264),(4.338074,50.848677),(4.346693,50.858306),(4.3526804582393535,50.856658100365976),(4.3613577,50.8651821),(4.3613148,50.8651279),(4.3904543,50.8564867),(4.3830299,50.8428851),(4.3666052904432435,50.84337386140151)]]] +[[[(4.366605,50.843374),(4.366227,50.840809),(4.344961,50.833264),(4.338074,50.848677),(4.346693,50.858306),(4.35268,50.856658),(4.361358,50.865182),(4.361315,50.865128),(4.390454,50.856487),(4.38303,50.842885),(4.366605,50.843374)]]] -------- MultiPolygon with Polygon -MULTIPOLYGON(((35.5408 58.9593,37.2817 59.9768,38.7325 59.9465,36.9725 59.0149,37.3119 59.0258,37.8553 58.9075,39.7299 59.9314,44.4751 59.81,44.4146 55.3097,40.0925 52.1652,38.3395 52.1652,39.1456 52.7573,38.0214 52.8989,37.1608 52.2393,35.4682 52.2022,36.5022 53.0008,35.3776 53.0462,35.3645 53.076,34.2895 52.2208,32.5969 52.2208,33.4048 52.8423,33.1712 52.8276,32.5275 53.1741,31.2368 52.1652,29.7861 52.1466,32.2523 53.964,31.8748 54.1736,29.3931 52.2763,29.4536 59.7796,30.5719 59.9919,30.4812 58.8542,32.3249 59.9465,33.6548 59.9465,30.179 57.9196,30.179 56.9764,32.2175 58.3664,32.2342 58.4928,32.5691 58.5924,34.8637 59.9768,36.2843 59.9616,34.0496 58.6717,34.9952 58.6226,35.3712 58.8556,34.6522 58.9167,35.5408 58.9593),(36.4989 58.7512,36.1498 58.553,36.3447 58.5402,36.0877 58.5174,35.4314 58.1349,36.403 58.0507,36.5949 58.1673,36.0123 58.2869,37.191 58.6819,36.4989 58.7512),(34.4816 56.8232,34.8098 57.0409,33.5602 56.9781,33.3418 56.8364,33.8361 56.6953,34.1885 56.6259,34.3867 56.7596,34.229 56.7948,34.4816 56.8232),(35.9179 57.7512,35.7402 57.7909,36.0848 57.855,36.3932 58.0447,35.1134 57.9454,34.6332 57.6538,35.613 57.5595,35.9179 57.7512),(36.8709 53.2765,37.135 53.4711,36.9794 53.5878,37.3119 53.9273,37.0035 54.2999,36.6985 54.0791,36.919 53.8561,36.3552 53.8269,36.1528 53.6763,36.8709 53.2765),(38.1601 55.1091,38.3093 55.1546,38.2609 55.1775,38.1601 55.1091),(38.1688 56.0758,38.4339 56.2361,37.5054 56.5484,37.2281 56.3799,38.1688 56.0758),(38.1319 56.0534,36.647 55.9411,37.6238 55.7402,38.1319 56.0534),(37.2824 55.5258,36.8283 55.4471,37.06 55.3843,37.2824 55.5258),(36.151 54.791,36.0123 54.7554,36.0472 54.7217,36.151 54.791),(34.9611 53.9765,34.894 54.1226,35.6193 54.4929,34.9706 54.9262,34.7231 54.7576,35.0753 54.5981,34.1081 54.1757,34.7279 53.8116,34.9611 53.9765),(38.2312 
56.9795,37.565 56.5843,38.9742 56.8774,38.4915 57.1308,38.2699 57.0021,38.3093 56.9929,38.2312 56.9795),(36.5334 56.6753,36.375 56.6455,36.4446 56.6242,36.5334 56.6753),(36.1999 57.0022,36.9794 57.0751,36.4587 57.1544,36.1999 57.0022),(34.6028 58.3749,33.6245 58.271,34.3593 58.2189,34.6028 58.3749),(33.7581 57.8255,33.2316 57.7748,33.6325 57.7419,33.7581 57.8255),(31.6069 56.3194,31.7506 56.8609,31.6514 57.1258,30.3301 56.1942,30.2394 55.2753,31.6069 56.3194),(34.2274 57.4023,34.0208 57.2724,35.0338 57.1875,35.4682 57.4674,34.2274 57.4023),(31.7782 55.7778,30.2092 54.6331,30.2394 53.6774,31.7439 54.8677,31.8413 54.9989,32.204 55.5156,31.7782 55.7778),(33.7222 56.3063,32.8387 56.3117,33.5244 56.1686,33.7222 56.3063),(33.1204 55.8832,32.748 55.9072,32.9547 55.7645,33.1204 55.8832),(35.2275 55.0993,36.4354 55.3441,35.7505 55.4454,35.2275 55.0993),(35.9817 55.5958,36.5563 55.6352,36.193 55.7319,35.9817 55.5958),(35.0954 55.822,35.3188 55.9582,34.7331 56.1049,34.4996 55.9565,35.0954 55.822),(34.9721 55.7463,34.2598 55.8023,33.6125 55.3778,34.3709 55.3709,34.9721 55.7463),(35.6571 56.1619,36.0233 56.3789,35.4083 56.5254,35.2273 56.414,35.71 56.3117,35.0485 56.303,34.744 56.1118,35.6571 56.1619),(40.2143 54.467,40.3948 54.4403,40.6064 54.034,39.9716 53.9807,40.2437 53.5878,39.5485 53.5878,39.9942 53.358,43.0243 55.3269,43.0243 56.2614,40.2143 54.467),(38.5511 53.2922,38.4609 53.226,39.2704 52.8471,39.9877 53.3534,38.5511 53.2922),(40.5716 55.8007,43.0243 57.2554,43.0243 58.0797,40.4543 56.5923,40.4855 56.4957,40.2529 56.4682,39.8102 56.1914,39.8205 56.0763,40.425 56.1942,40.5716 55.8007),(40.5504 55.7875,39.7601 55.7544,39.8151 55.3187,40.5504 55.7875),(39.7863 57.025,42.5105 58.477,41.6944 58.8542,40.1389 58.048,40.2437 58.0478,40.3343 57.4673,39.7299 57.4673,39.7863 57.025),(38.0744 57.5312,38.3737 57.6908,38.3395 57.7103,38.8533 58.0638,38.432 58.2584,38.0535 58.0542,38.3395 57.9356,37.4328 57.7103,38.0744 57.5312),(37.9669 57.4734,37.1608 57.2554,37.4489 57.1909,37.9669 57.4734),(40.4136 58.7241,41.2108 59.1035,40.6366 59.3817,39.8163 58.9766,40.4552 58.9011,40.4136 58.7241),(39.7184 58.3823,39.6392 58.3821,39.6392 58.3427,39.7184 58.3823),(38.7465 58.4255,39.5485 58.7133,39.4085 58.7696,38.7465 58.4255))) +MULTIPOLYGON(((35.5408 58.9593,37.2817 59.9768,38.7325 59.9465,36.9725 59.0149,37.3119 59.0258,37.8553 58.9075,39.7299 59.9314,44.4751 59.81,44.4146 55.3097,40.0925 52.1652,38.3395 52.1652,39.1456 52.7572,38.0214 52.8989,37.1608 52.2393,35.4682 52.2022,36.5022 53.0008,35.3776 53.0462,35.3645 53.076,34.2895 52.2208,32.5969 52.2208,33.4048 52.8423,33.1712 52.8276,32.5275 53.1741,31.2368 52.1652,29.7861 52.1466,32.2523 53.964,31.8748 54.1736,29.3931 52.2763,29.4536 59.7796,30.5719 59.9919,30.4812 58.8542,32.3249 59.9465,33.6548 59.9465,30.179 57.9196,30.179 56.9764,32.2175 58.3664,32.2342 58.4928,32.5691 58.5924,34.8637 59.9768,36.2843 59.9616,34.0496 58.6717,34.9952 58.6226,35.3712 58.8556,34.6522 58.9167,35.5408 58.9593),(36.4989 58.7512,36.1498 58.553,36.3447 58.5402,36.0877 58.5174,35.4314 58.1349,36.403 58.0507,36.5949 58.1673,36.0123 58.2869,37.191 58.6819,36.4989 58.7512),(34.4816 56.8232,34.8098 57.0409,33.5602 56.9781,33.3418 56.8364,33.8361 56.6953,34.1885 56.6259,34.3867 56.7596,34.229 56.7948,34.4816 56.8232),(35.9179 57.7512,35.7402 57.7909,36.0848 57.855,36.3932 58.0447,35.1134 57.9454,34.6332 57.6538,35.613 57.5595,35.9179 57.7512),(36.8709 53.2765,37.135 53.4711,36.9794 53.5878,37.3119 53.9273,37.0035 54.2999,36.6985 54.0791,36.919 53.8561,36.3552 53.8269,36.1528 
53.6763,36.8709 53.2765),(38.1601 55.1091,38.3093 55.1546,38.2609 55.1775,38.1601 55.1091),(38.1688 56.0758,38.4339 56.2361,37.5054 56.5484,37.2281 56.3799,38.1688 56.0758),(38.1319 56.0534,36.647 55.9411,37.6238 55.7402,38.1319 56.0534),(37.2824 55.5258,36.8283 55.4471,37.06 55.3843,37.2824 55.5258),(36.151 54.791,36.0123 54.7554,36.0472 54.7217,36.151 54.791),(34.9611 53.9765,34.894 54.1226,35.6193 54.4929,34.9706 54.9262,34.7231 54.7576,35.0753 54.5981,34.1081 54.1757,34.7279 53.8116,34.9611 53.9765),(38.2312 56.9795,37.565 56.5843,38.9742 56.8774,38.4915 57.1308,38.2699 57.0021,38.3093 56.9929,38.2312 56.9795),(36.5334 56.6753,36.375 56.6455,36.4446 56.6242,36.5334 56.6753),(36.1999 57.0022,36.9794 57.0751,36.4587 57.1544,36.1999 57.0022),(34.6028 58.3749,33.6245 58.271,34.3593 58.2189,34.6028 58.3749),(33.7581 57.8255,33.2316 57.7748,33.6325 57.7419,33.7581 57.8255),(31.6069 56.3194,31.7506 56.8609,31.6514 57.1258,30.3301 56.1942,30.2394 55.2753,31.6069 56.3194),(34.2274 57.4023,34.0208 57.2724,35.0338 57.1875,35.4682 57.4674,34.2274 57.4023),(31.7782 55.7778,30.2092 54.6331,30.2394 53.6774,31.7439 54.8677,31.8413 54.9989,32.204 55.5156,31.7782 55.7778),(33.7222 56.3063,32.8387 56.3117,33.5244 56.1686,33.7222 56.3063),(33.1204 55.8832,32.748 55.9072,32.9547 55.7645,33.1204 55.8832),(35.2275 55.0993,36.4354 55.3441,35.7505 55.4454,35.2275 55.0993),(35.9817 55.5958,36.5563 55.6352,36.193 55.7319,35.9817 55.5958),(35.0954 55.822,35.3188 55.9582,34.7331 56.1049,34.4996 55.9565,35.0954 55.822),(34.9721 55.7463,34.2598 55.8023,33.6125 55.3778,34.3709 55.3709,34.9721 55.7463),(35.6571 56.1619,36.0233 56.3789,35.4083 56.5254,35.2273 56.414,35.71 56.3117,35.0485 56.303,34.744 56.1118,35.6571 56.1619),(40.2143 54.467,40.3948 54.4403,40.6064 54.034,39.9716 53.9807,40.2437 53.5878,39.5485 53.5878,39.9942 53.358,43.0243 55.3269,43.0243 56.2614,40.2143 54.467),(38.5511 53.2922,38.4609 53.226,39.2704 52.8471,39.9877 53.3534,38.5511 53.2922),(40.5716 55.8007,43.0243 57.2554,43.0243 58.0797,40.4543 56.5923,40.4855 56.4957,40.2529 56.4682,39.8102 56.1914,39.8205 56.0763,40.425 56.1942,40.5716 55.8007),(40.5504 55.7875,39.7601 55.7544,39.8151 55.3187,40.5504 55.7875),(39.7863 57.025,42.5105 58.477,41.6944 58.8542,40.1389 58.048,40.2437 58.0478,40.3343 57.4673,39.7299 57.4673,39.7863 57.025),(38.0744 57.5312,38.3737 57.6908,38.3395 57.7103,38.8533 58.0638,38.432 58.2584,38.0535 58.0542,38.3395 57.9356,37.4328 57.7103,38.0744 57.5312),(37.9669 57.4734,37.1608 57.2554,37.4489 57.1909,37.9669 57.4734),(40.4136 58.7241,41.2108 59.1035,40.6366 59.3817,39.8163 58.9766,40.4552 58.9011,40.4136 58.7241),(39.7184 58.3823,39.6392 58.3821,39.6392 58.3427,39.7184 58.3823),(38.7465 58.4255,39.5485 58.7133,39.4085 58.7696,38.7465 58.4255))) -------- MultiPolygon with Polygon with Holes -MULTIPOLYGON(((24.3677 61.4598,26.6528 61.1008,26.8726 61.7107,30.564 61.0583,31.3989 62.0215,36.0132 61.1432,36.8921 62.0009,42.6489 60.6301,43.5718 61.3757,47.0435 59.8889,49.5923 60.0868,49.1528 58.1707,51.9214 57.9148,50.2515 56.1455,52.6685 55.826,51.6577 54.2909,52.8882 53.9302,50.647 53.0148,51.394 52.4828,48.0542 51.1793,49.2847 50.5414,47.1753 49.153,43.9233 49.8096,42.561 48.7779,36.936 49.6676,35.2661 48.7489,32.8052 49.5252,27.2241 48.9802,26.1255 50.4015,21.2036 50.205,20.0171 51.5634,17.4683 53.0148,19.4458 54.0852,19.4458 55.8753,19.5776 57.4922,19.5776 58.6769,24.3677 61.4598),(24.4556 59.4227,21.2036 58.4937,21.3354 56.897,21.5991 55.9246,25.2026 55.9984,28.8501 57.0646,27.0923 57.8448,28.8062 59.1759,26.2573 
59.1759,24.4556 59.4227),(33.1079 56.9523,33.1392 56.8934,33.7182 56.7292,35.1489 56.5859,34.229 56.7948,36.9794 57.0751,35.7705 57.2554,37.0097 57.4998,35.7402 57.7909,37.1608 58.0478,36.0123 58.2869,37.191 58.6819,34.6522 58.9167,37.2327 59.0233,37.1118 59.6677,35.1343 59.8448,31.9702 58.9727,32.25 58.4976,33.4734 58.8542,34.7428 59.5659,33.8361 58.6819,36.3447 58.5402,33.6245 58.271,36.4354 58.0478,33.2316 57.7748,36.1936 57.4998,33.1712 57.337,36.0727 57.0915,33.1079 56.9523),(37.0604 52.9744,34.9585 51.4814,36.5405 50.4015,39.6606 50.2893,39.7925 52.1335,41.77 50.6808,44.4946 51.9713,47.3071 52.5095,44.0552 53.5403,46.604 53.6967,47.6147 55.4041,45.3735 55.4041,42.8247 56.5837,40.4412 56.1511,40.5761 55.7884,39.7601 55.7544,39.8205 55.2753,40.3948 55.2408,40.3948 54.8773,39.5485 54.8773,39.5485 54.5631,40.3948 54.4403,40.6064 54.034,39.9716 53.9807,40.2437 53.5878,39.5485 53.5878,40.0019 53.354,38.3395 53.2817,39.5787 52.6996,37.8559 52.9188,37.4471 53.2343,37.2165 53.0798,37.4328 52.9552,37.0604 52.9744),(31.627 54.7093,29.5972 55.5037,29.1577 55.7518,22.5659 55.1286,22.5659 53.5403,22.0386 51.4814,26.2573 51.4266,30.1245 50.5414,32.1899 51.1793,30.1245 53.1731,32.4808 53.1989,32.0831 53.408,32.476 53.8383,31.4182 54.4227,31.627 54.7093),(34.7731 53.3243,34.7731 53.1793,35.0903 53.1731,34.7731 53.3243),(36.9508 55.414,37.7653 55.1891,36.8822 54.975,37.0572 54.7635,38.3093 55.1546,37.7955 55.3956,38.4907 55.5327,38.3184 55.7179,38.0262 55.6546,38.0373 55.6523,37.9482 55.6376,36.9508 55.414),(38.3092 56.9929,38.5798 57.0849,38.2186 57.2717,38.7325 57.4835,38.3395 57.7103,38.8533 58.0638,38.3698 58.2869,39.5485 58.7133,38.8838 58.9777,38.0944 58.8545,38.5813 58.7446,37.4026 58.3187,38.3395 57.9356,37.4328 57.7103,38.128 57.516,37.1608 57.2554,38.3092 56.9929),(38.309 56.9928,36.375 56.6455,36.8799 56.4895,38.309 56.9928),(40.3237 57.5365,42.6929 58.0314,40.8911 59.2659,39.2792 59.0373,40.4552 58.9011,40.3343 58.3821,39.6392 58.3821,39.6392 58.0478,40.2437 58.0478,40.3237 57.5365),(40.0149 57.4677,39.7299 57.4673,39.7379 57.4051,40.0149 57.4677))) +MULTIPOLYGON(((24.3677 61.4598,26.6528 61.1008,26.8726 61.7107,30.564 61.0583,31.3989 62.0215,36.0132 61.1432,36.8921 62.0009,42.6489 60.6301,43.5718 61.3757,47.0435 59.8889,49.5923 60.0868,49.1528 58.1707,51.9214 57.9148,50.2515 56.1456,52.6685 55.826,51.6577 54.2909,52.8882 53.9302,50.647 53.0148,51.394 52.4828,48.0542 51.1793,49.2847 50.5414,47.1753 49.153,43.9233 49.8096,42.561 48.7779,36.936 49.6676,35.2661 48.7489,32.8052 49.5252,27.2241 48.9802,26.1255 50.4015,21.2036 50.205,20.0171 51.5634,17.4683 53.0148,19.4458 54.0852,19.4458 55.8753,19.5776 57.4922,19.5776 58.6769,24.3677 61.4598),(24.4556 59.4227,21.2036 58.4937,21.3354 56.897,21.5991 55.9246,25.2026 55.9984,28.8501 57.0646,27.0923 57.8448,28.8062 59.1759,26.2573 59.1759,24.4556 59.4227),(33.1079 56.9523,33.1392 56.8934,33.7182 56.7292,35.1489 56.5859,34.229 56.7948,36.9794 57.0751,35.7705 57.2554,37.0097 57.4998,35.7402 57.7909,37.1608 58.0478,36.0123 58.2869,37.191 58.6819,34.6522 58.9167,37.2327 59.0233,37.1118 59.6677,35.1343 59.8448,31.9702 58.9727,32.25 58.4976,33.4734 58.8542,34.7428 59.5659,33.8361 58.6819,36.3447 58.5402,33.6245 58.271,36.4354 58.0478,33.2316 57.7748,36.1936 57.4998,33.1712 57.337,36.0727 57.0915,33.1079 56.9523),(37.0604 52.9744,34.9585 51.4814,36.5405 50.4015,39.6606 50.2893,39.7925 52.1335,41.77 50.6808,44.4946 51.9713,47.3071 52.5095,44.0552 53.5403,46.604 53.6967,47.6147 55.4041,45.3735 55.4041,42.8247 56.5837,40.4412 56.1511,40.5761 55.7884,39.7601 
55.7544,39.8205 55.2753,40.3948 55.2408,40.3948 54.8773,39.5485 54.8773,39.5485 54.5631,40.3948 54.4403,40.6064 54.034,39.9716 53.9807,40.2437 53.5878,39.5485 53.5878,40.0019 53.354,38.3395 53.2817,39.5787 52.6996,37.8559 52.9188,37.4471 53.2343,37.2165 53.0798,37.4328 52.9552,37.0604 52.9744),(31.627 54.7093,29.5972 55.5037,29.1577 55.7518,22.5659 55.1286,22.5659 53.5403,22.0386 51.4814,26.2573 51.4266,30.1245 50.5414,32.1899 51.1793,30.1245 53.1731,32.4808 53.1989,32.0831 53.408,32.476 53.8383,31.4182 54.4227,31.627 54.7093),(34.7731 53.3243,34.7731 53.1793,35.0903 53.1731,34.7731 53.3243),(36.9508 55.414,37.7653 55.1891,36.8822 54.975,37.0572 54.7635,38.3093 55.1546,37.7955 55.3956,38.4907 55.5327,38.3184 55.7179,38.0262 55.6546,38.0373 55.6523,37.9482 55.6376,36.9508 55.414),(38.3092 56.9929,38.5798 57.0849,38.2186 57.2717,38.7325 57.4835,38.3395 57.7103,38.8533 58.0638,38.3698 58.2869,39.5485 58.7133,38.8838 58.9777,38.0944 58.8545,38.5813 58.7446,37.4026 58.3187,38.3395 57.9356,37.4328 57.7103,38.128 57.516,37.1608 57.2554,38.3092 56.9929),(38.309 56.9928,36.375 56.6455,36.8799 56.4895,38.309 56.9928),(40.3237 57.5365,42.6929 58.0314,40.8911 59.2659,39.2792 59.0373,40.4552 58.9011,40.3343 58.3821,39.6392 58.3821,39.6392 58.0478,40.2437 58.0478,40.3237 57.5365),(40.0149 57.4677,39.7299 57.4673,39.7379 57.4051,40.0149 57.4677))) -------- Polygon with Polygon with Holes -MULTIPOLYGON(((24.3677 61.4598,26.6528 61.1008,26.8726 61.7107,30.564 61.0583,31.3989 62.0215,36.0132 61.1432,36.8921 62.0009,42.6489 60.6301,43.5718 61.3757,47.0435 59.8889,49.5923 60.0868,49.1528 58.1707,51.9214 57.9148,50.2515 56.1455,52.6685 55.826,51.6577 54.2909,52.8882 53.9302,50.647 53.0148,51.394 52.4828,48.0542 51.1793,49.2847 50.5414,47.1753 49.153,43.9233 49.8096,42.561 48.7779,36.936 49.6676,35.2661 48.7489,32.8052 49.5252,27.2241 48.9802,26.1255 50.4015,21.2036 50.205,20.0171 51.5634,17.4683 53.0148,19.4458 54.0852,19.4458 55.8753,19.5776 57.4922,19.5776 58.6769,24.3677 61.4598),(24.4556 59.4227,21.2036 58.4937,21.3354 56.897,21.5991 55.9246,25.2026 55.9984,28.8501 57.0646,27.0923 57.8448,28.8062 59.1759,26.2573 59.1759,24.4556 59.4227),(32.6512 57.792,32.9378 57.2699,36.7912 59.6986,35.9475 59.7758,32.6512 57.792),(33.2446 56.7729,34.2635 56.6767,37.6322 58.7797,37.2876 58.7226,37.2102 59.1452,33.2446 56.7729),(36.1815 56.4715,41.168 59.0834,40.9299 59.2404,40.8804 59.2644,40.2079 59.1718,35.4536 56.5531,36.1815 56.4715),(30.7705 55.0525,30.2092 54.6331,30.2394 53.6774,31.5682 54.7333,30.7705 55.0525),(33.8733 53.1922,34.3351 53.53,33.5144 53.9057,32.5603 53.1989,33.8733 53.1922),(31.1968 52.1649,29.7861 52.1466,30.5785 52.7531,30.3098 53.0028,29.3931 52.2763,29.4171 55.606,29.1577 55.7518,22.5659 55.1286,22.5659 53.5403,22.0386 51.4814,26.2573 51.4266,30.1245 50.5414,32.1899 51.1793,31.1968 52.1649),(31.1682 53.1903,32.6907 54.2663,32.2591 54.4483,30.5408 53.1811,31.1682 53.1903),(39.4328 55.9511,37.2766 54.4948,37.7431 53.9104,41.4519 56.3413,39.4328 55.9511),(40.9691 57.677,42.2498 58.3455,41.5887 58.8012,38.1759 56.9472,39.0894 57.2553,40.9691 57.677),(37.1934 55.4694,36.5845 55.3291,36.7219 55.1665,37.1934 55.4694),(32.2964 58.4175,34.2247 59.6064,31.9702 58.9727,32.2964 58.4175),(35.9681 52.2157,34.9585 51.4814,36.5405 50.4015,39.6606 50.2893,39.7925 52.1335,41.77 50.6808,44.4946 51.9713,47.3071 52.5095,44.0552 53.5403,46.604 53.6967,47.6147 55.4041,45.3735 55.4041,44.4212 55.8594,44.4146 55.3097,40.0925 52.1652,38.3395 52.1652,43.0243 55.3269,43.0243 56.2614,37.1608 52.2393,35.9681 52.2157))) 
+MULTIPOLYGON(((24.3677 61.4598,26.6528 61.1008,26.8726 61.7107,30.564 61.0583,31.3989 62.0215,36.0132 61.1432,36.8921 62.0009,42.6489 60.6301,43.5718 61.3757,47.0435 59.8889,49.5923 60.0868,49.1528 58.1707,51.9214 57.9148,50.2515 56.1456,52.6685 55.826,51.6577 54.2909,52.8882 53.9302,50.647 53.0148,51.394 52.4828,48.0542 51.1793,49.2847 50.5414,47.1753 49.153,43.9233 49.8096,42.561 48.7779,36.936 49.6676,35.2661 48.7489,32.8052 49.5252,27.2241 48.9802,26.1255 50.4015,21.2036 50.205,20.0171 51.5634,17.4683 53.0148,19.4458 54.0852,19.4458 55.8753,19.5776 57.4922,19.5776 58.6769,24.3677 61.4598),(24.4556 59.4227,21.2036 58.4937,21.3354 56.897,21.5991 55.9246,25.2026 55.9984,28.8501 57.0646,27.0923 57.8448,28.8062 59.1759,26.2573 59.1759,24.4556 59.4227),(32.6512 57.792,32.9378 57.2699,36.7912 59.6986,35.9475 59.7758,32.6512 57.792),(33.2446 56.7729,34.2635 56.6767,37.6322 58.7797,37.2876 58.7226,37.2102 59.1452,33.2446 56.7729),(36.1815 56.4715,41.168 59.0834,40.9299 59.2404,40.8804 59.2644,40.2079 59.1718,35.4536 56.5531,36.1815 56.4715),(30.7705 55.0525,30.2092 54.6331,30.2394 53.6774,31.5682 54.7333,30.7705 55.0525),(33.8733 53.1922,34.3351 53.53,33.5144 53.9057,32.5603 53.1989,33.8733 53.1922),(31.1968 52.1649,29.7861 52.1466,30.5785 52.7531,30.3098 53.0028,29.3931 52.2763,29.4171 55.606,29.1577 55.7518,22.5659 55.1286,22.5659 53.5403,22.0386 51.4814,26.2573 51.4266,30.1245 50.5414,32.1899 51.1793,31.1968 52.1649),(31.1682 53.1903,32.6907 54.2663,32.2591 54.4483,30.5408 53.1811,31.1682 53.1903),(39.4328 55.9511,37.2766 54.4948,37.7431 53.9104,41.4519 56.3413,39.4328 55.9511),(40.9691 57.677,42.2498 58.3455,41.5887 58.8012,38.1759 56.9472,39.0894 57.2553,40.9691 57.677),(37.1934 55.4694,36.5845 55.3291,36.7219 55.1665,37.1934 55.4694),(32.2964 58.4175,34.2247 59.6064,31.9702 58.9727,32.2964 58.4175),(35.9681 52.2157,34.9585 51.4814,36.5405 50.4015,39.6606 50.2893,39.7925 52.1335,41.77 50.6808,44.4946 51.9713,47.3071 52.5095,44.0552 53.5403,46.604 53.6967,47.6147 55.4041,45.3735 55.4041,44.4212 55.8594,44.4146 55.3097,40.0925 52.1652,38.3395 52.1652,43.0243 55.3269,43.0243 56.2614,37.1608 52.2393,35.9681 52.2157))) diff --git a/tests/queries/0_stateless/01305_polygons_union.sql b/tests/queries/0_stateless/01305_polygons_union.sql index 01982c21e6e..23ea0d050c3 100644 --- a/tests/queries/0_stateless/01305_polygons_union.sql +++ b/tests/queries/0_stateless/01305_polygons_union.sql @@ -1,15 +1,18 @@ select polygonsUnionCartesian([[[(0., 0.),(0., 3.),(1., 2.9),(2., 2.6),(2.6, 2.),(2.9, 1),(3., 0.),(0., 0.)]]], [[[(1., 1.),(1., 4.),(4., 4.),(4., 1.),(1., 1.)]]]); -SELECT polygonsUnionCartesian([[[(2., 100.0000991821289), (0., 3.), (1., 2.9), (2., 2.6), (2.6, 2.), (2.9, 1), (3., 0.), (100.0000991821289, 2.)]]], [[[(1., 1.), (1000.0001220703125, nan), (4., 4.), (4., 1.), (1., 1.)]]]); -- { serverError 43 } +SELECT arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), polygonsUnionCartesian([[[(2., 100.0000991821289), (0., 3.), (1., 2.9), (2., 2.6), (2.6, 2.), (2.9, 1), (3., 0.), (100.0000991821289, 2.)]]], [[[(1., 1.), (1000.0001220703125, nan), (4., 4.), (4., 1.), (1., 1.)]]])); -- { serverError 43 } -select polygonsUnionSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]]); +select arrayMap(a -> arrayMap(b -> 
arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), polygonsUnionSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]])); select '-------- MultiPolygon with Polygon'; -select wkt(polygonsUnionSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], [[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], 
[[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], [[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]])) format TSV; +select wkt(arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), 
+polygonsUnionSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], [[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], 
[[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], [[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]]))) format TSV; select '-------- MultiPolygon with Polygon with Holes'; -select 
wkt(polygonsUnionSpherical([[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], [[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], 
[[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]], [[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], 
[(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]])) format TSV; +select wkt(arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), +polygonsUnionSpherical([[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], 
[[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], [[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]], 
[[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]]))) format TSV; select '-------- Polygon with Polygon with Holes'; -select 
wkt(polygonsUnionSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], [[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], 
[(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]])) format TSV; +select wkt(arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), +polygonsUnionSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], 
[[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]]))) format TSV; diff --git a/tests/queries/0_stateless/01306_polygons_intersection.reference b/tests/queries/0_stateless/01306_polygons_intersection.reference index 43ee975913e..99b26f7acc7 100644 --- a/tests/queries/0_stateless/01306_polygons_intersection.reference +++ b/tests/queries/0_stateless/01306_polygons_intersection.reference @@ -1,9 +1,9 @@ 
[[[(1,2.9),(2,2.6),(2.6,2),(2.9,1),(1,1),(1,2.9)]]] [] [] -[[[(4.3666052904432435,50.84337386140151),(4.3602419,50.8435626),(4.349556,50.8535879),(4.3526804582393535,50.856658100365976),(4.367945,50.852455),(4.3666052904432435,50.84337386140151)]]] +[[[(4.366605,50.843374),(4.360242,50.843563),(4.349556,50.853588),(4.35268,50.856658),(4.367945,50.852455),(4.366605,50.843374)]]] -------- MultiPolygon with Polygon -MULTIPOLYGON(((35.5408 58.9593,36.9725 59.0149,36.4989 58.7512,35.3712 58.8556,35.5408 58.9593)),((34.4816 56.8232,36.1999 57.0022,35.4083 56.5254,34.3867 56.7596,34.4816 56.8232)),((35.9179 57.7512,36.0848 57.855,37.1608 58.0478,36.5949 58.1673,37.8553 58.9075,38.5813 58.7446,37.4026 58.3187,38.0535 58.0542,36.4587 57.1544,35.7705 57.2554,37.0097 57.4998,35.9179 57.7512)),((36.8709 53.2765,37.4328 52.9552,36.5022 53.0008,36.8709 53.2765)),((36.1528 53.6763,35.3645 53.076,34.9611 53.9765,36.0472 54.7217,36.6985 54.0791,36.3552 53.8269,35.9216 53.8026,36.1528 53.6763)),((37.0035 54.2999,36.7074 54.6506,38.1601 55.1091,37.0035 54.2999)),((38.1688 56.0758,38.2186 56.0594,38.1319 56.0534,38.1688 56.0758)),((37.6238 55.7402,38.0373 55.6523,37.2824 55.5258,37.6238 55.7402)),((37.06 55.3843,37.7653 55.1891,36.151 54.791,37.06 55.3843)),((38.2312 56.9795,36.5334 56.6753,37.4489 57.1909,38.2699 57.0021,38.2312 56.9795)),((37.2281 56.3799,36.193 55.7319,35.3188 55.9582,35.6571 56.1619,36.7074 56.211,36.0233 56.3789,36.4446 56.6242,37.2281 56.3799)),((34.9952 58.6226,36.1498 58.553,36.0877 58.5174,34.6028 58.3749,34.9952 58.6226)),((34.3593 58.2189,35.4314 58.1349,35.1134 57.9454,33.7581 57.8255,34.3593 58.2189)),((33.6325 57.7419,34.6332 57.6538,34.2274 57.4023,33.1712 57.337,34.0208 57.2724,33.5602 56.9781,32.9596 56.9434,33.3418 56.8364,31.7782 55.7778,31.5088 55.9411,31.6069 56.3194,33.6325 57.7419)),((36.403 58.0507,36.4354 58.0478,36.3932 58.0447,36.403 58.0507)),((35.613 57.5595,36.1936 57.4998,35.4682 57.4674,35.613 57.5595)),((35.0338 57.1875,36.0727 57.0915,34.8098 57.0409,35.0338 57.1875)),((34.1885 56.6259,35.2273 56.414,35.0485 56.303,34.5917 56.2949,33.7222 56.3063,34.1885 56.6259)),((33.5244 56.1686,34.4996 55.9565,34.2598 55.8023,33.1204 55.8832,33.5244 56.1686)),((32.9547 55.7645,33.5036 55.3785,33.6125 55.3778,31.8748 54.1736,31.4182 54.4227,31.7439 54.8677,32.9547 55.7645)),((34.7279 53.8116,34.7731 53.7847,34.7731 52.9188,33.4048 52.8423,34.7279 53.8116)),((34.7231 54.7576,32.5275 53.1741,32.0831 53.408,32.476 53.8383,32.2523 53.964,34.3709 55.3709,35.0149 55.3613,34.2593 54.9642,34.7231 54.7576)),((34.9706 54.9262,34.8335 55.0162,35.2275 55.0993,34.9706 54.9262)),((35.7505 55.4454,35.1358 55.5327,35.9817 55.5958,35.7505 55.4454)),((35.0954 55.822,35.6798 55.6863,34.9721 55.7463,35.0954 55.822)),((34.7331 56.1049,34.7126 56.11,34.744 56.1118,34.7331 56.1049)),((40.2143 54.467,38.5511 53.2922,38.3395 53.2817,38.4609 53.226,38.0214 52.8989,37.8559 52.9188,37.135 53.4711,39.8151 55.3187,39.8205 55.2753,40.3948 55.2408,40.3948 54.8773,39.5485 54.8773,39.5485 54.5631,40.2143 54.467)),((40.5716 55.8007,40.5761 55.7884,40.5504 55.7875,40.5716 55.8007)),((40.4543 56.5923,40.2529 56.4682,39.7903 56.4121,39.8102 56.1914,38.2609 55.1775,37.7955 55.3956,38.4907 55.5327,38.1884 55.8564,38.944 56.0594,38.4339 56.2361,39.7863 57.025,39.7903 56.9929,40.3343 56.9599,40.4543 56.5923)),((40.1389 58.048,38.4915 57.1308,38.2186 57.2717,38.7325 57.4835,38.3737 57.6908,39.6392 58.3427,39.6392 58.0478,40.1389 58.048)),((37.5054 56.5484,37.463 56.5623,37.565 56.5843,37.5054 56.5484)),((38.0744 
57.5312,38.128 57.516,37.9669 57.4734,38.0744 57.5312)),((40.4136 58.7241,40.3343 58.3821,39.7184 58.3823,40.4136 58.7241)),((39.8163 58.9766,39.4085 58.7696,38.5209 59.119,39.8163 58.9766)),((38.432 58.2584,38.3698 58.2869,38.7465 58.4255,38.432 58.2584)),((32.2175 58.3664,32.5691 58.5924,33.4734 58.8542,34.7428 59.5659,33.8361 58.6819,34.0496 58.6717,31.6514 57.1258,31.5088 57.4998,32.1738 58.0318,32.2175 58.3664)),((39.9942 53.358,40.0019 53.354,39.9877 53.3534,39.9942 53.358)),((39.2704 52.8471,39.5787 52.6996,39.1456 52.7573,39.2704 52.8471))) +MULTIPOLYGON(((35.5408 58.9593,36.9725 59.0149,36.4989 58.7512,35.3712 58.8556,35.5408 58.9593)),((34.4816 56.8232,36.1999 57.0022,35.4083 56.5254,34.3867 56.7596,34.4816 56.8232)),((35.9179 57.7512,36.0848 57.855,37.1608 58.0478,36.5949 58.1673,37.8553 58.9075,38.5813 58.7446,37.4026 58.3187,38.0535 58.0542,36.4587 57.1544,35.7705 57.2554,37.0097 57.4998,35.9179 57.7512)),((36.8709 53.2765,37.4328 52.9552,36.5022 53.0008,36.8709 53.2765)),((36.1528 53.6763,35.3645 53.076,34.9611 53.9765,36.0472 54.7217,36.6985 54.0791,36.3552 53.8269,35.9216 53.8026,36.1528 53.6763)),((37.0035 54.2999,36.7074 54.6506,38.1601 55.1091,37.0035 54.2999)),((38.1688 56.0758,38.2186 56.0594,38.1319 56.0534,38.1688 56.0758)),((37.6238 55.7402,38.0373 55.6523,37.2824 55.5258,37.6238 55.7402)),((37.06 55.3843,37.7653 55.1891,36.151 54.791,37.06 55.3843)),((38.2312 56.9795,36.5334 56.6753,37.4489 57.1909,38.2699 57.0021,38.2312 56.9795)),((37.2281 56.3799,36.193 55.7319,35.3188 55.9582,35.6571 56.1619,36.7074 56.211,36.0233 56.3789,36.4446 56.6242,37.2281 56.3799)),((34.9952 58.6226,36.1498 58.553,36.0877 58.5174,34.6028 58.3749,34.9952 58.6226)),((34.3593 58.2189,35.4314 58.1349,35.1134 57.9454,33.7581 57.8255,34.3593 58.2189)),((33.6325 57.7419,34.6332 57.6538,34.2274 57.4023,33.1712 57.337,34.0208 57.2724,33.5602 56.9781,32.9596 56.9434,33.3418 56.8364,31.7782 55.7778,31.5088 55.9411,31.6069 56.3194,33.6325 57.7419)),((36.403 58.0507,36.4354 58.0478,36.3932 58.0447,36.403 58.0507)),((35.613 57.5595,36.1936 57.4998,35.4682 57.4674,35.613 57.5595)),((35.0338 57.1875,36.0727 57.0915,34.8098 57.0409,35.0338 57.1875)),((34.1885 56.6259,35.2273 56.414,35.0485 56.303,34.5917 56.2949,33.7222 56.3063,34.1885 56.6259)),((33.5244 56.1686,34.4996 55.9565,34.2598 55.8023,33.1204 55.8832,33.5244 56.1686)),((32.9547 55.7645,33.5036 55.3785,33.6125 55.3778,31.8748 54.1736,31.4182 54.4227,31.7439 54.8677,32.9547 55.7645)),((34.7279 53.8116,34.7731 53.7847,34.7731 52.9188,33.4048 52.8423,34.7279 53.8116)),((34.7231 54.7576,32.5275 53.1741,32.0831 53.408,32.476 53.8383,32.2523 53.964,34.3709 55.3709,35.0149 55.3613,34.2593 54.9642,34.7231 54.7576)),((34.9706 54.9262,34.8335 55.0162,35.2275 55.0993,34.9706 54.9262)),((35.7505 55.4454,35.1358 55.5327,35.9817 55.5958,35.7505 55.4454)),((35.0954 55.822,35.6798 55.6863,34.9721 55.7463,35.0954 55.822)),((34.7331 56.1049,34.7126 56.11,34.744 56.1118,34.7331 56.1049)),((40.2143 54.467,38.5511 53.2922,38.3395 53.2817,38.4609 53.226,38.0214 52.8989,37.8559 52.9188,37.135 53.4711,39.8151 55.3187,39.8205 55.2753,40.3948 55.2408,40.3948 54.8773,39.5485 54.8773,39.5485 54.5631,40.2143 54.467)),((40.5716 55.8007,40.5761 55.7884,40.5504 55.7875,40.5716 55.8007)),((40.4543 56.5923,40.2529 56.4682,39.7903 56.4121,39.8102 56.1914,38.2609 55.1775,37.7955 55.3956,38.4907 55.5327,38.1884 55.8564,38.944 56.0594,38.4339 56.2361,39.7863 57.025,39.7903 56.9929,40.3343 56.9599,40.4543 56.5923)),((40.1389 58.048,38.4915 57.1308,38.2186 57.2717,38.7325 
57.4835,38.3737 57.6908,39.6392 58.3427,39.6392 58.0478,40.1389 58.048)),((37.5054 56.5484,37.463 56.5623,37.565 56.5843,37.5054 56.5484)),((38.0744 57.5312,38.128 57.516,37.9669 57.4734,38.0744 57.5312)),((40.4136 58.7241,40.3343 58.3821,39.7184 58.3823,40.4136 58.7241)),((39.8163 58.9766,39.4085 58.7696,38.5209 59.119,39.8163 58.9766)),((38.432 58.2584,38.3698 58.2869,38.7465 58.4255,38.432 58.2584)),((32.2175 58.3664,32.5691 58.5924,33.4734 58.8542,34.7428 59.5659,33.8361 58.6819,34.0496 58.6717,31.6514 57.1258,31.5088 57.4998,32.1738 58.0318,32.2175 58.3664)),((39.9942 53.358,40.0019 53.354,39.9877 53.3534,39.9942 53.358)),((39.2704 52.8471,39.5787 52.6996,39.1456 52.7572,39.2704 52.8471))) -------- MultiPolygon with Polygon with Holes MULTIPOLYGON(((33.1079 56.9523,32.9596 56.9434,33.1392 56.8934,33.2007 56.7768,33.7182 56.7292,33.8361 56.6953,35.71 56.3117,34.5917 56.2949,32.8387 56.3117,35.6798 55.6863,32.748 55.9072,33.5036 55.3785,35.0149 55.3613,34.2593 54.9642,35.0753 54.5981,34.1081 54.1757,34.7731 53.7847,34.7731 53.3243,33.1128 54.0852,31.627 54.7093,31.8413 54.9989,32.204 55.5156,31.5088 55.9411,31.7506 56.8609,31.5088 57.4998,32.1738 58.0318,32.2342 58.4928,32.25 58.4976,33.1079 56.9523)),((35.1489 56.5859,36.6724 56.4139,36.8799 56.4895,38.2186 56.0594,36.647 55.9411,38.0262 55.6546,37.9482 55.6376,36.8283 55.4471,36.9508 55.414,36.5845 55.3291,36.8822 54.975,36.0123 54.7554,36.919 53.8561,35.9216 53.8026,37.2165 53.0798,37.0604 52.9744,35.3776 53.0462,34.894 54.1226,35.6193 54.4929,34.8335 55.0162,36.4354 55.3441,35.1358 55.5327,36.5563 55.6352,34.7126 56.11,36.7074 56.211,35.1489 56.5859)),((37.2327 59.0233,37.3119 59.0258,38.0944 58.8545,37.2876 58.7226,37.2327 59.0233)),((37.4471 53.2343,36.9794 53.5878,37.3119 53.9273,36.7074 54.6506,37.0572 54.7635,37.9907 53.5925,37.4471 53.2343)),((34.7731 53.1793,34.7731 52.9188,33.1712 52.8276,32.4808 53.1989,34.7731 53.1793)),((40.4412 56.1511,38.3184 55.7179,38.1884 55.8564,38.944 56.0594,37.463 56.5623,38.9742 56.8774,38.5798 57.0849,39.0894 57.2553,39.7379 57.4051,39.7903 56.9929,40.3343 56.9599,40.4855 56.4957,39.7903 56.4121,39.8205 56.0763,40.425 56.1942,40.4412 56.1511)),((38.3092 56.9929,38.3093 56.9929,38.309 56.9928,38.3092 56.9929)),((40.3237 57.5365,40.3343 57.4673,40.0149 57.4677,40.3237 57.5365)),((39.2792 59.0373,38.8838 58.9777,38.5209 59.119,39.2792 59.0373))) -------- Polygon with Polygon with Holes diff --git a/tests/queries/0_stateless/01306_polygons_intersection.sql b/tests/queries/0_stateless/01306_polygons_intersection.sql index 144408ca0ae..5bfba6124cd 100644 --- a/tests/queries/0_stateless/01306_polygons_intersection.sql +++ b/tests/queries/0_stateless/01306_polygons_intersection.sql @@ -1,14 +1,17 @@ select polygonsIntersectionCartesian([[[(0., 0.),(0., 3.),(1., 2.9),(2., 2.6),(2.6, 2.),(2.9, 1.),(3., 0.),(0., 0.)]]], [[[(1., 1.),(1., 4.),(4., 4.),(4., 1.),(1., 1.)]]]); select polygonsIntersectionCartesian([[[(0., 0.),(0., 3.),(1., 2.9),(2., 2.6),(2.6, 2.),(2.9, 1.),(3., 0.),(0., 0.)]]], [[[(3., 3.),(3., 4.),(4., 4.),(4., 3.),(3., 3.)]]]); -select polygonsIntersectionSpherical([[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]], [[[(25.0010, 136.9987), (17.7500, 142.5000), (11.3733, 142.5917)]]]); -select polygonsIntersectionSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), 
(4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]]); +select arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), polygonsIntersectionSpherical([[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]], [[[(25.0010, 136.9987), (17.7500, 142.5000), (11.3733, 142.5917)]]])); +select arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a),polygonsIntersectionSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]])); select '-------- MultiPolygon with Polygon'; -select wkt(polygonsIntersectionSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], 
[[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], [[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], 
[[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]])) format TSV; +select wkt(arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), +polygonsIntersectionSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], 
[[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], [[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], 
[[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]]))) format TSV; select '-------- MultiPolygon with Polygon with Holes'; -select wkt(polygonsIntersectionSpherical([[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], 
[[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], [[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]], 
[[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]])) format TSV; +select wkt(arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), 
+polygonsIntersectionSpherical([[[(33.473420586689336,58.85424941916091),(32.23422397806246,58.492830557036),(32.173775363007486,58.03176922751564),(31.508840597402823,57.499784781503735),(31.750635057622702,56.86092686957355),(31.508840597402823,55.941082594334574),(32.20399967053497,55.515591939372456),(31.84130798020516,54.998862226280465),(31.418167674820367,54.422670886434275),(32.47601843828233,53.83826377018255),(32.08310244042503,53.408048308050866),(33.171177511414484,52.82758702113742),(34.77306581037117,52.91880107773494),(34.77306581037117,53.784726518357985),(34.108131044766516,54.17574726780569),(35.07530888564602,54.59813930694554),(34.25925258240394,54.96417435716029),(35.01486027059106,55.361278263643584),(33.50364489421682,55.37845402950552),(32.7480372060297,55.90721384574556),(35.67979503619571,55.68634475630185),(32.83871012861215,56.311688992608396),(34.591719965206266,56.29492065473883),(35.7100193437232,56.311688992608396),(33.83611227701915,56.695333481003644),(32.95960735872209,56.9434497616887),(36.072711034053015,57.091531913901434),(33.171177511414484,57.33702717078384),(36.193608264162954,57.499784781503735),(33.23162612646945,57.77481561306047),(36.43540272438284,58.04776787540811),(33.62454212432676,58.27099811968307),(36.344729801800376,58.54018474404165),(33.83611227701915,58.68186423448108),(34.74284150284369,59.565911441555244),(33.473420586689336,58.85424941916091)]], [[(34.65216858026123,58.91672306881671),(37.19101041256995,58.68186423448108),(36.01226241899805,58.28688958537609),(37.16078610504247,58.04776787540811),(35.74024365125068,57.79092907387934),(37.009664567405046,57.499784781503735),(35.77046795877817,57.25537683364851),(36.979440259877556,57.07510745541089),(34.22902827487645,56.794777197297435),(36.7074214921302,56.210968525786996),(34.712617195316206,56.10998276812964),(36.55629995449277,55.63519693782703),(35.13575750070099,55.53270067649592),(36.43540272438284,55.34409504165558),(34.83351442542614,55.01619492319591),(35.61934642114075,54.49294870011772),(34.89396304048112,54.12264226523038),(35.37755196092087,53.046178687628185),(37.43280487278982,52.95523300597458),(35.92158949641559,53.80257986695776),(36.91899164482259,53.856094327816805),(36.01226241899805,54.75541714463799),(37.765272255592166,55.189110239786885),(36.828318722240134,55.44708256557195),(38.03729102333953,55.652253637168315),(36.64697287707522,55.941082594334574),(38.21863686850443,56.05939028508024),(36.37495410932787,56.64551287174558),(38.30930979108689,56.992876013526654),(37.16078610504247,57.25537683364851),(38.127963945921984,57.516020773674256),(37.43280487278982,57.710289827306724),(38.33953409861437,57.935626886818994),(37.40258056526235,58.31865112960426),(38.58132855883426,58.744648733419496),(37.31190764267989,59.02578062465136),(34.65216858026123,58.91672306881671)]], 
[[(38.52087994377928,59.11898412389468),(39.54850639971376,58.713270635642914),(38.369758406141855,58.28688958537609),(38.85334732658162,58.06375936407028),(38.33953409861437,57.710289827306724),(38.73245009647167,57.48354156434209),(38.21863686850443,57.271721400459285),(38.97424455669155,56.87744603722649),(37.463029180317314,56.5623320541159),(38.94402024916407,56.05939028508024),(38.18841256097694,55.856355210835915),(38.490655636251795,55.53270067649592),(37.795496563119656,55.39562234093384),(38.30930979108689,55.154587013355666),(36.7074214921302,54.65063295250911),(37.31190764267989,53.92734063371401),(36.979440259877556,53.58783775557231),(37.855945178174615,52.91880107773497),(39.57873070724124,52.69956490610895),(38.33953409861437,53.281741738901104),(40.00187101262603,53.35396273604752),(39.54850639971376,53.58783775557231),(40.24366547284591,53.58783775557231),(39.97164670509855,53.98069568468355),(40.60635716317572,54.03398248547225),(40.39478701048334,54.44025165268903),(39.54850639971376,54.56310590284329),(39.54850639971376,54.87732350170489),(40.39478701048334,54.87732350170489),(40.39478701048334,55.24083903654295),(39.82052516746112,55.2752875586599),(39.760076552406154,55.75443792473942),(40.57613285564824,55.78844000174894),(40.425011318010824,56.19415599955667),(39.82052516746112,56.07626182891758),(39.79030085993364,56.41214455508424),(40.48545993306579,56.495655446714636),(40.33433839542836,56.95993246553937),(39.79030085993364,56.992876013526654),(39.72985224487867,57.46729112028032),(40.33433839542836,57.46729112028032),(40.24366547284591,58.04776787540811),(39.63917932229622,58.04776787540811),(39.63917932229622,58.382088724871295),(40.33433839542836,58.382088724871295),(40.45523562553831,58.9011152358548),(38.52087994377928,59.11898412389468)]]], [[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], 
[(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]]))) format TSV; select '-------- Polygon with Polygon with Holes'; -select wkt(polygonsIntersectionSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], 
[[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], [(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]])) format TSV; +select wkt(arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), 
+polygonsIntersectionSpherical([[(29.453587685533865,59.779570356240356),(29.393139070478895,52.276266797422124),(40.636581470703206,59.38168915000267),(41.21084331372543,59.103467777099866),(29.786055068336193,52.146627480315004),(31.23682182965546,52.16517054781818),(41.69443223416517,58.85424941916091),(42.51048853740727,58.47703162291134),(32.59691566839227,52.22075341251539),(34.289476889931414,52.22075341251539),(43.02430176537451,58.07974369546071),(43.02430176537451,57.25537683364851),(35.468224883503325,52.2022335126388),(37.16078610504247,52.23926559241349),(43.02430176537451,56.26136189644947),(43.02430176537451,55.326904361850836),(38.33953409861437,52.16517054781818),(40.09254393520848,52.16517054781818),(44.4146199116388,55.3097062225408),(44.47506852669377,59.80998197603594),(39.72985224487867,59.931351417569715),(30.23941968124846,53.67744677450975),(30.20919537372098,54.63314259659509),(38.73245009647167,59.94649146557819),(37.2816833351524,59.97675082987618),(30.23941968124846,55.2752875586599),(30.33009260383092,56.19415599955667),(36.28428118674541,59.96162460231375),(34.863738732953635,59.97675082987618),(30.178971066193498,56.97640788219866),(30.178971066193498,57.91957806959033),(33.65476643185424,59.94649146557819),(32.32489690064491,59.94649146557819),(30.481214141468342,58.85424941916091),(30.571887064050795,59.99187015036608),(29.453587685533865,59.779570356240356)]], [[(24.367675781249993,61.45977057029751),(19.577636718749993,58.67693767258692),(19.577636718749993,57.492213666700735),(19.445800781249996,55.87531083569678),(19.445800781249996,54.085173420886775),(17.468261718749996,53.014783245859235),(20.017089843749993,51.563412328675895),(21.203613281249993,50.205033264943324),(26.125488281249993,50.40151532278236),(27.22412109374999,48.980216985374994),(32.80517578124999,49.525208341974405),(35.26611328124999,48.74894534343292),(36.93603515624999,49.66762782262194),(42.56103515625,48.77791275550183),(43.92333984374999,49.8096315635631),(47.17529296875,49.152969656170455),(49.28466796875,50.54136296522162),(48.05419921875,51.17934297928929),(51.39404296875,52.48278022207825),(50.64697265625,53.014783245859235),(52.88818359375,53.93021986394004),(51.65771484374999,54.29088164657006),(52.66845703125,55.825973254619015),(50.25146484375,56.145549500679095),(51.92138671875,57.914847767009206),(49.15283203125,58.17070248348605),(49.59228515625,60.086762746260064),(47.043457031249986,59.88893689676584),(43.57177734375,61.37567331572748),(42.64892578125,60.630101766266705),(36.89208984374999,62.000904713685856),(36.01318359374999,61.143235250840576),(31.398925781249993,62.02152819100766),(30.563964843749996,61.05828537037917),(26.872558593749993,61.71070595883174),(26.652832031249993,61.10078883158897),(24.367675781249993,61.45977057029751)], [(24.455566406249993,59.42272750081452),(21.203613281249993,58.49369382056807),(21.335449218749993,56.89700392127261),(21.599121093749993,55.92458580482949),(25.202636718749993,55.998380955359636),(28.850097656249993,57.06463027327854),(27.09228515625,57.844750992890994),(28.806152343749996,59.17592824927138),(26.257324218749993,59.17592824927138),(24.455566406249993,59.42272750081452)], 
[(35.13427734375,59.84481485969107),(31.970214843749993,58.97266715450152),(33.20068359374999,56.776808316568406),(36.67236328125,56.41390137600675),(39.08935546874999,57.25528054528888),(42.69287109374999,58.03137242177638),(40.89111328124999,59.26588062825809),(37.28759765625,58.722598828043374),(37.11181640624999,59.66774058164964),(35.13427734375,59.84481485969107)], [(29.157714843749993,55.75184939173528),(22.565917968749993,55.128649068488784),(22.565917968749993,53.54030739150019),(22.038574218749996,51.48138289610097),(26.257324218749993,51.42661449707484),(30.124511718749993,50.54136296522162),(32.18994140624999,51.17934297928929),(30.124511718749993,53.173119202640635),(35.09033203124999,53.173119202640635),(33.11279296875,54.085173420886775),(29.597167968749993,55.50374985927513),(29.157714843749993,55.75184939173528)], [(42.82470703125,56.58369172128337),(36.584472656249986,55.329144408405085),(37.99072265625,53.592504809039355),(34.95849609374999,51.48138289610097),(36.54052734374999,50.40151532278236),(39.66064453124999,50.289339253291786),(39.79248046875,52.13348804077148),(41.77001953125,50.68079714532166),(44.49462890624999,51.97134580885171),(47.30712890624999,52.509534770327264),(44.05517578125,53.54030739150019),(46.60400390625,53.696706475303245),(47.61474609375,55.40406982700608),(45.37353515625,55.40406982700608),(42.82470703125,56.58369172128337)]]))) format TSV; diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index fd2de93c39c..9c20b7c517e 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -1,14 +1,13 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] @@ -21,6 +20,7 @@ expect "SET max_distributed" # Wait for suggestions to load, they are loaded in background set is_done 0 +set timeout 1 while {$is_done == 0} { send -- "\t" expect { @@ -28,10 +28,15 @@ while {$is_done == 0} { set is_done 1 } default { - sleep 1 + # expect "_" will wait for timeout, + # if completion was not loaded it will fail, + # and we will retry, + # but for retry on timeout default should be reseted, + # this is what for this block. } } } +set timeout 60 send -- "\3\4" expect eof diff --git a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference index d00491fd7e5..1eb57a24638 100644 --- a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference +++ b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.reference @@ -1 +1,4 @@ 1 +: Number of CPUs is not deterministic +: Number of CPUs is not deterministic, but narenas is set. Hope you not what you are doing and you have set narenas to largest possible CPU ID. 
+1 diff --git a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh index 869e3a1d26d..265ca4a6763 100755 --- a/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh +++ b/tests/queries/0_stateless/01502_jemalloc_percpu_arena.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-msan, no-ubsan, no-fasttest +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# NOTE: jemalloc is disabled under sanitizers CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -6,9 +9,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ncpus="$(getconf _NPROCESSORS_ONLN)" -# to hit possible issues even in unbundled builds: -# (although likiley jemalloc will be compiled with NDEBUG there) -export MALLOC_CONF=percpu_arena:percpu +# In debug build the following settings enabled by default: +# - abort_conf +# - abort +# Disable them explicitly (will enable when required). +export MALLOC_CONF=abort_conf:false,abort:false # Regression for: # @@ -18,3 +23,15 @@ export MALLOC_CONF=percpu_arena:percpu taskset --cpu-list $((ncpus-1)) ${CLICKHOUSE_LOCAL} -q 'select 1' # just in case something more complicated taskset --cpu-list $((ncpus-1)) ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' + +# this command should fail because percpu arena will be disabled, +# and with abort_conf:true it is not allowed +( + # subshell is required to suppress "Aborted" message from the shell. + MALLOC_CONF=abort_conf:true,abort:true + taskset --cpu-list $((ncpus-1)) ${CLICKHOUSE_LOCAL} -q 'select 1' +) |& grep -F 'Number of CPUs is not deterministic' + +# this command should not fail because we specify narenas explicitly +# (even with abort_conf:true) +MALLOC_CONF=abort_conf:true,abort:false,narenas:$((ncpus)) taskset --cpu-list $((ncpus-1)) ${CLICKHOUSE_LOCAL} -q 'select 1' 2>&1 diff --git a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh index 645eaea743c..ff22597c620 100755 --- a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh +++ b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh @@ -1,9 +1,14 @@ #!/usr/bin/env bash +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +# In debug build abort_conf:true is set by default, disable it explicitly +# to avoid "Number of CPUs is not deterministic" error from jemalloc. +export MALLOC_CONF=abort_conf:false + # Regression for UAF in ThreadPool. 
# (Triggered under TSAN) for _ in {1..10}; do diff --git a/tests/queries/0_stateless/01520_client_print_query_id.expect b/tests/queries/0_stateless/01520_client_print_query_id.expect index 1989ab51aea..8b6e0e17a85 100755 --- a/tests/queries/0_stateless/01520_client_print_query_id.expect +++ b/tests/queries/0_stateless/01520_client_print_query_id.expect @@ -1,14 +1,13 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01565_reconnect_after_client_error.expect b/tests/queries/0_stateless/01565_reconnect_after_client_error.expect index 261e46c2111..819450ffd30 100755 --- a/tests/queries/0_stateless/01565_reconnect_after_client_error.expect +++ b/tests/queries/0_stateless/01565_reconnect_after_client_error.expect @@ -1,5 +1,5 @@ #!/usr/bin/expect -f -# Tags: no-fasttest +# Tags: long # This is a separate test, because we want to test the interactive mode. # https://github.com/ClickHouse/ClickHouse/issues/19353 @@ -8,11 +8,11 @@ log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.reference b/tests/queries/0_stateless/01622_defaults_for_url_engine.reference index 7326d960397..c0a49154cd9 100644 --- a/tests/queries/0_stateless/01622_defaults_for_url_engine.reference +++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.reference @@ -1 +1 @@ -Ok +(1,7,8) \ No newline at end of file diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh index 0a93a3ef479..cf89a624bc9 100755 --- a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh +++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh @@ -1,37 +1,13 @@ #!/usr/bin/env bash # Tags: no-fasttest -# Tag no-fasttest: nc - command not found CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh - -PORT="$(($RANDOM%63000+2001))" - -function thread1 -{ - while true; do - echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1; - done -} - -function thread2 -{ - while true; do - $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 -q "SELECT * FROM url('http://127.0.0.1:$1/', JSONEachRow, 'a int, b int default 7, c default a + b') format Values" | grep -F '(1,7,8)' && break - done -} - -# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout -export -f thread1; -export -f thread2; - -TIMEOUT=60 - -timeout $TIMEOUT bash -c "thread1 $PORT" > /dev/null 2>&1 & -PID=$! 
- -bash -c "thread2 $PORT" 2> /dev/null | grep -q -F '(1,7,8)' && echo "Ok" && kill -9 $PID - -wait >/dev/null 2>&1 +${CLICKHOUSE_CLIENT} --query " +SELECT * FROM url( + \$\$http://127.0.0.1:${CLICKHOUSE_PORT_HTTP}/?query=SELECT+'{\"a\":1}'\$\$, + JSONEachRow, + 'a int, b int default 7, c default a + b') +FORMAT Values" diff --git a/tests/queries/0_stateless/01634_summap_nullable.reference b/tests/queries/0_stateless/01634_summap_nullable.reference new file mode 100644 index 00000000000..babed7df00d --- /dev/null +++ b/tests/queries/0_stateless/01634_summap_nullable.reference @@ -0,0 +1,2 @@ +(['a'],[1]) +(['a','b'],[1,0]) diff --git a/tests/queries/0_stateless/01634_summap_nullable.sql b/tests/queries/0_stateless/01634_summap_nullable.sql new file mode 100644 index 00000000000..226da645e9f --- /dev/null +++ b/tests/queries/0_stateless/01634_summap_nullable.sql @@ -0,0 +1,2 @@ +SELECT sumMap(['a', 'b'], [1, NULL]); +SELECT sumMap(['a', 'b'], [1, toNullable(0)]); diff --git a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper_long.reference b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper_long.reference index ebb0b033d5b..05f7d08de7d 100644 --- a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper_long.reference +++ b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper_long.reference @@ -1,3 +1,4 @@ +~~~~source parts~~~~~ 1 1_0_0_0 1 1_1_1_0 2 2_0_0_0 @@ -10,6 +11,7 @@ 2_ 2_1_1_0 3_ 3_0_0_0 3_ 3_1_1_0 +~~~~parts after deduplication~~~~~ 1 1_0_0_0 1 1_1_1_0 2 2_0_0_0 @@ -22,6 +24,7 @@ 2_ 2_1_1_0 3_ 3_0_0_0 3_ 3_1_1_0 +~~~~parts after drop 3_1_1_0~~~~~ 1 1_0_0_0 1 1_1_1_0 2 2_0_0_0 @@ -32,6 +35,7 @@ 2_ 2_0_0_0 2_ 2_1_1_0 3_ 3_0_0_0 +~~~~parts after new part without deduplication~~~~~ 1 1_0_0_0 1 1_1_1_0 2 2_0_0_0 diff --git a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper_long.sql b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper_long.sql index 1aa568c1663..c77f29d89c2 100644 --- a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper_long.sql +++ b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper_long.sql @@ -17,25 +17,33 @@ SYSTEM STOP MERGES partitioned_table; INSERT INTO partitioned_table VALUES (1, 1, 'A'), (2, 2, 'B'), (3, 3, 'C'); INSERT INTO partitioned_table VALUES (11, 1, 'AA'), (22, 2, 'BB'), (33, 3, 'CC'); -SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; +SELECT '~~~~source parts~~~~~'; + +SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() and active ORDER BY name; SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/' || currentDatabase() || '/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; INSERT INTO partitioned_table VALUES (33, 3, 'CC'); -- must be deduplicated -SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; +SELECT '~~~~parts after deduplication~~~~~'; + +SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() and active ORDER BY name; SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/' || currentDatabase() || '/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; ALTER TABLE partitioned_table DROP PART '3_1_1_0'; -SELECT partition_id, 
name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; +SELECT '~~~~parts after drop 3_1_1_0~~~~~'; + +SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() and active ORDER BY name; SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/' || currentDatabase() || '/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; INSERT INTO partitioned_table VALUES (33, 3, 'CC'); -- mustn't be deduplicated -SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; +SELECT '~~~~parts after new part without deduplication~~~~~'; + +SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() and active ORDER BY name; SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/' || currentDatabase() || '/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; diff --git a/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert_long.reference b/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert.reference similarity index 61% rename from tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert_long.reference rename to tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert.reference index 343d1f3639f..63972b90eda 100644 --- a/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert_long.reference +++ b/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert.reference @@ -1,2 +1,3 @@ max_delay_to_insert will throw +Too many bytes pending for async INSERT max_delay_to_insert will succeed diff --git a/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert.sh b/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert.sh new file mode 100755 index 00000000000..a02589dde94 --- /dev/null +++ b/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert.sh @@ -0,0 +1,125 @@ +#!/usr/bin/env bash +# Tags: long, distributed + +# NOTE: $SECONDS accuracy is second, so we need some delta, hence -1 in time conditions. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function drop_tables() +{ + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"drop table if exists dist_01675" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"drop table if exists data_01675" +} + +# +# Case 1: max_delay_to_insert will throw. +# +function test_max_delay_to_insert_will_throw() +{ + echo "max_delay_to_insert will throw" + + local max_delay_to_insert=2 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"create table data_01675 (key Int) engine=Null()" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"create table dist_01675 (key Int) engine=Distributed(test_shard_localhost, currentDatabase(), data_01675) settings bytes_to_delay_insert=1, max_delay_to_insert=$max_delay_to_insert" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"system stop distributed sends dist_01675" + + local start_seconds=$SECONDS + # first batch is always OK, since there is no pending bytes yet + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"insert into dist_01675 select * from numbers(1) settings prefer_localhost_replica=0" + # second will fail, because of bytes_to_delay_insert=1 and max_delay_to_insert>0, + # while distributed sends is stopped. 
+ # + # (previous block definitelly takes more, since it has header) + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"insert into dist_01675 select * from numbers(1) settings prefer_localhost_replica=0" |& grep -o 'Too many bytes pending for async INSERT' + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"system flush distributed dist_01675" + local end_seconds=$SECONDS + + if (( (end_seconds-start_seconds)<(max_delay_to_insert-1) )); then + echo "max_delay_to_insert was not satisfied ($end_seconds-$start_seconds)" + fi +} + +# +# Case 2: max_delay_to_insert will finally finished. +# +function test_max_delay_to_insert_will_succeed_once() +{ + local max_delay_to_insert=4 + local flush_delay=2 + + drop_tables + + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"create table data_01675 (key Int) engine=Null()" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"create table dist_01675 (key Int) engine=Distributed(test_shard_localhost, currentDatabase(), data_01675) settings bytes_to_delay_insert=1, max_delay_to_insert=$max_delay_to_insert" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"system stop distributed sends dist_01675" + + function flush_distributed_worker() + { + sleep $flush_delay + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d @- <<<"system flush distributed dist_01675" + } + flush_distributed_worker & + + local start_seconds=$SECONDS + { + # NOTE: + # ignore stderr, since it may produce exception if flushing thread will be too slow + # (this is possible on CI) + + # first batch is always OK, since there is no pending bytes yet + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1" -d @- <<<"insert into dist_01675 select * from numbers(1) settings prefer_localhost_replica=0" >& /dev/null + # second will succeed, due to SYSTEM FLUSH DISTRIBUTED in background. + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&extremes=1" -d @- <<<"insert into dist_01675 select * from numbers(1) settings prefer_localhost_replica=0" >& /dev/null + } + local end_seconds=$SECONDS + + wait + + local diff=$(( end_seconds-start_seconds )) + + if (( diff<(flush_delay-1) )); then + # this is fatal error, that should not be retriable + echo "max_delay_to_insert was not wait flush_delay ($diff)" + exit 1 + fi + + # retry the test until the diff will be satisfied + # (since we should not assume that there will be no other lags) + if (( diff>=(max_delay_to_insert-1) )); then + return 1 + fi + + return 0 +} +function test_max_delay_to_insert_will_succeed() +{ + echo "max_delay_to_insert will succeed" + + local retries=20 i=0 + while (( (i++) < retries )); do + if test_max_delay_to_insert_will_succeed_once; then + return + fi + done + + echo failed +} + +function run_test() +{ + local test_case=$1 && shift + + drop_tables + $test_case +} + +function main() +{ + run_test test_max_delay_to_insert_will_throw + run_test test_max_delay_to_insert_will_succeed + + drop_tables +} +main "$@" diff --git a/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert_long.sh b/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert_long.sh deleted file mode 100755 index e373f632155..00000000000 --- a/tests/queries/0_stateless/01675_distributed_bytes_to_delay_insert_long.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, distributed - -# NOTE: $SECONDS accuracy is second, so we need some delta, hence -1 in time conditions. - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -function drop_tables() -{ - ${CLICKHOUSE_CLIENT} -nq " - drop table if exists dist_01675; - drop table if exists data_01675; - " -} - -# -# Case 1: max_delay_to_insert will throw. -# -function test_max_delay_to_insert_will_throw() -{ - echo "max_delay_to_insert will throw" - - local max_delay_to_insert=2 - ${CLICKHOUSE_CLIENT} -nq " - create table data_01675 (key Int) engine=Null(); - create table dist_01675 (key Int) engine=Distributed(test_shard_localhost, currentDatabase(), data_01675) settings bytes_to_delay_insert=1, max_delay_to_insert=$max_delay_to_insert; - system stop distributed sends dist_01675; - " - - local start_seconds=$SECONDS - ${CLICKHOUSE_CLIENT} --testmode -nq " - -- first batch is always OK, since there is no pending bytes yet - insert into dist_01675 select * from numbers(1) settings prefer_localhost_replica=0; - -- second will fail, because of bytes_to_delay_insert=1 and max_delay_to_insert>0, - -- while distributed sends is stopped. - -- - -- (previous block definitelly takes more, since it has header) - insert into dist_01675 select * from numbers(1) settings prefer_localhost_replica=0; -- { serverError 574 } - system flush distributed dist_01675; - " - local end_seconds=$SECONDS - - if (( (end_seconds-start_seconds)<(max_delay_to_insert-1) )); then - echo "max_delay_to_insert was not satisfied ($end_seconds-$start_seconds)" - fi -} - -# -# Case 2: max_delay_to_insert will finally finished. -# -function test_max_delay_to_insert_will_succeed_once() -{ - local max_delay_to_insert=4 - local flush_delay=2 - - drop_tables - - ${CLICKHOUSE_CLIENT} -nq " - create table data_01675 (key Int) engine=Null(); - create table dist_01675 (key Int) engine=Distributed(test_shard_localhost, currentDatabase(), data_01675) settings bytes_to_delay_insert=1, max_delay_to_insert=$max_delay_to_insert; - system stop distributed sends dist_01675; - " - - function flush_distributed_worker() - { - sleep $flush_delay - ${CLICKHOUSE_CLIENT} -q "system flush distributed dist_01675" - } - flush_distributed_worker & - - local start_seconds=$SECONDS - # ignore stderr, since it may produce exception if flushing thread will be too slow - # (this is possible on CI) - ${CLICKHOUSE_CLIENT} --testmode -nq " - -- first batch is always OK, since there is no pending bytes yet - insert into dist_01675 select * from numbers(1) settings prefer_localhost_replica=0; - -- second will succeed, due to SYSTEM FLUSH DISTRIBUTED in background. 
- insert into dist_01675 select * from numbers(1) settings prefer_localhost_replica=0; - " >& /dev/null - local end_seconds=$SECONDS - wait - - local diff=$(( end_seconds-start_seconds )) - - if (( diff<(flush_delay-1) )); then - # this is fatal error, that should not be retriable - echo "max_delay_to_insert was not wait flush_delay ($diff)" - exit 1 - fi - - # retry the test until the diff will be satisfied - # (since we cannot assume that there will be no other lags) - if (( diff>=(max_delay_to_insert-1) )); then - return 1 - fi - - return 0 -} -function test_max_delay_to_insert_will_succeed() -{ - echo "max_delay_to_insert will succeed" - - local retries=20 i=0 - while (( (i++) < retries )); do - if test_max_delay_to_insert_will_succeed_once; then - return - fi - done - - echo failed -} - -function run_test() -{ - local test_case=$1 && shift - - drop_tables - $test_case -} - -function main() -{ - run_test test_max_delay_to_insert_will_throw - run_test test_max_delay_to_insert_will_succeed - - drop_tables -} -main "$@" diff --git a/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh index fb2d97b6270..1be082a6aae 100755 --- a/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -20,11 +20,11 @@ function test_completion_word_client() log_user 0 set timeout 3 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } spawn bash -c "$CLICKHOUSE_CLIENT_BINARY $CLICKHOUSE_CLIENT_OPT" @@ -104,11 +104,11 @@ function test_completion_word_local() log_user 0 set timeout 3 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } spawn bash -c "$CLICKHOUSE_LOCAL" diff --git a/tests/queries/0_stateless/01710_projection_optimize_materialize.reference b/tests/queries/0_stateless/01710_projection_optimize_materialize.reference new file mode 100644 index 00000000000..24d24e52797 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_optimize_materialize.reference @@ -0,0 +1 @@ +pp 2021-10-24 474.00 B 1.56 KiB 3.38 100 1 diff --git a/tests/queries/0_stateless/01710_projection_optimize_materialize.sql b/tests/queries/0_stateless/01710_projection_optimize_materialize.sql new file mode 100644 index 00000000000..d8251aabaf6 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_optimize_materialize.sql @@ -0,0 +1,13 @@ +drop table if exists z; + +create table z (pk Int64, d Date, id UInt64, c UInt64) Engine MergeTree partition by d order by pk ; + +insert into z select number, '2021-10-24', intDiv (number, 10000), 1 from numbers(1000000); +optimize table z final; + +alter table z add projection pp (select id, sum(c) group by id); +alter table z materialize projection pp settings mutations_sync=1; + +SELECT name, partition, formatReadableSize(sum(data_compressed_bytes) AS size) AS compressed, 
formatReadableSize(sum(data_uncompressed_bytes) AS usize) AS uncompressed, round(usize / size, 2) AS compr_rate, sum(rows) AS rows, count() AS part_count FROM system.projection_parts WHERE database = currentDatabase() and table = 'z' AND active GROUP BY name, partition ORDER BY size DESC; + +drop table z; diff --git a/tests/queries/0_stateless/01720_country_perimeter_and_area.reference b/tests/queries/0_stateless/01720_country_perimeter_and_area.reference index 8a9690791c6..461aea090e7 100644 --- a/tests/queries/0_stateless/01720_country_perimeter_and_area.reference +++ b/tests/queries/0_stateless/01720_country_perimeter_and_area.reference @@ -1,214 +1,214 @@ -Dhekelia Sovereign Base Area 0.0186259930051051 -Kyrgyzstan 0.5868323961091907 +Dhekelia Sovereign Base Area 0.018626 +Kyrgyzstan 0.586832 ------------------------------------- -Dhekelia Sovereign Base Area 0.000003139488070896512 -Kyrgyzstan 0.004895645023822883 +Dhekelia Sovereign Base Area 0.000003 +Kyrgyzstan 0.004896 ------------------------------------- -Aruba 0.011249330810410983 -Afghanistan 0.8199216326776404 -Albania 0.17108622597702605 -Andorra 0.015145740647213184 -Ashmore and Cartier Islands 0.001111472909012953 -Austria 0.3258464621357028 -Burundi 0.1409500621452211 -Belgium 0.1794463601873955 -Benin 0.31426073515874664 -Burkina Faso 0.5144381682226761 -Bulgaria 0.3083164214454252 -Bahrain 0.02137170357214413 -Bosnia and Herzegovina 0.20611959113245232 -Bajo Nuevo Bank (Petrel Is.) 0.0001254597070361587 -Saint Barthelemy 0.0032990108720812672 -Belarus 0.42899119772830474 -Bolivia 0.9279328001326348 -Barbados 0.014116142490651021 -Bhutan 0.1601735058766338 -Botswana 0.5896697538755427 -Central African Republic 0.7760222837198817 -Switzerland 0.2318851512510408 -Clipperton Island 0.0014072924221565273 -Cameroon 0.8001045813665599 -Republic of Congo 0.6904316055863188 -Coral Sea Islands 0.00011634674137689659 -Curaçao 0.02078862020307983 -Czech Republic 0.2708588915805718 -Djibouti 0.12937731543684822 -Dominica 0.020094439807419574 -Algeria 1.1549683948032776 -Ethiopia 0.8210654364815099 -Georgia 0.26823008017781313 -Ghana 0.4056578143818251 -Gibraltar 0.0014059440610631154 -Guinea 0.6350853755877334 -Gambia 0.19279774895359095 -Guatemala 0.3030953561509038 -Guam 0.020321390076536976 -Heard Island and McDonald Islands 0.017334896920453105 -Hungary 0.2617732480910806 -Isle of Man 0.01875803631141408 -Iraq 0.5469861219502402 -Israel 0.19353851895699914 -Jamaica 0.10055860979159512 -Jersey 0.008427337812134537 -Jordan 0.2642243503964102 -Baykonur Cosmodrome 0.04482995477542441 -Siachen Glacier 0.03872116827341272 -Kosovo 0.08773172991408161 -Laos 0.6899867972760174 -Lebanon 0.09676977254650951 -Liberia 0.2961649538030388 -Libya 0.9538430912224716 -Saint Lucia 0.016786201647759867 -Liechtenstein 0.009288582116863231 -Lesotho 0.12315874900320756 -Luxembourg 0.04125996057810259 -Latvia 0.24488610945731157 -Saint Martin 0.006547834154217771 -Morocco 0.8817924249630141 -Monaco 0.0026049777439637527 -Moldova 0.20765701819586885 -Macedonia 0.1128831074330059 -Mali 1.1385970015559317 -Montenegro 0.11756794062084858 -Mongolia 1.142306166871007 -Montserrat 0.006620100691409788 -Namibia 0.843464957679987 -Niger 0.8780744302377772 -Norfolk Island 0.004912027225339993 -Niue 0.009881892958363517 -Nepal 0.4076113675280835 -Nauru 0.0031205159769295255 -Poland 0.48922069488271314 -Paraguay 0.5475256537493991 -Qatar 0.09362771431858698 -Romania 0.44095021664473105 -Rwanda 0.1293663890297039 -Western Sahara 0.4691920993279596 -Scarborough Reef 
0.00019842225207367386 -South Sudan 0.7584190842556537 -Senegal 0.5883247226863264 -Serranilla Bank 0.0002389083935906293 -Singapore 0.015233384733369614 -San Marino 0.004596873449598911 -Somaliland 0.3096791489207226 -Somalia 0.6879915318072617 -Republic of Serbia 0.29677234233404165 -Suriname 0.32255243342976203 -Slovakia 0.19843599488831584 -Slovenia 0.14713148471782736 -Swaziland 0.08434161089555517 -Sint Maarten 0.0037955305365309296 -Syria 0.35675522352394456 -Chad 0.9102578296637189 -Togo 0.2600585482954555 -Uganda 0.38301730108810556 -Uruguay 0.3083564407046887 -Vatican 0.00006702452496391445 -Akrotiri Sovereign Base Area 0.013376747415600219 -Zambia 0.8807923488623808 -Zimbabwe 0.4553903789902945 +Aruba 0.011249 +Afghanistan 0.819922 +Albania 0.171086 +Andorra 0.015146 +Ashmore and Cartier Islands 0.001111 +Austria 0.325846 +Burundi 0.14095 +Belgium 0.179446 +Benin 0.314261 +Burkina Faso 0.514438 +Bulgaria 0.308316 +Bahrain 0.021372 +Bosnia and Herzegovina 0.20612 +Bajo Nuevo Bank (Petrel Is.) 0.000125 +Saint Barthelemy 0.003299 +Belarus 0.428991 +Bolivia 0.927933 +Barbados 0.014116 +Bhutan 0.160174 +Botswana 0.58967 +Central African Republic 0.776022 +Switzerland 0.231885 +Clipperton Island 0.001407 +Cameroon 0.800105 +Republic of Congo 0.690432 +Coral Sea Islands 0.000116 +Curaçao 0.020789 +Czech Republic 0.270859 +Djibouti 0.129377 +Dominica 0.020094 +Algeria 1.154968 +Ethiopia 0.821065 +Georgia 0.26823 +Ghana 0.405658 +Gibraltar 0.001406 +Guinea 0.635085 +Gambia 0.192798 +Guatemala 0.303095 +Guam 0.020321 +Heard Island and McDonald Islands 0.017335 +Hungary 0.261773 +Isle of Man 0.018758 +Iraq 0.546986 +Israel 0.193539 +Jamaica 0.100559 +Jersey 0.008427 +Jordan 0.264224 +Baykonur Cosmodrome 0.04483 +Siachen Glacier 0.038721 +Kosovo 0.087732 +Laos 0.689987 +Lebanon 0.09677 +Liberia 0.296165 +Libya 0.953843 +Saint Lucia 0.016786 +Liechtenstein 0.009289 +Lesotho 0.123159 +Luxembourg 0.04126 +Latvia 0.244886 +Saint Martin 0.006548 +Morocco 0.881792 +Monaco 0.002605 +Moldova 0.207657 +Macedonia 0.112883 +Mali 1.138597 +Montenegro 0.117568 +Mongolia 1.142306 +Montserrat 0.00662 +Namibia 0.843465 +Niger 0.878074 +Norfolk Island 0.004912 +Niue 0.009882 +Nepal 0.407611 +Nauru 0.003121 +Poland 0.489221 +Paraguay 0.547526 +Qatar 0.093628 +Romania 0.44095 +Rwanda 0.129366 +Western Sahara 0.469192 +Scarborough Reef 0.000198 +South Sudan 0.758419 +Senegal 0.588325 +Serranilla Bank 0.000239 +Singapore 0.015233 +San Marino 0.004597 +Somaliland 0.309679 +Somalia 0.687992 +Republic of Serbia 0.296772 +Suriname 0.322552 +Slovakia 0.198436 +Slovenia 0.147131 +Swaziland 0.084342 +Sint Maarten 0.003796 +Syria 0.356755 +Chad 0.910258 +Togo 0.260059 +Uganda 0.383017 +Uruguay 0.308356 +Vatican 0.000067 +Akrotiri Sovereign Base Area 0.013377 +Zambia 0.880792 +Zimbabwe 0.45539 ------------------------------------- -Aruba 0.0000041986375296795025 -Afghanistan 0.015826481758320493 -Albania 0.0006971811189621746 -Andorra 0.00001112355564980348 -Ashmore and Cartier Islands 6.66668338977609e-8 -Austria 0.0020634744883290235 -Burundi 0.000669169243101558 -Belgium 0.0007529367590741593 -Benin 0.00287239734953164 -Burkina Faso 0.006746218025419332 -Bulgaria 0.0027733372191197786 -Bahrain 0.00001443842547561405 -Bosnia and Herzegovina 0.0012742491201009779 -Bajo Nuevo Bank (Petrel Is.) 
8.864825701897049e-10 -Saint Barthelemy 6.036607210116289e-7 -Belarus 0.005090738074359067 -Bolivia 0.026865324735758436 -Barbados 0.0000109856680212211 -Bhutan 0.0009961026696220909 -Botswana 0.01430200501713062 -Central African Republic 0.015290667187215962 -Switzerland 0.0010181463734151514 -Clipperton Island 1.2373029819547803e-7 -Cameroon 0.011488908713113137 -Republic of Congo 0.008534881807187833 -Coral Sea Islands 5.121674593493771e-10 -Curaçao 0.000011457378136273848 -Czech Republic 0.0019339153549488386 -Djibouti 0.000540370985929321 -Dominica 0.000018056168258583246 -Algeria 0.05696762706232162 -Ethiopia 0.02789047634482515 -Georgia 0.0017113229913929072 -Ghana 0.0059048504621945965 -Gibraltar 9.095456688875715e-8 -Guinea 0.006043151808047173 -Gambia 0.0002596816395280707 -Guatemala 0.0026901925526205263 -Guam 0.000013952443476670549 -Heard Island and McDonald Islands 0.000009688375334192321 -Hungary 0.0022899094702118978 -Isle of Man 0.00001410012284549863 -Iraq 0.010780689598789812 -Israel 0.0005400181032289429 -Jamaica 0.00027268062650994383 -Jersey 0.0000029236161155167853 -Jordan 0.002191215069390572 -Baykonur Cosmodrome 0.00015978303781425133 -Siachen Glacier 0.0000513879615262916 -Kosovo 0.0002684178325412152 -Laos 0.005637555524983489 -Lebanon 0.0002464436461544738 -Liberia 0.002357973807538481 -Libya 0.040072512808839354 -Saint Lucia 0.000014963842166249258 -Liechtenstein 0.0000033722024322722466 -Lesotho 0.0007426290112070925 -Luxembourg 0.00006405006804909529 -Latvia 0.00158313668683266 -Saint Martin 0.00000168759530251474 -Morocco 0.014595589778269167 -Monaco 4.6325700981005285e-7 -Moldova 0.0008158639460823913 -Macedonia 0.0006245180554490506 -Mali 0.03096381132470007 -Montenegro 0.00033762445623993013 -Mongolia 0.038446609480001344 -Montserrat 0.0000024620326175206004 -Namibia 0.020320978539029165 -Niger 0.02919849042641136 -Norfolk Island 0.0000010150641235563077 -Niue 0.000005450796200539049 -Nepal 0.003629565673884544 -Nauru 7.119067469952887e-7 -Poland 0.0076921097527402876 -Paraguay 0.009875843128670564 -Qatar 0.0002752610716836153 -Romania 0.005809479702080411 -Rwanda 0.0006262235765421803 -Western Sahara 0.0022344529652030694 -Scarborough Reef 2.4176335726807567e-9 -South Sudan 0.015509656314462458 -Senegal 0.00485201810074574 -Serranilla Bank 2.6035559945372385e-9 -Singapore 0.000012633505579848072 -San Marino 0.0000014830814619737624 -Somaliland 0.0041412916217828406 -Somalia 0.011674654119996183 -Republic of Serbia 0.001907268740192651 -Suriname 0.0035911641359236534 -Slovakia 0.0011901587428922095 -Slovenia 0.0004995546076509384 -Swaziland 0.00042234053226485263 -Sint Maarten 5.772865969377286e-7 -Syria 0.004581243750467663 -Chad 0.0313064894302088 -Togo 0.0014067991034602252 -Uganda 0.005985159048654327 -Uruguay 0.0043716082436750115 -Vatican 3.002600504657064e-10 -Akrotiri Sovereign Base Area 0.0000024314362587592923 -Zambia 0.018594119224502336 -Zimbabwe 0.009621356779606268 +Aruba 0.000004 +Afghanistan 0.015826 +Albania 0.000697 +Andorra 0.000011 +Ashmore and Cartier Islands 0 +Austria 0.002063 +Burundi 0.000669 +Belgium 0.000753 +Benin 0.002872 +Burkina Faso 0.006746 +Bulgaria 0.002773 +Bahrain 0.000014 +Bosnia and Herzegovina 0.001274 +Bajo Nuevo Bank (Petrel Is.) 
0 +Saint Barthelemy 0.000001 +Belarus 0.005091 +Bolivia 0.026865 +Barbados 0.000011 +Bhutan 0.000996 +Botswana 0.014302 +Central African Republic 0.015291 +Switzerland 0.001018 +Clipperton Island 0 +Cameroon 0.011489 +Republic of Congo 0.008535 +Coral Sea Islands 0 +Curaçao 0.000011 +Czech Republic 0.001934 +Djibouti 0.00054 +Dominica 0.000018 +Algeria 0.056968 +Ethiopia 0.02789 +Georgia 0.001711 +Ghana 0.005905 +Gibraltar 0 +Guinea 0.006043 +Gambia 0.00026 +Guatemala 0.00269 +Guam 0.000014 +Heard Island and McDonald Islands 0.00001 +Hungary 0.00229 +Isle of Man 0.000014 +Iraq 0.010781 +Israel 0.00054 +Jamaica 0.000273 +Jersey 0.000003 +Jordan 0.002191 +Baykonur Cosmodrome 0.00016 +Siachen Glacier 0.000051 +Kosovo 0.000268 +Laos 0.005638 +Lebanon 0.000246 +Liberia 0.002358 +Libya 0.040073 +Saint Lucia 0.000015 +Liechtenstein 0.000003 +Lesotho 0.000743 +Luxembourg 0.000064 +Latvia 0.001583 +Saint Martin 0.000002 +Morocco 0.014596 +Monaco 0 +Moldova 0.000816 +Macedonia 0.000625 +Mali 0.030964 +Montenegro 0.000338 +Mongolia 0.038447 +Montserrat 0.000002 +Namibia 0.020321 +Niger 0.029198 +Norfolk Island 0.000001 +Niue 0.000005 +Nepal 0.00363 +Nauru 0.000001 +Poland 0.007692 +Paraguay 0.009876 +Qatar 0.000275 +Romania 0.005809 +Rwanda 0.000626 +Western Sahara 0.002234 +Scarborough Reef 0 +South Sudan 0.01551 +Senegal 0.004852 +Serranilla Bank 0 +Singapore 0.000013 +San Marino 0.000001 +Somaliland 0.004141 +Somalia 0.011675 +Republic of Serbia 0.001907 +Suriname 0.003591 +Slovakia 0.00119 +Slovenia 0.0005 +Swaziland 0.000422 +Sint Maarten 0.000001 +Syria 0.004581 +Chad 0.031306 +Togo 0.001407 +Uganda 0.005985 +Uruguay 0.004372 +Vatican 0 +Akrotiri Sovereign Base Area 0.000002 +Zambia 0.018594 +Zimbabwe 0.009621 ------------------------------------- diff --git a/tests/queries/0_stateless/01720_country_perimeter_and_area.sh b/tests/queries/0_stateless/01720_country_perimeter_and_area.sh index 75016ee1d1f..0080c9a1a1b 100755 --- a/tests/queries/0_stateless/01720_country_perimeter_and_area.sh +++ b/tests/queries/0_stateless/01720_country_perimeter_and_area.sh @@ -8,9 +8,9 @@ ${CLICKHOUSE_CLIENT} -q "drop table if exists country_polygons;" ${CLICKHOUSE_CLIENT} -q "create table country_polygons(name String, p Array(Array(Tuple(Float64, Float64)))) engine=MergeTree() order by tuple();" cat ${CURDIR}/country_polygons.tsv | ${CLICKHOUSE_CLIENT} -q "insert into country_polygons format TSV" -${CLICKHOUSE_CLIENT} -q "SELECT name, polygonPerimeterSpherical(p) from country_polygons" +${CLICKHOUSE_CLIENT} -q "SELECT name, round(polygonPerimeterSpherical(p), 6) from country_polygons" ${CLICKHOUSE_CLIENT} -q "SELECT '-------------------------------------'" -${CLICKHOUSE_CLIENT} -q "SELECT name, polygonAreaSpherical(p) from country_polygons" +${CLICKHOUSE_CLIENT} -q "SELECT name, round(polygonAreaSpherical(p), 6) from country_polygons" ${CLICKHOUSE_CLIENT} -q "SELECT '-------------------------------------'" ${CLICKHOUSE_CLIENT} -q "drop table if exists country_rings;" @@ -18,9 +18,9 @@ ${CLICKHOUSE_CLIENT} -q "drop table if exists country_rings;" ${CLICKHOUSE_CLIENT} -q "create table country_rings(name String, p Array(Tuple(Float64, Float64))) engine=MergeTree() order by tuple();" cat ${CURDIR}/country_rings.tsv | ${CLICKHOUSE_CLIENT} -q "insert into country_rings format TSV" -${CLICKHOUSE_CLIENT} -q "SELECT name, polygonPerimeterSpherical(p) from country_rings" +${CLICKHOUSE_CLIENT} -q "SELECT name, round(polygonPerimeterSpherical(p), 6) from country_rings" ${CLICKHOUSE_CLIENT} -q "SELECT 
'-------------------------------------'" -${CLICKHOUSE_CLIENT} -q "SELECT name, polygonAreaSpherical(p) from country_rings" +${CLICKHOUSE_CLIENT} -q "SELECT name, round(polygonAreaSpherical(p), 6) from country_rings" ${CLICKHOUSE_CLIENT} -q "SELECT '-------------------------------------'" ${CLICKHOUSE_CLIENT} -q "drop table if exists country_rings;" diff --git a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect index 922a6914584..022320e2d4b 100755 --- a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect +++ b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect @@ -1,14 +1,13 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 2 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql index 6e4337fc05f..220d5d91a0b 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql @@ -32,7 +32,7 @@ select '(0, 2)'; with (select currentDatabase()) as id_no select *, ignore(id_no) from dist_01756 where dummy in (0, 2); system flush logs; select query from system.query_log where - event_date = today() and + event_date >= yesterday() and event_time > now() - interval 1 hour and not is_initial_query and query not like '%system%query_log%' and @@ -51,7 +51,7 @@ select 'optimize_skip_unused_shards_rewrite_in(0, 2)'; with (select currentDatabase()) as id_02 select *, ignore(id_02) from dist_01756 where dummy in (0, 2); system flush logs; select query from system.query_log where - event_date = today() and + event_date >= yesterday() and event_time > now() - interval 1 hour and not is_initial_query and query not like '%system%query_log%' and @@ -63,7 +63,7 @@ select 'optimize_skip_unused_shards_rewrite_in(2,)'; with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,); system flush logs; select query from system.query_log where - event_date = today() and + event_date >= yesterday() and event_time > now() - interval 1 hour and not is_initial_query and query not like '%system%query_log%' and @@ -75,7 +75,7 @@ select 'optimize_skip_unused_shards_rewrite_in(0,)'; with (select currentDatabase()) as id_0 select *, ignore(id_0) from dist_01756 where dummy in (0,); system flush logs; select query from system.query_log where - event_date = today() and + event_date >= yesterday() and event_time > now() - interval 1 hour and not is_initial_query and query not like '%system%query_log%' and diff --git a/tests/queries/0_stateless/01780_column_sparse.reference b/tests/queries/0_stateless/01780_column_sparse.reference new file mode 100644 index 00000000000..08aef433172 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse.reference @@ -0,0 +1,182 @@ +-- { echo } + +DROP TABLE IF EXISTS t_sparse; +DROP TABLE IF EXISTS t_sparse_1; +CREATE TABLE t_sparse (id UInt64, u UInt64, s String, arr1 Array(String), arr2 Array(UInt64)) +ENGINE = MergeTree ORDER BY 
tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1; +INSERT INTO t_sparse SELECT + number, + if (number % 10 = 0, number, 0), + if (number % 5 = 0, toString(number), ''), + if (number % 7 = 0, arrayMap(x -> toString(x), range(number % 10)), []), + if (number % 12 = 0, range(number % 10), []) +FROM numbers (200); +SELECT column, serialization_kind FROM system.parts_columns +WHERE table = 't_sparse' AND database = currentDatabase() +ORDER BY column; +arr1 Default +arr2 Default +id Default +s Sparse +u Sparse +SELECT * FROM t_sparse WHERE u != 0 ORDER BY id; +10 10 10 [] [] +20 20 20 [] [] +30 30 30 [] [] +40 40 40 [] [] +50 50 50 [] [] +60 60 60 [] [] +70 70 70 [] [] +80 80 80 [] [] +90 90 90 [] [] +100 100 100 [] [] +110 110 110 [] [] +120 120 120 [] [] +130 130 130 [] [] +140 140 140 [] [] +150 150 150 [] [] +160 160 160 [] [] +170 170 170 [] [] +180 180 180 [] [] +190 190 190 [] [] +SELECT * FROM t_sparse WHERE s != '' ORDER BY id; +0 0 0 [] [] +5 0 5 [] [] +10 10 10 [] [] +15 0 15 [] [] +20 20 20 [] [] +25 0 25 [] [] +30 30 30 [] [] +35 0 35 ['0','1','2','3','4'] [] +40 40 40 [] [] +45 0 45 [] [] +50 50 50 [] [] +55 0 55 [] [] +60 60 60 [] [] +65 0 65 [] [] +70 70 70 [] [] +75 0 75 [] [] +80 80 80 [] [] +85 0 85 [] [] +90 90 90 [] [] +95 0 95 [] [] +100 100 100 [] [] +105 0 105 ['0','1','2','3','4'] [] +110 110 110 [] [] +115 0 115 [] [] +120 120 120 [] [] +125 0 125 [] [] +130 130 130 [] [] +135 0 135 [] [] +140 140 140 [] [] +145 0 145 [] [] +150 150 150 [] [] +155 0 155 [] [] +160 160 160 [] [] +165 0 165 [] [] +170 170 170 [] [] +175 0 175 ['0','1','2','3','4'] [] +180 180 180 [] [] +185 0 185 [] [] +190 190 190 [] [] +195 0 195 [] [] +SELECT * FROM t_sparse WHERE arr1 != [] ORDER BY id; +7 0 ['0','1','2','3','4','5','6'] [] +14 0 ['0','1','2','3'] [] +21 0 ['0'] [] +28 0 ['0','1','2','3','4','5','6','7'] [] +35 0 35 ['0','1','2','3','4'] [] +42 0 ['0','1'] [] +49 0 ['0','1','2','3','4','5','6','7','8'] [] +56 0 ['0','1','2','3','4','5'] [] +63 0 ['0','1','2'] [] +77 0 ['0','1','2','3','4','5','6'] [] +84 0 ['0','1','2','3'] [0,1,2,3] +91 0 ['0'] [] +98 0 ['0','1','2','3','4','5','6','7'] [] +105 0 105 ['0','1','2','3','4'] [] +112 0 ['0','1'] [] +119 0 ['0','1','2','3','4','5','6','7','8'] [] +126 0 ['0','1','2','3','4','5'] [] +133 0 ['0','1','2'] [] +147 0 ['0','1','2','3','4','5','6'] [] +154 0 ['0','1','2','3'] [] +161 0 ['0'] [] +168 0 ['0','1','2','3','4','5','6','7'] [0,1,2,3,4,5,6,7] +175 0 175 ['0','1','2','3','4'] [] +182 0 ['0','1'] [] +189 0 ['0','1','2','3','4','5','6','7','8'] [] +196 0 ['0','1','2','3','4','5'] [] +SELECT * FROM t_sparse WHERE arr2 != [] ORDER BY id; +12 0 [] [0,1] +24 0 [] [0,1,2,3] +36 0 [] [0,1,2,3,4,5] +48 0 [] [0,1,2,3,4,5,6,7] +72 0 [] [0,1] +84 0 ['0','1','2','3'] [0,1,2,3] +96 0 [] [0,1,2,3,4,5] +108 0 [] [0,1,2,3,4,5,6,7] +132 0 [] [0,1] +144 0 [] [0,1,2,3] +156 0 [] [0,1,2,3,4,5] +168 0 ['0','1','2','3','4','5','6','7'] [0,1,2,3,4,5,6,7] +192 0 [] [0,1] +SELECT sum(u) FROM t_sparse; +1900 +SELECT sum(u) FROM t_sparse GROUP BY id % 7; +210 +360 +300 +240 +190 +330 +270 +SELECT arrayFilter(x -> x % 2 = 1, arr2) FROM t_sparse WHERE arr2 != [] LIMIT 5; +[1] +[1,3] +[1,3,5] +[1,3,5,7] +[1] +CREATE TABLE t_sparse_1 (id UInt64, v Int64) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0; +INSERT INTO t_sparse_1 VALUES (1, 6), (2, 1), (3, 0), (4, -1), (5, 0), (6, 0), (7, -2), (8, 0), (9, 0), (10, 4), (11, 0); +SELECT * FROM t_sparse_1 ORDER BY v; +7 -2 +4 -1 +3 0 +5 0 +6 0 +8 0 +9 0 +11 0 +2 
1 +10 4 +1 6 +SELECT * FROM t_sparse_1 ORDER BY v DESC; +1 6 +10 4 +2 1 +3 0 +5 0 +6 0 +8 0 +9 0 +11 0 +4 -1 +7 -2 +SELECT * FROM t_sparse_1 ORDER BY v, id LIMIT 5; +7 -2 +4 -1 +3 0 +5 0 +6 0 +SELECT * FROM t_sparse_1 ORDER BY v DESC, id LIMIT 5; +1 6 +10 4 +2 1 +3 0 +5 0 +DROP TABLE t_sparse; +DROP TABLE t_sparse_1; diff --git a/tests/queries/0_stateless/01780_column_sparse.sql b/tests/queries/0_stateless/01780_column_sparse.sql new file mode 100644 index 00000000000..480321c6d14 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse.sql @@ -0,0 +1,44 @@ +-- { echo } + +DROP TABLE IF EXISTS t_sparse; +DROP TABLE IF EXISTS t_sparse_1; + +CREATE TABLE t_sparse (id UInt64, u UInt64, s String, arr1 Array(String), arr2 Array(UInt64)) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1; + +INSERT INTO t_sparse SELECT + number, + if (number % 10 = 0, number, 0), + if (number % 5 = 0, toString(number), ''), + if (number % 7 = 0, arrayMap(x -> toString(x), range(number % 10)), []), + if (number % 12 = 0, range(number % 10), []) +FROM numbers (200); + +SELECT column, serialization_kind FROM system.parts_columns +WHERE table = 't_sparse' AND database = currentDatabase() +ORDER BY column; + +SELECT * FROM t_sparse WHERE u != 0 ORDER BY id; +SELECT * FROM t_sparse WHERE s != '' ORDER BY id; +SELECT * FROM t_sparse WHERE arr1 != [] ORDER BY id; +SELECT * FROM t_sparse WHERE arr2 != [] ORDER BY id; + +SELECT sum(u) FROM t_sparse; +SELECT sum(u) FROM t_sparse GROUP BY id % 7; + +SELECT arrayFilter(x -> x % 2 = 1, arr2) FROM t_sparse WHERE arr2 != [] LIMIT 5; + +CREATE TABLE t_sparse_1 (id UInt64, v Int64) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0; + +INSERT INTO t_sparse_1 VALUES (1, 6), (2, 1), (3, 0), (4, -1), (5, 0), (6, 0), (7, -2), (8, 0), (9, 0), (10, 4), (11, 0); + +SELECT * FROM t_sparse_1 ORDER BY v; +SELECT * FROM t_sparse_1 ORDER BY v DESC; +SELECT * FROM t_sparse_1 ORDER BY v, id LIMIT 5; +SELECT * FROM t_sparse_1 ORDER BY v DESC, id LIMIT 5; + +DROP TABLE t_sparse; +DROP TABLE t_sparse_1; diff --git a/tests/queries/0_stateless/01780_column_sparse_alter.reference b/tests/queries/0_stateless/01780_column_sparse_alter.reference new file mode 100644 index 00000000000..cec7af647b3 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_alter.reference @@ -0,0 +1,7 @@ +id Default +u Sparse +s Sparse +182 155 +id Default +t Sparse +182 diff --git a/tests/queries/0_stateless/01780_column_sparse_alter.sql b/tests/queries/0_stateless/01780_column_sparse_alter.sql new file mode 100644 index 00000000000..444a1f9cf43 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_alter.sql @@ -0,0 +1,26 @@ +SET mutations_sync = 2; + +DROP TABLE IF EXISTS t_sparse_alter; + +CREATE TABLE t_sparse_alter (id UInt64, u UInt64, s String) +ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5; + +INSERT INTO t_sparse_alter SELECT + number, + if (number % 11 = 0, number, 0), + if (number % 13 = 0, toString(number), '') +FROM numbers(2000); + +SELECT column, serialization_kind FROM system.parts_columns WHERE database = currentDatabase() AND table = 't_sparse_alter' AND active ORDER BY name; + +SELECT uniqExact(u), uniqExact(s) FROM t_sparse_alter; + +ALTER TABLE t_sparse_alter DROP COLUMN s, RENAME COLUMN u TO t; +ALTER TABLE t_sparse_alter MODIFY COLUMN t UInt16; + +SELECT column, serialization_kind FROM system.parts_columns WHERE database = currentDatabase() 
AND table = 't_sparse_alter' AND active ORDER BY name; + +SELECT uniqExact(t) FROM t_sparse_alter; + +DROP TABLE t_sparse_alter; diff --git a/tests/queries/0_stateless/01780_column_sparse_distinct.reference b/tests/queries/0_stateless/01780_column_sparse_distinct.reference new file mode 100644 index 00000000000..bb0cebc6540 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_distinct.reference @@ -0,0 +1,7 @@ +all_1_1_0 v Default +all_2_2_0 v Sparse +0 +1 +2 +3 +4 diff --git a/tests/queries/0_stateless/01780_column_sparse_distinct.sql b/tests/queries/0_stateless/01780_column_sparse_distinct.sql new file mode 100644 index 00000000000..502ca7600d4 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_distinct.sql @@ -0,0 +1,20 @@ +DROP TABLE IF EXISTS t_sparse_distinct; + +CREATE TABLE t_sparse_distinct (id UInt32, v UInt64) +ENGINE = MergeTree +ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +SYSTEM STOP MERGES t_sparse_distinct; + +INSERT INTO t_sparse_distinct SELECT number, number % 5 FROM numbers(100000); +INSERT INTO t_sparse_distinct SELECT number, number % 100 = 0 FROM numbers(100000); + +SELECT name, column, serialization_kind +FROM system.parts_columns +WHERE table = 't_sparse_distinct' AND database = currentDatabase() AND column = 'v' +ORDER BY name; + +SELECT DISTINCT v FROM t_sparse_distinct ORDER BY v; + +DROP TABLE t_sparse_distinct; diff --git a/tests/queries/0_stateless/01780_column_sparse_full.reference b/tests/queries/0_stateless/01780_column_sparse_full.reference new file mode 100644 index 00000000000..4d2d0a58798 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_full.reference @@ -0,0 +1,110 @@ +all_1_1_0 id Default +all_1_1_0 s Sparse +all_1_1_0 u Sparse +all_2_2_0 id Default +all_2_2_0 s Default +all_2_2_0 u Default +0 0 +0 0 +1 0 +1 1 +====== +0 0 +0 0 +1 0 +1 1 +====== +990 990 +980 980 980 +970 970 +====== +990 990 +980 980 980 +970 970 +====== +0 58413 +1 57920 +2 57917 +====== +507 +====== +0 [0,2,1,3] +1 [0,2,1,3] +2 [0,2,1,3] +3 [0,2,1,3] +4 [0,2,1,3] +8 +====== +0 0 0 +0 0 0 +0 0 0 +0 0 0 +1 1 1 +====== +58413 +57920 +57917 + +174250 +====== +174250 +58413 +57920 +57917 +====== +174250 +58413 +57920 +57917 +====== +508413 +57920 +57917 +====== +1075 +====== +1077 +====== +0 +1 +2 +3 +4 +====== +0 0 0 +0 0 0 +1 0 +2 0 +3 0 +====== +0 0 0 +0 0 0 +1 0 +1 1 1 +2 0 +====== +0 0 0 +0 0 0 +1 0 +2 0 +2 2 2 +====== +0 0 0 +0 0 0 +1 1 1 +1 0 +2 2 2 +====== +0 0 0 +0 0 0 +0 0 0 +0 0 0 +0 0 0 +====== +id Default +s Sparse +u Sparse +====== +990 990 +980 980 980 +970 970 diff --git a/tests/queries/0_stateless/01780_column_sparse_full.sql b/tests/queries/0_stateless/01780_column_sparse_full.sql new file mode 100644 index 00000000000..af6fde116d9 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_full.sql @@ -0,0 +1,105 @@ +-- This test checks, that common SQL operations work +-- with mixed columns (sparse and full) in table. 
+ +DROP TABLE IF EXISTS t_sparse_full; + +CREATE TABLE t_sparse_full (id UInt64, u UInt64, s String) +ENGINE = MergeTree ORDER BY id +SETTINGS index_granularity = 32, +ratio_of_defaults_for_sparse_serialization = 0.1; + +SYSTEM STOP MERGES t_sparse_full; + +INSERT INTO t_sparse_full +SELECT + number, + if (number % 10 = 0, number, 0), + if (number % 7 = 0, toString(number), '') +FROM numbers(1000); + +INSERT INTO t_sparse_full +SELECT + number, + number, + toString(number) +FROM numbers(500); + +SELECT name, column, serialization_kind +FROM system.parts_columns WHERE table = 't_sparse_full' AND database = currentDatabase() AND active +ORDER BY name, column; + +SELECT id, u FROM t_sparse_full ORDER BY id, u LIMIT 4; +SELECT '======'; +SELECT id, u FROM t_sparse_full ORDER BY id, u LIMIT 4 SETTINGS optimize_read_in_order = 0; +SELECT '======'; +SELECT id, u, s FROM t_sparse_full ORDER BY u DESC LIMIT 3; +SELECT '======'; +SELECT id, u, s FROM t_sparse_full WHERE u != 0 ORDER BY u DESC LIMIT 3; +SELECT '======'; +SELECT id % 3 AS k, sum(u) FROM t_sparse_full WHERE u != 0 GROUP BY k ORDER BY k; +SELECT '======'; +SELECT uniqExact(u) FROM t_sparse_full WHERE s != ''; +SELECT '======'; +SELECT toUInt32(s) % 5 AS k, groupUniqArray(u % 4) FROM t_sparse_full WHERE s != '' GROUP BY k ORDER BY k; +SELECT max(range(id % 10)[u]) FROM t_sparse_full; +SELECT '======'; +SELECT id, u, s FROM remote('127.0.0.{1,2}', currentDatabase(), t_sparse_full) ORDER BY id LIMIT 5; +SELECT '======'; +SELECT sum(u) FROM t_sparse_full GROUP BY id % 3 AS k WITH TOTALS ORDER BY k; +SELECT '======'; +SELECT sum(u) FROM t_sparse_full GROUP BY id % 3 AS k WITH ROLLUP ORDER BY k; +SELECT '======'; +SELECT sum(u) FROM t_sparse_full GROUP BY id % 3 AS k WITH CUBE ORDER BY k; +SELECT '======'; +SELECT sum(id) FROM t_sparse_full GROUP BY u % 3 AS k ORDER BY k; +SELECT '======'; +SELECT count() FROM t_sparse_full WHERE u % 4 = 0; +SELECT '======'; +SELECT count() FROM t_sparse_full WHERE u IN (SELECT u FROM t_sparse_full WHERE id % 4 = 2); +SELECT '======'; +SELECT DISTINCT u FROM t_sparse_full ORDER BY id LIMIT 5; + +SELECT '======'; + +SELECT id, u, s FROM t_sparse_full INNER JOIN +( + SELECT number * 3 AS u FROM numbers(10) +) AS t1 USING(u) ORDER BY id, u, s LIMIT 5; + +SELECT '======'; + +SELECT id, u, s FROM t_sparse_full FULL JOIN +( + SELECT number * 3 AS u FROM numbers(10) +) AS t1 USING(u) ORDER BY id, u, s LIMIT 5; + +SELECT '======'; + +SELECT id, u, s FROM (SELECT number * 2 AS u FROM numbers(10)) AS t1 +INNER JOIN t_sparse_full USING(u) ORDER BY id, u, s LIMIT 5; + +SELECT '======'; + +SELECT id, u, s FROM (SELECT number * 2 AS u FROM numbers(10)) AS t1 +FULL JOIN t_sparse_full USING(u) ORDER BY id LIMIT 5; + +SELECT '======'; + +SELECT id, u, s FROM (SELECT u FROM t_sparse_full) AS t1 +FULL JOIN t_sparse_full USING(u) ORDER BY id, u, s LIMIT 5; + +SYSTEM START MERGES t_sparse_full; + +OPTIMIZE TABLE t_sparse_full FINAL; + +SELECT '======'; + +SELECT column, serialization_kind +FROM system.parts_columns WHERE table = 't_sparse_full' AND database = currentDatabase() AND active +ORDER BY name, column; + +SELECT '======'; + +SELECT id, u, s FROM t_sparse_full ORDER BY u DESC LIMIT 3; + +DROP TABLE t_sparse_full; diff --git a/tests/queries/0_stateless/01780_column_sparse_pk.reference b/tests/queries/0_stateless/01780_column_sparse_pk.reference new file mode 100644 index 00000000000..11bb0471689 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_pk.reference @@ -0,0 +1,15 @@ +2 +2 e +0 a +0 b +3 f +200 84 
+200 84 +800 167 +800 167 +\N +\N +\N +[] +[] +[] diff --git a/tests/queries/0_stateless/01780_column_sparse_pk.sql b/tests/queries/0_stateless/01780_column_sparse_pk.sql new file mode 100644 index 00000000000..63ed9e99a87 --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_pk.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS t_sparse_pk; +DROP TABLE IF EXISTS t_full_pk; + +CREATE TABLE t_sparse_pk (k UInt64, s String) +ENGINE = MergeTree ORDER BY k +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.0, index_granularity = 1; + +INSERT INTO t_sparse_pk VALUES (0, 'a'), (0, 'b'), (1, ''), (2, ''), (2, 'e'), (3, 'f'), (4, 'g'); + +SET force_primary_key = 1; + +SELECT k, s FROM t_sparse_pk WHERE k = 2 ORDER BY k, s; +SELECT k, s FROM t_sparse_pk WHERE k = 0 OR k = 3 ORDER BY k, s; + +DROP TABLE IF EXISTS t_sparse_pk; + +CREATE TABLE t_sparse_pk (k UInt64, v UInt64 CODEC(NONE)) +ENGINE = MergeTree ORDER BY k +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.0, index_granularity = 30; + +CREATE TABLE t_full_pk (k UInt64, v UInt64) +ENGINE = MergeTree ORDER BY k +SETTINGS ratio_of_defaults_for_sparse_serialization = 1.1, index_granularity = 30; + +INSERT INTO t_sparse_pk SELECT number % 10, number % 4 = 0 FROM numbers(1000); +INSERT INTO t_full_pk SELECT number % 10, number % 4 = 0 FROM numbers(1000); + +INSERT INTO t_sparse_pk SELECT number % 10, number % 6 = 0 FROM numbers(1000); +INSERT INTO t_full_pk SELECT number % 10, number % 6 = 0 FROM numbers(1000); + +SELECT count(v), sum(v) FROM t_sparse_pk WHERE k = 0; +SELECT count(v), sum(v) FROM t_full_pk WHERE k = 0; + +SELECT count(v), sum(v) FROM t_sparse_pk WHERE k = 0 OR k = 3 OR k = 7 OR k = 8; +SELECT count(v), sum(v) FROM t_full_pk WHERE k = 0 OR k = 3 OR k = 7 OR k = 8; + +SET force_primary_key = 0; + +SELECT (k = NULL) OR (k = 1000) FROM t_sparse_pk LIMIT 3; +SELECT range(k) FROM t_sparse_pk LIMIT 3; + +DROP TABLE IF EXISTS t_sparse_pk; +DROP TABLE IF EXISTS t_full_pk; diff --git a/tests/queries/0_stateless/01780_column_sparse_tuple.reference b/tests/queries/0_stateless/01780_column_sparse_tuple.reference new file mode 100644 index 00000000000..22337838cff --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_tuple.reference @@ -0,0 +1,66 @@ +id [] [] [] +t ['a','s'] ['UInt64','String'] ['Sparse','Default'] +(0,'a') +(0,'aa') +(0,'aaa') +(0,'aaaa') +(0,'aaaaa') +(20,'a') +(40,'a') +(60,'a') +(80,'a') +(100,'a') +(20,'a') +(40,'a') +(60,'a') +(80,'a') +(100,'a') +0 +0 +0 +0 +0 +20 +40 +60 +80 +100 +20 +40 +60 +80 +100 +a +aa +aaa +aaaa +aaaaa +a +a +a +a +a +id [] [] [] +t ['a','b','b.u','b.s'] ['UInt64','Tuple(u UInt32, s String)','UInt32','String'] ['Sparse','Default','Sparse','Default'] +0 +0 +0 +60 +0 +a +aa +aaa +aaaa +aaaaa +aaaaaa +a +aaaaaa +a +aaaaaa +id [] [] [] +t ['a','b','b.u','b.s'] ['UInt64','Tuple(u UInt32, s String)','UInt32','String'] ['Sparse','Default','Sparse','Default'] +aaaaaa +a +aaaaaa +a +aaaaaa diff --git a/tests/queries/0_stateless/01780_column_sparse_tuple.sql b/tests/queries/0_stateless/01780_column_sparse_tuple.sql new file mode 100644 index 00000000000..da679f2c7eb --- /dev/null +++ b/tests/queries/0_stateless/01780_column_sparse_tuple.sql @@ -0,0 +1,53 @@ +DROP TABLE IF EXISTS sparse_tuple; + +CREATE TABLE sparse_tuple (id UInt64, t Tuple(a UInt64, s String)) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5; + +INSERT INTO sparse_tuple SELECT number, (if (number % 20 = 0, number, 0), repeat('a', number % 10 + 1)) FROM 
numbers(1000); + +SELECT column, subcolumns.names, subcolumns.types, subcolumns.serializations +FROM system.parts_columns +WHERE table = 'sparse_tuple' AND database = currentDatabase() +ORDER BY column; + +SELECT t FROM sparse_tuple ORDER BY id LIMIT 5; +SELECT t FROM sparse_tuple WHERE t.a != 0 ORDER BY id LIMIT 5; +SELECT t FROM sparse_tuple WHERE t.a != 0 ORDER BY t.a LIMIT 5; + +SELECT t.a FROM sparse_tuple ORDER BY id LIMIT 5; +SELECT t.a FROM sparse_tuple WHERE t.a != 0 ORDER BY id LIMIT 5; +SELECT t.a FROM sparse_tuple WHERE t.a != 0 ORDER BY t.a LIMIT 5; + +SELECT t.s FROM sparse_tuple ORDER BY id LIMIT 5; +SELECT t.s FROM sparse_tuple WHERE t.a != 0 ORDER BY id LIMIT 5; + +DROP TABLE IF EXISTS sparse_tuple; + +CREATE TABLE sparse_tuple (id UInt64, t Tuple(a UInt64, b Tuple(u UInt32, s String))) +ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.5; + +INSERT INTO sparse_tuple SELECT number, (if (number % 20 = 0, number, 0), (if (number % 15 = 0, number, 0), repeat('a', number % 10 + 1))) FROM numbers(1000); + +SELECT column, subcolumns.names, subcolumns.types, subcolumns.serializations +FROM system.parts_columns +WHERE table = 'sparse_tuple' AND database = currentDatabase() +ORDER BY column; + +SELECT t.a FROM sparse_tuple WHERE t.b.u != 0 ORDER BY id LIMIT 5; + +SELECT t.b.s FROM sparse_tuple ORDER BY id LIMIT 5; +SELECT t.b.s FROM sparse_tuple WHERE t.b.u != 0 ORDER BY id LIMIT 5; + +DETACH TABLE sparse_tuple; +ATTACH TABLE sparse_tuple; + +SELECT column, subcolumns.names, subcolumns.types, subcolumns.serializations +FROM system.parts_columns +WHERE table = 'sparse_tuple' AND database = currentDatabase() +ORDER BY column; + +SELECT t.b.s FROM sparse_tuple WHERE t.b.u != 0 ORDER BY id LIMIT 5; + +DROP TABLE IF EXISTS sparse_tuple; diff --git a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh index a2945de5b0c..c5aaa794ac9 100755 --- a/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh +++ b/tests/queries/0_stateless/01810_max_part_removal_threads_long.sh @@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT -nm -q """ insert into data_01810 select * from numbers(50); drop table data_01810 settings log_queries=1; system flush logs; - select throwIf(length(thread_ids)<50) from system.query_log where event_date = today() and current_database = currentDatabase() and query = 'drop table data_01810 settings log_queries=1;' and type = 'QueryFinish' format Null; + select throwIf(length(thread_ids)<50) from system.query_log where event_date >= yesterday() and current_database = currentDatabase() and query = 'drop table data_01810 settings log_queries=1;' and type = 'QueryFinish' format Null; """ # ReplicatedMergeTree @@ -31,7 +31,7 @@ $CLICKHOUSE_CLIENT -nm -q """ insert into rep_data_01810 select * from numbers(50); drop table rep_data_01810 settings log_queries=1; system flush logs; - select throwIf(length(thread_ids)<50) from system.query_log where event_date = today() and current_database = currentDatabase() and query = 'drop table rep_data_01810 settings log_queries=1;' and type = 'QueryFinish' format Null; + select throwIf(length(thread_ids)<50) from system.query_log where event_date >= yesterday() and current_database = currentDatabase() and query = 'drop table rep_data_01810 settings log_queries=1;' and type = 'QueryFinish' format Null; """ $CLICKHOUSE_CLIENT -nm -q "drop database ordinary_$CLICKHOUSE_DATABASE" diff --git 
a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh index 81ed4568092..1412ea3be65 100755 --- a/tests/queries/0_stateless/01814_distributed_push_down_limit.sh +++ b/tests/queries/0_stateless/01814_distributed_push_down_limit.sh @@ -69,7 +69,7 @@ function test_distributed_push_down_limit_with_query_log() system flush logs; select read_rows from system.query_log where - event_date = today() + event_date >= yesterday() and query_kind = 'Select' /* exclude DESC TABLE */ and initial_query_id = '$query_id' and initial_query_id != query_id; " | xargs # convert new lines to spaces diff --git a/tests/queries/0_stateless/01821_to_date_time_ubsan.reference b/tests/queries/0_stateless/01821_to_date_time_ubsan.reference index e69de29bb2d..0a762ec3b77 100644 --- a/tests/queries/0_stateless/01821_to_date_time_ubsan.reference +++ b/tests/queries/0_stateless/01821_to_date_time_ubsan.reference @@ -0,0 +1,2 @@ +2283-11-11 23:48:05.4775806 +2283-11-11 23:52:48.54775806 diff --git a/tests/queries/0_stateless/01821_to_date_time_ubsan.sql b/tests/queries/0_stateless/01821_to_date_time_ubsan.sql index 74226fc221f..377291e015f 100644 --- a/tests/queries/0_stateless/01821_to_date_time_ubsan.sql +++ b/tests/queries/0_stateless/01821_to_date_time_ubsan.sql @@ -1,2 +1,2 @@ -SELECT toDateTime('9223372036854775806', 7); -- { serverError 407 } -SELECT toDateTime('9223372036854775806', 8); -- { serverError 407 } +SELECT toDateTime('9223372036854775806', 7, 'Europe/Moscow'); +SELECT toDateTime('9223372036854775806', 8, 'Europe/Moscow'); diff --git a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect index 3003a0de42d..d5ce4c3cbf2 100755 --- a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect +++ b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect @@ -1,14 +1,14 @@ #!/usr/bin/expect -f -# Tags: long, no-fasttest +# Tags: long log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect index 664c3f06d20..c5645179ab3 100755 --- a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect +++ b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect @@ -1,15 +1,15 @@ #!/usr/bin/expect -f -# Tags: no-parallel, no-fasttest +# Tags: no-parallel # Tag no-parallel: Uses non unique history file log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 402ad9a1f35..2f74b6e33ae 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -7,11 +7,11 @@ log_user 0 set timeout 60 match_max 100000 
-# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.expect b/tests/queries/0_stateless/02003_memory_limit_in_client.expect index 47ac4926537..29701f49746 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.expect +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.expect @@ -8,11 +8,11 @@ log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/02004_invalid_partition_mutation_stuck.sql b/tests/queries/0_stateless/02004_invalid_partition_mutation_stuck.sql index 481a5565095..71c8b9af652 100644 --- a/tests/queries/0_stateless/02004_invalid_partition_mutation_stuck.sql +++ b/tests/queries/0_stateless/02004_invalid_partition_mutation_stuck.sql @@ -28,6 +28,6 @@ PARTITION BY p ORDER BY t SETTINGS number_of_free_entries_in_pool_to_execute_mutation=0; INSERT INTO data VALUES (1, now()); -ALTER TABLE data MATERIALIZE INDEX idx IN PARTITION ID 'NO_SUCH_PART'; -- { serverError 341 } +ALTER TABLE data MATERIALIZE INDEX idx IN PARTITION ID 'NO_SUCH_PART'; -- { serverError 248 } ALTER TABLE data MATERIALIZE INDEX idx IN PARTITION ID '1'; ALTER TABLE data MATERIALIZE INDEX idx IN PARTITION ID '2'; diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.reference b/tests/queries/0_stateless/02006_test_positional_arguments.reference index 7b75ab43430..5fc070ffd0b 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.reference +++ b/tests/queries/0_stateless/02006_test_positional_arguments.reference @@ -46,22 +46,6 @@ select x1, x2, x3 from test order by 3 limit 1 by 1; 100 100 1 10 1 10 1 10 100 -explain syntax select x3, x2, x1 from test order by 1 + 1; -SELECT - x3, - x2, - x1 -FROM test -ORDER BY x3 + x3 ASC -explain syntax select x3, x2, x1 from test order by (1 + 1) * 3; -SELECT - x3, - x2, - x1 -FROM test -ORDER BY (x3 + x3) * x1 ASC -select x2, x1 from test group by x2 + x1; -- { serverError 215 } -select x2, x1 from test group by 1 + 2; -- { serverError 215 } explain syntax select x3, x2, x1 from test order by 1; SELECT x3, @@ -110,27 +94,6 @@ GROUP BY x2 select max(x1), x2 from test group by 1, 2; -- { serverError 43 } select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43 } -select x1 + x2, x3 from test group by x1 + x2, x3; -11 100 -200 1 -11 200 -11 10 -select x3, x2, x1 from test order by x3 * 2, x2, x1; -- check x3 * 2 does not become x3 * x2 -1 100 100 -1 100 100 -10 1 10 -100 10 1 -200 1 10 -200 10 1 -explain syntax select x1, x3 from test group by 1 + 2, 1, 2; -SELECT - x1, - x3 -FROM test -GROUP BY - x1 + x3, - x1, - x3 explain syntax select x1 + x3, x3 from test group by 1, 2; SELECT x1 + x3, @@ -152,3 +115,5 @@ SELECT 1 + 1 AS a GROUP BY a select substr('aaaaaaaaaaaaaa', 8) as a group by a; aaaaaaa +select substr('aaaaaaaaaaaaaa', 8) as a group by substr('aaaaaaaaaaaaaa', 8); +aaaaaaa diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql 
index 3ba01b47efa..3a2cf76f6c4 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.sql +++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql @@ -22,12 +22,6 @@ select x1, x2, x3 from test order by 3 limit 1 by 3; select x1, x2, x3 from test order by x3 limit 1 by x1; select x1, x2, x3 from test order by 3 limit 1 by 1; -explain syntax select x3, x2, x1 from test order by 1 + 1; -explain syntax select x3, x2, x1 from test order by (1 + 1) * 3; - -select x2, x1 from test group by x2 + x1; -- { serverError 215 } -select x2, x1 from test group by 1 + 2; -- { serverError 215 } - explain syntax select x3, x2, x1 from test order by 1; explain syntax select x3 + 1, x2, x1 from test order by 1; explain syntax select x3, x3 - x2, x2, x1 from test order by 2; @@ -37,11 +31,7 @@ explain syntax select 1 + greatest(x1, 1), x2 from test group by 1, 2; select max(x1), x2 from test group by 1, 2; -- { serverError 43 } select 1 + max(x1), x2 from test group by 1, 2; -- { serverError 43 } -select x1 + x2, x3 from test group by x1 + x2, x3; -select x3, x2, x1 from test order by x3 * 2, x2, x1; -- check x3 * 2 does not become x3 * x2 - -explain syntax select x1, x3 from test group by 1 + 2, 1, 2; explain syntax select x1 + x3, x3 from test group by 1, 2; create table test2(x1 Int, x2 Int, x3 Int) engine=Memory; @@ -52,3 +42,5 @@ select a, b, c, d, e, f from (select 44 a, 88 b, 13 c, 14 d, 15 e, 16 f) t grou explain syntax select plus(1, 1) as a group by a; select substr('aaaaaaaaaaaaaa', 8) as a group by a; +select substr('aaaaaaaaaaaaaa', 8) as a group by substr('aaaaaaaaaaaaaa', 8); + diff --git a/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.reference b/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.reference new file mode 100644 index 00000000000..8a4df1605fb --- /dev/null +++ b/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.reference @@ -0,0 +1,6 @@ +127.0.0.1 IPv4 +127.0.0.1 String +2001:db8:0:85a3::ac1f:8001 IPv6 +2001:db8:0:85a3::ac1f:8001 String +0.0.0.0 IPv4 +:: IPv6 diff --git a/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.sql b/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.sql new file mode 100644 index 00000000000..2fcc20b9811 --- /dev/null +++ b/tests/queries/0_stateless/02007_ipv4_and_ipv6_to_and_from_string.sql @@ -0,0 +1,13 @@ +SELECT CAST('127.0.0.1' as IPv4) as v, toTypeName(v); +SELECT CAST(toIPv4('127.0.0.1') as String) as v, toTypeName(v); + +SELECT CAST('2001:0db8:0000:85a3:0000:0000:ac1f:8001' as IPv6) as v, toTypeName(v); +SELECT CAST(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001') as String) as v, toTypeName(v); + +SELECT toIPv4('hello') as v, toTypeName(v); +SELECT toIPv6('hello') as v, toTypeName(v); + +SELECT CAST('hello' as IPv4) as v, toTypeName(v); -- { serverError CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING } +SELECT CAST('hello' as IPv6) as v, toTypeName(v); -- { serverError CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING } + +SELECT CAST('1.1.1.1' as IPv6) as v, toTypeName(v); -- { serverError CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING } diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index 56e981555f3..71965512e64 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -302,11 +302,44 @@ def insertLowCardinalityRowWithIncorrectDictType(): print(readException(s)) s.close() +def insertLowCardinalityRowWithIncorrectAdditionalKeys(): + with 
socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, 'insert into {}.tab format TSV'.format(CLICKHOUSE_DATABASE)) + + # external tables + sendEmptyBlock(s) + readHeader(s) + + # Data + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary('x', ba) + writeStringBinary('LowCardinality(String)', ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend([3, 0] + [0] * 6) # indexes type: UInt64 [3], with NO additional keys [0] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary('hello', ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (0 for 'hello') + s.sendall(ba) + + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() def main(): insertValidLowCardinalityRow() insertLowCardinalityRowWithIndexOverflow() insertLowCardinalityRowWithIncorrectDictType() + insertLowCardinalityRowWithIncorrectAdditionalKeys() if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/02010_lc_native.reference b/tests/queries/0_stateless/02010_lc_native.reference index 0167f05c952..bbf0c9c025d 100644 --- a/tests/queries/0_stateless/02010_lc_native.reference +++ b/tests/queries/0_stateless/02010_lc_native.reference @@ -6,3 +6,6 @@ code 117: Index for LowCardinality is out of range. Dictionary size is 1, but f Rows 0 Columns 1 Column x type LowCardinality(String) code 117: LowCardinality indexes serialization type for Native format cannot use global dictionary +Rows 0 Columns 1 +Column x type LowCardinality(String) +code 117: No additional keys found. 
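The new insertLowCardinalityRowWithIncorrectAdditionalKeys() test above hand-crafts the native-protocol bytes for a LowCardinality column, and the reference now expects the server to reject them with "code 117: No additional keys found." The short Python sketch below is only an illustration of how the 8-byte serialization-flags field sent by that test can be composed; the bit position of the additional-keys flag and the helper name are assumptions inferred from the test's own comments, not taken from the server source or from this diff.

import struct

# Assumed layout for illustration: the low byte selects the index width
# ("[3]" = UInt64 in the test's comments), a higher bit says whether
# additional keys follow. The exact bit is an assumption.
INDEX_TYPE_UINT64 = 3
HAS_ADDITIONAL_KEYS_BIT = 1 << 9  # assumed flag bit, hypothetical name

def lc_serialization_flags(index_type, with_additional_keys):
    # Pack the flags as an 8-byte little-endian field, mirroring
    # ba.extend([3, 0] + [0] * 6) in 02010_lc_native.python.
    flags = index_type
    if with_additional_keys:
        flags |= HAS_ADDITIONAL_KEYS_BIT
    return struct.pack('<Q', flags)

print(list(lc_serialization_flags(INDEX_TYPE_UINT64, False)))  # [3, 0, 0, 0, 0, 0, 0, 0] - what the negative test sends
print(list(lc_serialization_flags(INDEX_TYPE_UINT64, True)))   # [3, 2, 0, 0, 0, 0, 0, 0] - under the assumed flag bit

Under these assumptions, sending the first variant without any key block is exactly the malformed stream the test uses to provoke the "No additional keys found." error added to the reference file.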
diff --git a/tests/queries/0_stateless/02015_global_in_threads.sh b/tests/queries/0_stateless/02015_global_in_threads.sh index c112e47fe92..9437187d462 100755 --- a/tests/queries/0_stateless/02015_global_in_threads.sh +++ b/tests/queries/0_stateless/02015_global_in_threads.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --log_queries=1 --max_threads=32 --query_id "2015_${CLICKHOUSE_DATABASE}_query" -q "select count() from remote('127.0.0.{2,3}', numbers(10)) where number global in (select number % 5 from numbers_mt(1000000))" ${CLICKHOUSE_CLIENT} -q "system flush logs" -${CLICKHOUSE_CLIENT} -q "select length(thread_ids) >= 32 from system.query_log where event_date = today() and query_id = '2015_${CLICKHOUSE_DATABASE}_query' and type = 'QueryFinish' and current_database = currentDatabase()" +${CLICKHOUSE_CLIENT} -q "select length(thread_ids) >= 32 from system.query_log where event_date >= yesterday() and query_id = '2015_${CLICKHOUSE_DATABASE}_query' and type = 'QueryFinish' and current_database = currentDatabase()" diff --git a/tests/queries/0_stateless/02047_client_exception.expect b/tests/queries/0_stateless/02047_client_exception.expect index 0025afa88eb..f7d4bfb555d 100755 --- a/tests/queries/0_stateless/02047_client_exception.expect +++ b/tests/queries/0_stateless/02047_client_exception.expect @@ -1,15 +1,14 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 20 match_max 100000 -# A default timeout action is to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect index 17b98b077d5..ffa25b964db 100755 --- a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect +++ b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect @@ -1,16 +1,14 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 20 match_max 100000 -# A default timeout action is to fail expect_after { - timeout { - exit 1 - } - + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/02050_client_profile_events.reference b/tests/queries/0_stateless/02050_client_profile_events.reference index 00fc3b5d06a..2451417ddf0 100644 --- a/tests/queries/0_stateless/02050_client_profile_events.reference +++ b/tests/queries/0_stateless/02050_client_profile_events.reference @@ -1,4 +1,5 @@ 0 -SelectedRows: 131010 (increment) +100000 +[ 0 ] SelectedRows: 131010 (increment) OK OK diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index 5c3887cf5fb..459e8505e22 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -7,9 +6,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # do not print any ProfileEvents packets $CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' -# print only last -$CLICKHOUSE_CLIENT --print-profile-events 
--profile-events-delay-ms=-1 -q 'select * from numbers(1e5) format Null' |& grep -o 'SelectedRows: .*$' +# print only last (and also number of rows to provide more info in case of failures) +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e '\[ 0 \] SelectedRows: .*$' -e Exception) 1> >(wc -l) # print everything -test "$($CLICKHOUSE_CLIENT --print-profile-events -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL +profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" +test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" # print each 100 ms -test "$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=100 -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL +profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" +test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" diff --git a/tests/queries/0_stateless/02098_date32_comparison.sql b/tests/queries/0_stateless/02098_date32_comparison.sql index 5fd7172e0bb..b35191e58ed 100644 --- a/tests/queries/0_stateless/02098_date32_comparison.sql +++ b/tests/queries/0_stateless/02098_date32_comparison.sql @@ -1,19 +1,19 @@ -select toDate32('1990-01-01') = toDate('1990-01-01'); -select toDate('1991-01-02') > toDate32('1990-01-01'); -select toDate32('1925-01-01') <= toDate('1990-01-01'); -select toDate('1991-01-01') < toDate32('2283-11-11'); -select toDate32('1990-01-01') = toDateTime('1990-01-01'); -select toDateTime('1991-01-02') > toDate32('1990-01-01'); -select toDate32('1925-01-01') <= toDateTime('1990-01-01'); -select toDateTime('1991-01-01') < toDate32('2283-11-11'); -select toDate32('1990-01-01') = toDateTime64('1990-01-01',2); -select toDateTime64('1991-01-02',2) > toDate32('1990-01-01'); -select toDate32('1925-01-01') = toDateTime64('1925-01-01',2); -select toDateTime64('1925-01-02',2) > toDate32('1925-01-01'); -select toDate32('2283-11-11') = toDateTime64('2283-11-11',2); -select toDateTime64('2283-11-11',2) > toDate32('1925-01-01'); -select toDate32('1990-01-01') = '1990-01-01'; -select '1991-01-02' > toDate32('1990-01-01'); -select toDate32('1925-01-01') = '1925-01-01'; -select '2283-11-11' >= toDate32('2283-11-10'); -select '2283-11-11' > toDate32('1925-01-01'); \ No newline at end of file +select toDate32('1990-02-01') = toDate('1990-02-01'); +select toDate('1991-01-02') > toDate32('1990-02-01'); +select toDate32('1925-02-01') <= toDate('1990-02-01'); +select toDate('1991-02-01') < toDate32('2283-11-11'); +select toDate32('1990-02-01') = toDateTime('1990-02-01'); +select toDateTime('1991-01-02') > toDate32('1990-02-01'); +select toDate32('1925-02-01') <= toDateTime('1990-02-01'); +select toDateTime('1991-02-01') < toDate32('2283-11-11'); +select toDate32('1990-02-01') = toDateTime64('1990-02-01',2); +select toDateTime64('1991-01-02',2) > toDate32('1990-02-01'); +select toDate32('1925-02-01') = toDateTime64('1925-02-01',2); +select toDateTime64('1925-02-02',2) > toDate32('1925-02-01'); +select toDate32('2283-11-11') = toDateTime64('2283-11-11',2); +select toDateTime64('2283-11-11',2) > toDate32('1925-02-01'); +select toDate32('1990-02-01') = '1990-02-01'; +select '1991-01-02' > 
toDate32('1990-02-01'); +select toDate32('1925-02-01') = '1925-02-01'; +select '2283-11-11' >= toDate32('2283-11-10'); +select '2283-11-11' > toDate32('1925-02-01'); diff --git a/tests/queries/0_stateless/02101_avro_union_index_out_of_boundary.reference b/tests/queries/0_stateless/02101_avro_union_index_out_of_boundary.reference new file mode 100644 index 00000000000..33702ab4186 --- /dev/null +++ b/tests/queries/0_stateless/02101_avro_union_index_out_of_boundary.reference @@ -0,0 +1 @@ +index out of boundary diff --git a/tests/queries/0_stateless/02101_avro_union_index_out_of_boundary.sh b/tests/queries/0_stateless/02101_avro_union_index_out_of_boundary.sh new file mode 100755 index 00000000000..1e9c49b8963 --- /dev/null +++ b/tests/queries/0_stateless/02101_avro_union_index_out_of_boundary.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +DATA_DIR=$CUR_DIR/data_avro +cat "$DATA_DIR"/nested_complex_incorrect_data.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S "\"b.b2_null_str\" Nullable(String)" -q 'select * from table;' 2>&1 | grep -i 'index out of boundary' -o diff --git a/tests/queries/0_stateless/02105_backslash_letter_commands.expect b/tests/queries/0_stateless/02105_backslash_letter_commands.expect index 9c6f3e10227..e67d60912fa 100755 --- a/tests/queries/0_stateless/02105_backslash_letter_commands.expect +++ b/tests/queries/0_stateless/02105_backslash_letter_commands.expect @@ -1,14 +1,13 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 02 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect index b676c221c65..0abe25e60f4 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect @@ -1,27 +1,28 @@ #!/usr/bin/expect -f -# Tags: no-parallel, no-fasttest +# Tags: no-parallel log_user 0 set timeout 20 match_max 100000 -# A default timeout action is to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } - -spawn bash -c "\$CLICKHOUSE_TESTS_DIR/helpers/02112_prepare.sh" - set basedir [file dirname $argv0] -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion --interactive --queries-file \$CURDIR/file_02112" + +system "$basedir/helpers/02112_prepare.sh" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion --interactive --queries-file $basedir/file_02112" expect ":) " send -- "select * from t format TSV\r" expect "1" expect ":) " -spawn bash -c "\$CLICKHOUSE_TESTS_DIR/helpers/02112_clean.sh" +send "" +expect eof +system "$basedir/helpers/02112_clean.sh" diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect index cd42388c099..c846464b011 100755 --- 
a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect @@ -1,15 +1,14 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 20 match_max 100000 -# A default timeout action is to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect index f0aef1550c3..c64f149a93c 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect @@ -1,27 +1,28 @@ #!/usr/bin/expect -f -# Tags: no-parallel, no-fasttest +# Tags: no-parallel log_user 0 set timeout 20 match_max 100000 -# A default timeout action is to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } - -spawn bash -c "\$CLICKHOUSE_TESTS_DIR/helpers/02112_prepare.sh" - set basedir [file dirname $argv0] -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --queries-file \$CURDIR/file_02112" + +system "$basedir/helpers/02112_prepare.sh" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --queries-file $basedir/file_02112" expect ":) " send -- "select * from t format TSV\r" expect "1" expect ":) " -spawn bash -c "\$CLICKHOUSE_TESTS_DIR/helpers/02112_clean.sh" +send "" +expect eof +system "$basedir/helpers/02112_clean.sh" diff --git a/tests/queries/0_stateless/02113_base64encode_trailing_bytes_1.reference b/tests/queries/0_stateless/02113_base64encode_trailing_bytes_1.reference new file mode 100644 index 00000000000..8d9df2da010 --- /dev/null +++ b/tests/queries/0_stateless/02113_base64encode_trailing_bytes_1.reference @@ -0,0 +1,100 @@ +0 +1 61 +2 6161 +3 616161 +4 61616161 +5 6161616161 +6 616161616161 +7 61616161616161 +8 6161616161616161 +9 616161616161616161 +10 61616161616161616161 +11 6161616161616161616161 +12 616161616161616161616161 +13 61616161616161616161616161 +14 6161616161616161616161616161 +15 616161616161616161616161616161 +16 61616161616161616161616161616161 +17 6161616161616161616161616161616161 +18 616161616161616161616161616161616161 +19 61616161616161616161616161616161616161 +20 6161616161616161616161616161616161616161 +21 616161616161616161616161616161616161616161 +22 61616161616161616161616161616161616161616161 +23 6161616161616161616161616161616161616161616161 +24 616161616161616161616161616161616161616161616161 +25 61616161616161616161616161616161616161616161616161 +26 6161616161616161616161616161616161616161616161616161 +27 616161616161616161616161616161616161616161616161616161 +28 61616161616161616161616161616161616161616161616161616161 +29 6161616161616161616161616161616161616161616161616161616161 +30 616161616161616161616161616161616161616161616161616161616161 +31 61616161616161616161616161616161616161616161616161616161616161 +32 6161616161616161616161616161616161616161616161616161616161616161 +33 616161616161616161616161616161616161616161616161616161616161616161 +34 
61616161616161616161616161616161616161616161616161616161616161616161 +35 6161616161616161616161616161616161616161616161616161616161616161616161 +36 616161616161616161616161616161616161616161616161616161616161616161616161 +37 61616161616161616161616161616161616161616161616161616161616161616161616161 +38 6161616161616161616161616161616161616161616161616161616161616161616161616161 +39 616161616161616161616161616161616161616161616161616161616161616161616161616161 +40 61616161616161616161616161616161616161616161616161616161616161616161616161616161 +41 6161616161616161616161616161616161616161616161616161616161616161616161616161616161 +42 616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +43 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +44 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +45 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +46 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +47 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +48 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +49 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +50 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +51 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +52 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +53 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +54 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +55 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +56 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +57 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +58 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +59 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +60 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +61 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +62 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +63 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +64 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +65 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +66 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +67 
61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +68 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +69 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +70 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +71 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +72 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +73 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +74 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +75 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +76 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +77 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +78 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +79 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +80 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +81 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +82 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +83 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +84 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +85 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +86 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +87 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +88 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +89 
6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +90 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +91 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +92 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +93 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +94 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +95 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +96 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +97 61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +98 6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 +99 616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161 diff --git a/tests/queries/0_stateless/02113_base64encode_trailing_bytes_1.sql b/tests/queries/0_stateless/02113_base64encode_trailing_bytes_1.sql new file mode 100644 index 00000000000..56edf5dbf6f --- /dev/null +++ b/tests/queries/0_stateless/02113_base64encode_trailing_bytes_1.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +SELECT + number, + hex(base64Decode(base64Encode(repeat('a', number)))) r +FROM numbers(100); diff --git a/tests/queries/0_stateless/02114_bool_type.sql b/tests/queries/0_stateless/02114_bool_type.sql index 4542cc68a3a..d4ea4e54028 100644 --- a/tests/queries/0_stateless/02114_bool_type.sql +++ b/tests/queries/0_stateless/02114_bool_type.sql @@ -5,7 +5,7 @@ CREATE TABLE bool_test (value Bool,f String) ENGINE = Memory; -- value column shoud have type 'Bool' SHOW CREATE TABLE bool_test; -INSERT INTO bool_test (value,f) VALUES ('false', 'test'), ('true' , 'test'), (0, 'test'), (1, 'test'), ('FALSE', 'test'), ('TRUE', 'test'); +INSERT INTO bool_test (value,f) VALUES (false, 'test'), (true , 'test'), (0, 'test'), (1, 'test'), (FALSE, 'test'), (TRUE, 'test'); INSERT INTO bool_test (value,f) FORMAT JSONEachRow {"value":false,"f":"test"}{"value":true,"f":"test"}{"value":0,"f":"test"}{"value":1,"f":"test"} SELECT value,f FROM bool_test; diff --git a/tests/queries/0_stateless/02114_hdfs_bad_url.sh b/tests/queries/0_stateless/02114_hdfs_bad_url.sh index 5117568b67f..a05baf19e6f 100755 --- 
a/tests/queries/0_stateless/02114_hdfs_bad_url.sh +++ b/tests/queries/0_stateless/02114_hdfs_bad_url.sh @@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('://abcd:9000/data', 'CSV', 'x UInt32' $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('abcd/', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://abcd', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "NETWORK_ERROR" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('http://hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; -$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/abcd:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1@nameservice/abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "NETWORK_ERROR" && echo 'OK' || echo 'FAIL'; diff --git a/tests/queries/0_stateless/02116_interactive_hello.expect b/tests/queries/0_stateless/02116_interactive_hello.expect index 1642ac91e42..e659cf8703c 100755 --- a/tests/queries/0_stateless/02116_interactive_hello.expect +++ b/tests/queries/0_stateless/02116_interactive_hello.expect @@ -1,15 +1,15 @@ #!/usr/bin/expect -f -# Tags: no-fasttest +# Tags: long log_user 0 set timeout 60 match_max 100000 -# A default timeout action is to do nothing, change it to fail expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } set basedir [file dirname $argv0] diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 2b391cd292e..35de7f8e82c 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -34,7 +34,7 @@ CREATE TABLE system.numbers_mt\n(\n `number` UInt64\n)\nENGINE = SystemNumber CREATE TABLE system.one\n(\n `dummy` UInt8\n)\nENGINE = SystemOne()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.part_moves_between_shards\n(\n `database` String,\n `table` String,\n `task_name` String,\n `task_uuid` UUID,\n `create_time` DateTime,\n `part_name` String,\n `part_uuid` UUID,\n `to_shard` String,\n `dst_part_name` String,\n `update_time` DateTime,\n `state` String,\n `rollback` UInt8,\n `num_tries` UInt32,\n `last_exception` String\n)\nENGINE = SystemShardMoves()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `secondary_indices_compressed_bytes` UInt64,\n `secondary_indices_uncompressed_bytes` UInt64,\n `secondary_indices_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` 
UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `projections` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.parts_columns\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.parts_columns\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `serialization_kind` String,\n 
`subcolumns.names` Array(String),\n `subcolumns.types` Array(String),\n `subcolumns.serializations` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.privileges\n(\n `privilege` Enum8(\'SQLITE\' = -128, \'ODBC\' = -127, \'JDBC\' = -126, \'HDFS\' = -125, \'S3\' = -124, \'SOURCES\' = -123, \'ALL\' = -122, \'NONE\' = -121, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM 
FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM\' = 116, \'dictGet\' = 117, \'addressToLine\' = 118, \'addressToSymbol\' = 119, \'demangle\' = 120, \'INTROSPECTION\' = 121, \'FILE\' = 122, \'URL\' = 123, \'REMOTE\' = 124, \'MONGO\' = 125, \'MYSQL\' = 126, \'POSTGRES\' = 127),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum8(\'SQLITE\' = -128, \'ODBC\' = -127, \'JDBC\' = -126, \'HDFS\' = -125, \'S3\' = -124, \'SOURCES\' = -123, \'ALL\' = -122, \'NONE\' = -121, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 
108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM\' = 116, \'dictGet\' = 117, \'addressToLine\' = 118, \'addressToSymbol\' = 119, \'demangle\' = 120, \'INTROSPECTION\' = 121, \'FILE\' = 122, \'URL\' = 123, \'REMOTE\' = 124, \'MONGO\' = 125, \'MYSQL\' = 126, \'POSTGRES\' = 127))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `elapsed` Float64,\n `is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `bytes` UInt64,\n 
`marks_size` UInt64\n)\nENGINE = SystemProjectionParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git a/tests/queries/0_stateless/02122_4letter_words_stress_zookeeper.sh b/tests/queries/0_stateless/02122_4letter_words_stress_zookeeper.sh index 4a94beddbba..2deaf788ecf 100755 --- a/tests/queries/0_stateless/02122_4letter_words_stress_zookeeper.sh +++ b/tests/queries/0_stateless/02122_4letter_words_stress_zookeeper.sh @@ -18,7 +18,7 @@ function four_letter_thread() function create_drop_thread() { while true; do - num=$RANDOM + num=$(($RANDOM % 10 + 1)) $CLICKHOUSE_CLIENT --query "CREATE TABLE test_table$num (key UInt64, value1 UInt8, value2 UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_table$num', '0') ORDER BY key" sleep 0.$RANDOM $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table$num" @@ -43,5 +43,12 @@ timeout $TIMEOUT bash -c create_drop_thread 2> /dev/null & wait +for num in $(seq 1 10); do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table$num" 2>/dev/null + while [ $? -ne 0 ]; do + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_table$num" 2>/dev/null + done +done + # still alive $CLICKHOUSE_CLIENT --query "SELECT 1" diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql index d2041a612a6..4aad7ae3694 100644 --- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql @@ -11,7 +11,5 @@ select if(toUInt8(1), 42, y) from test; select if(toInt8(1), 42, y) from test; select if(toUInt8(toUInt8(0)), y, 42) from test; select if(cast(cast(0, 'UInt8'), 'UInt8'), y, 42) from test; - explain syntax select x, if((select hasColumnInTable(currentDatabase(), 'test', 'y')), y, x || '_') from test; - drop table if exists t; diff --git a/tests/queries/0_stateless/02127_connection_drain.reference b/tests/queries/0_stateless/02127_connection_drain.reference new file mode 100644 index 00000000000..c31f2f40f6d --- /dev/null +++ b/tests/queries/0_stateless/02127_connection_drain.reference @@ -0,0 +1,2 @@ +OK: sync drain +OK: async drain diff --git a/tests/queries/0_stateless/02127_connection_drain.sh b/tests/queries/0_stateless/02127_connection_drain.sh new file mode 100755 index 00000000000..523b02d9bd5 --- /dev/null +++ b/tests/queries/0_stateless/02127_connection_drain.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# sync drain +for _ in {1..100}; do + prev=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'SyncDrainedConnections'") + curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select * from remote('127.{2,3}', view(select * from numbers(1e6))) limit 100 settings drain_timeout=-1 format Null" + now=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'SyncDrainedConnections'") + if [[ "$prev" != $(( now-2 )) ]]; then + continue + fi + echo "OK: sync drain" + break +done + +# async drain +for _ in {1..100}; do + prev=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'AsyncDrainedConnections'") + curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select * from remote('127.{2,3}', view(select * from numbers(1e6))) limit 100 settings drain_timeout=10 format Null" + now=$(curl -d@- -sS "${CLICKHOUSE_URL}" <<<"select value from system.metrics where metric = 'AsyncDrainedConnections'") + if [[ "$prev" != $(( now-2 )) ]]; then + continue + fi + echo "OK: async drain" + break +done diff --git a/tests/queries/0_stateless/02131_mv_many_chunks_bug.reference b/tests/queries/0_stateless/02131_mv_many_chunks_bug.reference new file mode 100644 index 00000000000..9183bf03fcc --- /dev/null +++ b/tests/queries/0_stateless/02131_mv_many_chunks_bug.reference @@ -0,0 +1 @@ +256 diff --git a/tests/queries/0_stateless/02131_mv_many_chunks_bug.sql b/tests/queries/0_stateless/02131_mv_many_chunks_bug.sql new file mode 100644 index 00000000000..736fd9242b0 --- /dev/null +++ b/tests/queries/0_stateless/02131_mv_many_chunks_bug.sql @@ -0,0 +1,15 @@ +drop table if exists t; +drop table if exists t_mv; + +create table t (x UInt64) engine = MergeTree order by x; +create materialized view t_mv engine = MergeTree order by tuple() as select uniq(x), bitAnd(x, 255) as y from t group by y; + +set max_bytes_before_external_group_by = 1000000000; +set group_by_two_level_threshold = 100; +set min_insert_block_size_rows = 100; + +insert into t select number from numbers(300); +select count() from (select y from t_mv group by y); + +drop table if exists t; +drop table if exists t_mv; diff --git a/tests/queries/0_stateless/02132_client_history_navigation.expect b/tests/queries/0_stateless/02132_client_history_navigation.expect index 129a65e0a0a..b722a0af04c 100755 --- a/tests/queries/0_stateless/02132_client_history_navigation.expect +++ b/tests/queries/0_stateless/02132_client_history_navigation.expect @@ -1,14 +1,14 @@ #!/usr/bin/expect -f -# Tags: no-fasttest log_user 0 set timeout 3 match_max 100000 -# A default timeout action is to do nothing, change it to fail + expect_after { - timeout { - exit 1 - } + # Do not ignore eof from expect + eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + timeout { exit 1 } } # useful debugging configuration diff --git a/tests/queries/0_stateless/02149_issue_32487.reference b/tests/queries/0_stateless/02149_issue_32487.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02149_issue_32487.sql b/tests/queries/0_stateless/02149_issue_32487.sql new file mode 100644 index 00000000000..4e75c981774 --- /dev/null +++ b/tests/queries/0_stateless/02149_issue_32487.sql @@ -0,0 +1 @@ +SELECT topKWeightedState(2)(now(), 1) FORMAT Null; diff --git a/tests/queries/0_stateless/02150_replace_regexp_all_empty_match.reference b/tests/queries/0_stateless/02150_replace_regexp_all_empty_match.reference new file mode 
100644 index 00000000000..6e269c2a690 --- /dev/null +++ b/tests/queries/0_stateless/02150_replace_regexp_all_empty_match.reference @@ -0,0 +1 @@ +here: Hello, World! diff --git a/tests/queries/0_stateless/02150_replace_regexp_all_empty_match.sql b/tests/queries/0_stateless/02150_replace_regexp_all_empty_match.sql new file mode 100644 index 00000000000..a7b52a1c8b6 --- /dev/null +++ b/tests/queries/0_stateless/02150_replace_regexp_all_empty_match.sql @@ -0,0 +1 @@ +select replaceRegexpAll('Hello, World!', '^', 'here: '); diff --git a/tests/queries/0_stateless/02151_clickhouse_client_hints.reference b/tests/queries/0_stateless/02151_clickhouse_client_hints.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02151_clickhouse_client_hints.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02151_clickhouse_client_hints.sh b/tests/queries/0_stateless/02151_clickhouse_client_hints.sh new file mode 100755 index 00000000000..3e6c6cb16a5 --- /dev/null +++ b/tests/queries/0_stateless/02151_clickhouse_client_hints.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT --hardware_utilization 2>&1 | grep -q "Code: 552. DB::Exception: Unrecognized option '--hardware_utilization'. Maybe you meant \['--hardware-utilization'\]. (UNRECOGNIZED_ARGUMENTS)" && echo 'OK' || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02151_client_option_echo.reference b/tests/queries/0_stateless/02151_client_option_echo.reference new file mode 100644 index 00000000000..4dba04c5d41 --- /dev/null +++ b/tests/queries/0_stateless/02151_client_option_echo.reference @@ -0,0 +1,3 @@ +DROP TABLE IF EXISTS echo_test_0 +DROP TABLE IF EXISTS echo_test_2; +DROP TABLE IF EXISTS echo_test_3 diff --git a/tests/queries/0_stateless/02151_client_option_echo.sh b/tests/queries/0_stateless/02151_client_option_echo.sh new file mode 100755 index 00000000000..8056b3b5ed1 --- /dev/null +++ b/tests/queries/0_stateless/02151_client_option_echo.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + +# shellcheck source=../shell_config.sh + +. "$CURDIR"/../shell_config.sh + +# single query echo on +${CLICKHOUSE_CLIENT} --echo --query="DROP TABLE IF EXISTS echo_test_0" +# single query echo off +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS echo_test_1" +# multi query echo on +${CLICKHOUSE_CLIENT} --echo --multiquery --query="DROP TABLE IF EXISTS echo_test_2;DROP TABLE IF EXISTS echo_test_3" +# multi query echo off +${CLICKHOUSE_CLIENT} --multiquery --query="DROP TABLE IF EXISTS echo_test_4;DROP TABLE IF EXISTS echo_test_5" diff --git a/tests/queries/0_stateless/02151_http_s_structure_set_eof.reference b/tests/queries/0_stateless/02151_http_s_structure_set_eof.reference new file mode 100644 index 00000000000..51de8112089 --- /dev/null +++ b/tests/queries/0_stateless/02151_http_s_structure_set_eof.reference @@ -0,0 +1,2 @@ +124 +124 diff --git a/tests/queries/0_stateless/02151_http_s_structure_set_eof.sh b/tests/queries/0_stateless/02151_http_s_structure_set_eof.sh new file mode 100755 index 00000000000..448fa9bfede --- /dev/null +++ b/tests/queries/0_stateless/02151_http_s_structure_set_eof.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +tmp_file=$(mktemp "$CURDIR/clickhouse.XXXXXX.csv") +trap 'rm $tmp_file' EXIT + +# NOTE: this file should be huge enough, so that it is impossible to upload it +# in 0.15s, see timeout command below, this will ensure, that EOF will be +# received during creating a set from externally uploaded table. +# +# Previously code there wasn't ready for EOF, and you will get one of the +# following assertions: +# +# - ./src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion `!hasPendingData()' failed. +# - ./src/Server/HTTP/HTMLForm.cpp:245: bool DB::HTMLForm::MultipartReadBuffer::skipToNextBoundary(): Assertion `boundary_hit' failed. +# - ./src/IO/LimitReadBuffer.cpp:17: virtual bool DB::LimitReadBuffer::nextImpl(): Assertion `position() >= in->position()' failed. +# +$CLICKHOUSE_CLIENT -q "SELECT toString(number) FROM numbers(10e6) FORMAT TSV" > "$tmp_file" + +# NOTE: Just in case check w/ input_format_parallel_parsing and w/o +timeout 0.15s ${CLICKHOUSE_CURL} -sS -F "s=@$tmp_file;" "${CLICKHOUSE_URL}&s_structure=key+Int&query=SELECT+dummy+IN+s&input_format_parallel_parsing=true" -o /dev/null +echo $? +timeout 0.15s ${CLICKHOUSE_CURL} -sS -F "s=@$tmp_file;" "${CLICKHOUSE_URL}&s_structure=key+Int&query=SELECT+dummy+IN+s&input_format_parallel_parsing=false" -o /dev/null +echo $? diff --git a/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.reference b/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh b/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh new file mode 100755 index 00000000000..2faaa3bb1b6 --- /dev/null +++ b/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --query="SET input_format_with_names_use_headers = 1" 2>&1 | grep -qF "Code: 115. DB::Exception: Unknown setting input_format_with_names_use_headers: Maybe you meant ['input_format_with_names_use_header','input_format_with_types_use_header']. 
(UNKNOWN_SETTING)" && echo 'OK' || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02151_lc_prefetch.reference b/tests/queries/0_stateless/02151_lc_prefetch.reference new file mode 100644 index 00000000000..deebb18c2f7 --- /dev/null +++ b/tests/queries/0_stateless/02151_lc_prefetch.reference @@ -0,0 +1 @@ +2000000 diff --git a/tests/queries/0_stateless/02151_lc_prefetch.sql b/tests/queries/0_stateless/02151_lc_prefetch.sql new file mode 100644 index 00000000000..83d8d23264e --- /dev/null +++ b/tests/queries/0_stateless/02151_lc_prefetch.sql @@ -0,0 +1,7 @@ +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug +drop table if exists tab_lc; +CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x; +insert into tab_lc select number, toString(number % 10) from numbers(20000000); +optimize table tab_lc; +select count() from tab_lc where y == '0' settings local_filesystem_read_prefetch=1; +drop table if exists tab_lc; diff --git a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql new file mode 100644 index 00000000000..6725fa04114 --- /dev/null +++ b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql @@ -0,0 +1,3 @@ +select replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') x; +select replaceRegexpAll(',,1', '^[,]*|[,]*$', '') x; +select replaceRegexpAll('1,,', '^[,]*|[,]*$', '') x; diff --git a/tests/queries/0_stateless/02152_bool_type.reference b/tests/queries/0_stateless/02152_bool_type.reference new file mode 100644 index 00000000000..a8c04f651e9 --- /dev/null +++ b/tests/queries/0_stateless/02152_bool_type.reference @@ -0,0 +1,46 @@ +true +true +true +true +true +true +true +true +true +true +true +true +true +true +false +false +false +false +false +false +false +false +false +false +false +false +false +false +Custom true +Custom true +(true) +Row 1: +────── +CAST('true', 'Bool'): Custom true +┏━━━━━━━━━━━━━━━━━━━━━━┓ +┃ CAST('true', 'Bool') ┃ +┡━━━━━━━━━━━━━━━━━━━━━━┩ +│ Custom true │ +└──────────────────────┘ +{"CAST('true', 'Bool')":true} +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02152_bool_type.sql b/tests/queries/0_stateless/02152_bool_type.sql new file mode 100644 index 00000000000..e9efde0795f --- /dev/null +++ b/tests/queries/0_stateless/02152_bool_type.sql @@ -0,0 +1,48 @@ +SELECT CAST('True', 'Bool'); +SELECT CAST('TrUe', 'Bool'); +SELECT CAST('true', 'Bool'); +SELECT CAST('On', 'Bool'); +SELECT CAST('on', 'Bool'); +SELECT CAST('Yes', 'Bool'); +SELECT CAST('yes', 'Bool'); +SELECT CAST('T', 'Bool'); +SELECT CAST('t', 'Bool'); +SELECT CAST('Y', 'Bool'); +SELECT CAST('y', 'Bool'); +SELECT CAST('1', 'Bool'); +SELECT CAST('enabled', 'Bool'); +SELECT CAST('enable', 'Bool'); + +SELECT CAST('False', 'Bool'); +SELECT CAST('FaLse', 'Bool'); +SELECT CAST('false', 'Bool'); +SELECT CAST('Off', 'Bool'); +SELECT CAST('off', 'Bool'); +SELECT CAST('No', 'Bool'); +SELECT CAST('no', 'Bool'); +SELECT CAST('N', 'Bool'); +SELECT CAST('n', 'Bool'); +SELECT CAST('F', 'Bool'); +SELECT CAST('f', 'Bool'); +SELECT CAST('0', 'Bool'); +SELECT CAST('disabled', 
'Bool'); +SELECT CAST('disable', 'Bool'); + +SET bool_true_representation = 'Custom true'; +SET bool_false_representation = 'Custom false'; + +SELECT CAST('true', 'Bool') format CSV; +SELECT CAST('true', 'Bool') format TSV; +SELECT CAST('true', 'Bool') format Values; +SELECT ''; +SELECT CAST('true', 'Bool') format Vertical; +SELECT CAST('true', 'Bool') format Pretty; +SELECT CAST('true', 'Bool') format JSONEachRow; + +SELECT CAST(CAST(2, 'Bool'), 'UInt8'); +SELECT CAST(CAST(toUInt32(2), 'Bool'), 'UInt8'); +SELECT CAST(CAST(toInt8(2), 'Bool'), 'UInt8'); +SELECT CAST(CAST(toFloat32(2), 'Bool'), 'UInt8'); +SELECT CAST(CAST(toDecimal32(2, 2), 'Bool'), 'UInt8'); +SELECT CAST(CAST(materialize(2), 'Bool'), 'UInt8'); + diff --git a/tests/queries/0_stateless/02152_bool_type_parsing.reference b/tests/queries/0_stateless/02152_bool_type_parsing.reference new file mode 100644 index 00000000000..f9fcd324dbc --- /dev/null +++ b/tests/queries/0_stateless/02152_bool_type_parsing.reference @@ -0,0 +1,146 @@ +true +false +true +false +true +false +true +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false +true +false diff --git a/tests/queries/0_stateless/02152_bool_type_parsing.sh b/tests/queries/0_stateless/02152_bool_type_parsing.sh new file mode 100755 index 00000000000..9e9db499cf5 --- /dev/null +++ b/tests/queries/0_stateless/02152_bool_type_parsing.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02152.data +DATA_FILE=$USER_FILES_PATH/$FILE_NAME + +echo -e "Custom true\nCustom false\nYes\nNo\nyes\nno\ny\nY\nN\nTrue\nFalse\ntrue\nfalse\nt\nf\nT\nF\nOn\nOff\non\noff\nenable\ndisable\nenabled\ndisabled" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'TSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2" + +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CSV', 'bool Bool') settings bool_true_representation='Custom true', bool_false_representation='Custom false', input_format_parallel_parsing=0, max_read_buffer_size=2" + +echo -e "'Yes'\n'No'\n'yes'\n'no'\n'y'\n'Y'\n'N'\nTrue\nFalse\ntrue\nfalse\n't'\n'f'\n'T'\n'F'\n'On'\n'Off'\n'on'\n'off'\n'enable'\n'disable'\n'enabled'\n'disabled'" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('$FILE_NAME', 'CustomSeparated', 'bool Bool') settings format_custom_escaping_rule='Quoted', input_format_parallel_parsing=0, max_read_buffer_size=2" + +rm $DATA_FILE + diff --git a/tests/queries/0_stateless/02152_csv_tuple.reference b/tests/queries/0_stateless/02152_csv_tuple.reference new file mode 100644 index 00000000000..9d77cd9e0a6 --- /dev/null +++ b/tests/queries/0_stateless/02152_csv_tuple.reference @@ -0,0 +1,2 @@ +1 Hello [1,2,3] (2,'World',[4,5,6]) +1 Hello [1,2,3] (2,'World',[4,5,6]) diff --git a/tests/queries/0_stateless/02152_csv_tuple.sql b/tests/queries/0_stateless/02152_csv_tuple.sql new file mode 100644 index 00000000000..6a6c029e524 --- /dev/null +++ b/tests/queries/0_stateless/02152_csv_tuple.sql @@ -0,0 +1,11 @@ +drop table if exists test_02152; +create table test_02152 (x UInt32, y String, z Array(UInt32), t Tuple(UInt32, String, Array(UInt32))) engine=File('CSV') settings format_csv_delimiter=';'; +insert into test_02152 select 1, 'Hello', [1,2,3], tuple(2, 'World', [4,5,6]); +select * from test_02152; +drop table test_02152; + +create table test_02152 (x UInt32, y String, z Array(UInt32), t Tuple(UInt32, String, Array(UInt32))) engine=File('CustomSeparated') settings format_custom_field_delimiter='', format_custom_row_before_delimiter='', format_custom_row_after_delimiter='', format_custom_escaping_rule='CSV'; +insert into test_02152 select 1, 'Hello', [1,2,3], tuple(2, 'World', [4,5,6]); +select * from test_02152; +drop table test_02152; + diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference new file mode 100644 index 00000000000..1fc09c8d154 --- /dev/null +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference @@ -0,0 +1,16 @@ +Checking input_format_parallel_parsing=false& +1 +Checking 
input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1 +1 +Checking input_format_parallel_parsing=false&send_progress_in_http_headers=true +1 +Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true +1 +Checking input_format_parallel_parsing=true& +1 +Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1 +1 +Checking input_format_parallel_parsing=true&send_progress_in_http_headers=true +1 +Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true +1 diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh new file mode 100755 index 00000000000..2801ec16a43 --- /dev/null +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: no-tsan +# ^^^^^^^ +# TSan does not support tracing. + +# Regression test for proper release of Context, +# via tracking memory of external tables. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +tmp_file=$(mktemp "$CURDIR/clickhouse.XXXXXX.csv") +trap 'rm $tmp_file' EXIT + +$CLICKHOUSE_CLIENT -q "SELECT toString(number) FROM numbers(1e6) FORMAT TSV" > "$tmp_file" + +function run_and_check() +{ + local query_id + query_id="$(${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SELECT generateUUIDv4()')" + + echo "Checking $*" + + # Run query with external table (implicit StorageMemory user) + $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&query_id=$query_id&$*" -o /dev/null + + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SYSTEM FLUSH LOGS' + + # Check that the temporary table has been destroyed. 
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&allow_introspection_functions=1" --data-binary @- <<<" + WITH arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS sym + SELECT count()>0 FROM system.trace_log + WHERE + sym LIKE '%DB::StorageMemory::drop%\n%TemporaryTableHolder::~TemporaryTableHolder%' AND + query_id = '$query_id' + " +} + +for input_format_parallel_parsing in false true; do + query_args_variants=( + "" + "cancel_http_readonly_queries_on_client_close=1&readonly=1" + "send_progress_in_http_headers=true" + # nested progress callback + "cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true" + ) + for query_args in "${query_args_variants[@]}"; do + run_and_check "input_format_parallel_parsing=$input_format_parallel_parsing&$query_args" + done +done diff --git a/tests/queries/0_stateless/02152_invalid_setting_with_hints_in_http_request.reference b/tests/queries/0_stateless/02152_invalid_setting_with_hints_in_http_request.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02152_invalid_setting_with_hints_in_http_request.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02152_invalid_setting_with_hints_in_http_request.sh b/tests/queries/0_stateless/02152_invalid_setting_with_hints_in_http_request.sh new file mode 100755 index 00000000000..1fbf747da4f --- /dev/null +++ b/tests/queries/0_stateless/02152_invalid_setting_with_hints_in_http_request.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&input_format_with_names_use_headers=1" -d 'SELECT 1' 2>&1 | grep -q "Code: 115.*Maybe you meant \['input_format_with_names_use_header','input_format_with_types_use_header'\]. 
(UNKNOWN_SETTING)" && echo 'OK' || echo 'FAIL' ||: diff --git a/tests/queries/0_stateless/02152_short_circuit_throw_if.reference b/tests/queries/0_stateless/02152_short_circuit_throw_if.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02152_short_circuit_throw_if.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02152_short_circuit_throw_if.sql b/tests/queries/0_stateless/02152_short_circuit_throw_if.sql new file mode 100644 index 00000000000..3fdc3cc48c8 --- /dev/null +++ b/tests/queries/0_stateless/02152_short_circuit_throw_if.sql @@ -0,0 +1,2 @@ +SELECT if(1, 0, throwIf(1, 'Executing FALSE branch')); +SELECT if(empty(''), 0, throwIf(1, 'Executing FALSE branch')); diff --git a/tests/queries/0_stateless/02153_clickhouse_local_profile_info.reference b/tests/queries/0_stateless/02153_clickhouse_local_profile_info.reference new file mode 100644 index 00000000000..2e1b607ac04 --- /dev/null +++ b/tests/queries/0_stateless/02153_clickhouse_local_profile_info.reference @@ -0,0 +1,32 @@ +{ + "meta": + [ + { + "name": "count()", + "type": "UInt64" + }, + { + "name": "n", + "type": "UInt8" + } + ], + + "data": + [ + { + "count()": "1", + "n": 1 + } + ], + + "totals": + { + "count()": "3", + "n": 0 + }, + + "rows": 1, + + "rows_before_limit_at_least": 3, + + "statistics": diff --git a/tests/queries/0_stateless/02153_clickhouse_local_profile_info.sh b/tests/queries/0_stateless/02153_clickhouse_local_profile_info.sh new file mode 100755 index 00000000000..65754d390fa --- /dev/null +++ b/tests/queries/0_stateless/02153_clickhouse_local_profile_info.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --query "SELECT count(), arrayJoin([1, 2, 3]) AS n GROUP BY n WITH TOTALS ORDER BY n LIMIT 1 FORMAT JSON;" 2>&1 | head -32 + diff --git a/tests/queries/0_stateless/02153_native_bounds_check.reference b/tests/queries/0_stateless/02153_native_bounds_check.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02153_native_bounds_check.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02153_native_bounds_check.sh b/tests/queries/0_stateless/02153_native_bounds_check.sh new file mode 100755 index 00000000000..a3475ddacae --- /dev/null +++ b/tests/queries/0_stateless/02153_native_bounds_check.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Should correctly handle error. 
+ +${CLICKHOUSE_LOCAL} --query "SELECT toString(number) AS a, toString(number) AS a FROM numbers(10)" --output-format Native | + ${CLICKHOUSE_LOCAL} --query "SELECT * FROM table" --input-format Native --structure 'a LowCardinality(String)' 2>&1 | + grep -c -F Exception diff --git a/tests/queries/0_stateless/02154_bitmap_contains.reference b/tests/queries/0_stateless/02154_bitmap_contains.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02154_bitmap_contains.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02154_bitmap_contains.sql b/tests/queries/0_stateless/02154_bitmap_contains.sql new file mode 100644 index 00000000000..3235e81e2bb --- /dev/null +++ b/tests/queries/0_stateless/02154_bitmap_contains.sql @@ -0,0 +1 @@ +select bitmapContains(bitmapBuild([9]), 964291337) diff --git a/tests/queries/0_stateless/02155_csv_with_strings_with_slash.reference b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.reference new file mode 100644 index 00000000000..db750f36364 --- /dev/null +++ b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.reference @@ -0,0 +1,62 @@ +input_format_null_as_default = 1 +0 \\asdf 2000-01-01 +1 x\\x\\ 2000-01-01 +2 x\\x 2000-01-01 +3 x\\ 2000-01-01 +4 x\\ 2000-01-01 +5 \\x 2000-01-01 +6 2000-01-01 +7 \\r\\n 2000-01-01 +8 \\\\r\\\\n 2000-01-01 +9 x\\\\ 2000-01-01 +10 \\asdf 2000-01-01 +11 x\\x\\ 2000-01-01 +12 x\\x 2000-01-01 +13 x\\ 2000-01-01 +14 x\\ 2000-01-01 +15 \\x 2000-01-01 +16 \\N 2000-01-01 +17 \\r\\n 2000-01-01 +18 \\\\r\\\\n 2000-01-01 +19 x\\\\ 2000-01-01 +20 \\asdf 2000-01-01 +21 x\\x\\ 2000-01-01 +22 x\\x 2000-01-01 +23 x\\ 2000-01-01 +24 x\\ 2000-01-01 +25 \\x 2000-01-01 +26 \\N 2000-01-01 +27 \\r\\n 2000-01-01 +28 \\\\r\\\\n 2000-01-01 +29 x\\\\ 2000-01-01 +input_format_null_as_default = 0 +0 \\asdf 2000-01-01 +1 x\\x\\ 2000-01-01 +2 x\\x 2000-01-01 +3 x\\ 2000-01-01 +4 x\\ 2000-01-01 +5 \\x 2000-01-01 +6 \\N 2000-01-01 +7 \\r\\n 2000-01-01 +8 \\\\r\\\\n 2000-01-01 +9 x\\\\ 2000-01-01 +10 \\asdf 2000-01-01 +11 x\\x\\ 2000-01-01 +12 x\\x 2000-01-01 +13 x\\ 2000-01-01 +14 x\\ 2000-01-01 +15 \\x 2000-01-01 +16 \\N 2000-01-01 +17 \\r\\n 2000-01-01 +18 \\\\r\\\\n 2000-01-01 +19 x\\\\ 2000-01-01 +20 \\asdf 2000-01-01 +21 x\\x\\ 2000-01-01 +22 x\\x 2000-01-01 +23 x\\ 2000-01-01 +24 x\\ 2000-01-01 +25 \\x 2000-01-01 +26 \\N 2000-01-01 +27 \\r\\n 2000-01-01 +28 \\\\r\\\\n 2000-01-01 +29 x\\\\ 2000-01-01 diff --git a/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh new file mode 100755 index 00000000000..ab2577e6138 --- /dev/null +++ b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test_02155_csv" + +${CLICKHOUSE_CLIENT} --query="create table test_02155_csv (A Int64, S String, D Date) Engine=Memory;" + + +echo "input_format_null_as_default = 1" +cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv FORMAT CSV SETTINGS input_format_null_as_default = 1" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM test_02155_csv" + +${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test_02155_csv" + +echo "input_format_null_as_default = 0" +cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv FORMAT CSV SETTINGS input_format_null_as_default = 0" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM test_02155_csv" + + +${CLICKHOUSE_CLIENT} --query="DROP TABLE test_02155_csv" + diff --git a/tests/queries/0_stateless/02155_dictionary_comment.reference b/tests/queries/0_stateless/02155_dictionary_comment.reference new file mode 100644 index 00000000000..69b871a6925 --- /dev/null +++ b/tests/queries/0_stateless/02155_dictionary_comment.reference @@ -0,0 +1,11 @@ +02155_test_dictionary +02155_test_dictionary 02155_test_dictionary_comment_0 +02155_test_dictionary 02155_test_dictionary_comment_0 +0 Value +02155_test_dictionary 02155_test_dictionary_comment_0 +02155_test_dictionary 02155_test_dictionary_comment_0 +02155_test_dictionary 02155_test_dictionary_comment_1 +02155_test_dictionary 02155_test_dictionary_comment_1 +0 Value +02155_test_dictionary_view 02155_test_dictionary_view_comment_0 +02155_test_dictionary_view 02155_test_dictionary_view_comment_0 diff --git a/tests/queries/0_stateless/02155_dictionary_comment.sql b/tests/queries/0_stateless/02155_dictionary_comment.sql new file mode 100644 index 00000000000..e31d9d28366 --- /dev/null +++ b/tests/queries/0_stateless/02155_dictionary_comment.sql @@ -0,0 +1,53 @@ +DROP TABLE IF EXISTS 02155_test_table; +CREATE TABLE 02155_test_table +( + id UInt64, + value String +) ENGINE=TinyLog; + +INSERT INTO 02155_test_table VALUES (0, 'Value'); + +DROP DICTIONARY IF EXISTS 02155_test_dictionary; +CREATE DICTIONARY 02155_test_dictionary +( + id UInt64, + value String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE '02155_test_table')) +LAYOUT(DIRECT()); + +SELECT name, comment FROM system.dictionaries WHERE name == '02155_test_dictionary' AND database == currentDatabase(); + +ALTER TABLE 02155_test_dictionary COMMENT COLUMN value 'value_column'; --{serverError 48} + +ALTER TABLE 02155_test_dictionary MODIFY COMMENT '02155_test_dictionary_comment_0'; +SELECT name, comment FROM system.dictionaries WHERE name == '02155_test_dictionary' AND database == currentDatabase(); +SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary' AND database == currentDatabase(); + +SELECT * FROM 02155_test_dictionary; +SELECT name, comment FROM system.dictionaries WHERE name == '02155_test_dictionary' AND database == currentDatabase(); +SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary' AND database == currentDatabase(); + +ALTER TABLE 02155_test_dictionary MODIFY COMMENT '02155_test_dictionary_comment_1'; +SELECT name, comment FROM system.dictionaries WHERE name == '02155_test_dictionary' AND database == currentDatabase(); +SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary' AND database == currentDatabase(); + +DROP TABLE IF EXISTS 02155_test_dictionary_view; +CREATE TABLE 02155_test_dictionary_view +( + id UInt64, + value String +) 
ENGINE=Dictionary(concat(currentDatabase(), '.02155_test_dictionary')); + +SELECT * FROM 02155_test_dictionary_view; + +ALTER TABLE 02155_test_dictionary_view COMMENT COLUMN value 'value_column'; --{serverError 48} + +ALTER TABLE 02155_test_dictionary_view MODIFY COMMENT '02155_test_dictionary_view_comment_0'; +SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary_view' AND database == currentDatabase(); +SELECT name, comment FROM system.tables WHERE name == '02155_test_dictionary_view' AND database == currentDatabase(); + +DROP TABLE 02155_test_dictionary_view; +DROP TABLE 02155_test_table; +DROP DICTIONARY 02155_test_dictionary; diff --git a/tests/queries/0_stateless/02156_async_insert_query_log.reference b/tests/queries/0_stateless/02156_async_insert_query_log.reference new file mode 100644 index 00000000000..404dbfe753d --- /dev/null +++ b/tests/queries/0_stateless/02156_async_insert_query_log.reference @@ -0,0 +1,4 @@ +1 a +2 b +INSERT INTO async_inserts_2156 VALUES 1 Insert 1 0 +INSERT INTO async_inserts_2156 VALUES 1 Insert 1 diff --git a/tests/queries/0_stateless/02156_async_insert_query_log.sh b/tests/queries/0_stateless/02156_async_insert_query_log.sh new file mode 100755 index 00000000000..d7177fbe70c --- /dev/null +++ b/tests/queries/0_stateless/02156_async_insert_query_log.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts_2156" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts_2156 (id UInt32, s String) ENGINE = Memory" + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" -d "INSERT INTO async_inserts_2156 VALUES (1, 'a')" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" -d "INSERT INTO async_inserts_2156 VALUES (2, 'b')" + +${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts_2156 ORDER BY id" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +${CLICKHOUSE_CLIENT} -q "SELECT query, arrayExists(x -> x LIKE '%async_inserts_2156', tables), \ + query_kind, Settings['async_insert'], Settings['wait_for_async_insert'] FROM system.query_log \ + WHERE event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' \ + AND query ILIKE 'INSERT INTO async_inserts_2156 VALUES%' AND type = 'QueryFinish' \ + ORDER BY query_start_time_microseconds" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts_2156" diff --git a/tests/queries/0_stateless/data_avro/nested_complex_incorrect_data.avro b/tests/queries/0_stateless/data_avro/nested_complex_incorrect_data.avro new file mode 100755 index 00000000000..9feb3c10486 Binary files /dev/null and b/tests/queries/0_stateless/data_avro/nested_complex_incorrect_data.avro differ diff --git a/tests/queries/0_stateless/data_csv/csv_with_slash.csv b/tests/queries/0_stateless/data_csv/csv_with_slash.csv new file mode 100644 index 00000000000..0f2c166faa8 --- /dev/null +++ b/tests/queries/0_stateless/data_csv/csv_with_slash.csv @@ -0,0 +1,30 @@ +0,\asdf,2000-01-01 +1,x\x\,2000-01-01 +2,x\x,2000-01-01 +3,x\,2000-01-01 +4,x\,2000-01-01 +5,\x,2000-01-01 +6,\N,2000-01-01 +7,\r\n,2000-01-01 +8,\\r\\n,2000-01-01 +9,x\\,2000-01-01 +10,'\asdf',2000-01-01 +11,'x\x\',2000-01-01 +12,'x\x',2000-01-01 +13,'x\',2000-01-01 +14,'x\',2000-01-01 +15,'\x',2000-01-01 +16,'\N',2000-01-01 +17,'\r\n',2000-01-01 +18,"\\r\\n",2000-01-01 +19,"x\\",2000-01-01 +20,"\asdf",2000-01-01 +21,"x\x\",2000-01-01 
+22,"x\x",2000-01-01 +23,"x\",2000-01-01 +24,"x\",2000-01-01 +25,"\x",2000-01-01 +26,"\N",2000-01-01 +27,"\r\n",2000-01-01 +28,"\\r\\n",2000-01-01 +29,"x\\",2000-01-01 diff --git a/tests/queries/0_stateless/helpers/02112_clean.sh b/tests/queries/0_stateless/helpers/02112_clean.sh index 910c0709955..95af0cede9c 100755 --- a/tests/queries/0_stateless/helpers/02112_clean.sh +++ b/tests/queries/0_stateless/helpers/02112_clean.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash -FILE=${CURDIR}/file_02112 -if [ -f $FILE ]; then - rm $FILE -fi +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +FILE=${CURDIR}/../file_02112 +rm "$FILE" diff --git a/tests/queries/0_stateless/helpers/02112_prepare.sh b/tests/queries/0_stateless/helpers/02112_prepare.sh index 1f371789f86..c2791b01140 100755 --- a/tests/queries/0_stateless/helpers/02112_prepare.sh +++ b/tests/queries/0_stateless/helpers/02112_prepare.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -FILE=${CURDIR}/file_02112 -if [ -f $FILE ]; then - rm $FILE -fi -echo "drop table if exists t;create table t(i Int32) engine=Memory; insert into t select 1" >> $FILE +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +FILE=${CURDIR}/../file_02112 +echo "drop table if exists t;create table t(i Int32) engine=Memory; insert into t select 1" > "$FILE" diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh index ba1245d9679..d025dae5b2e 100755 --- a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh +++ b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh @@ -72,7 +72,7 @@ do if [[ "$expected" != "$actual" ]]; then FAILED+=("$TESTNAME") - echo "Failed! ❌ " + echo "Failed! ❌" echo "Plain:" cat $TESTNAME_RESULT echo "Distributed:" diff --git a/utils/check-style/check-style b/utils/check-style/check-style index c65099f2582..22b5faa0fcb 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -184,7 +184,6 @@ tables_with_database_column=( tests_with_database_column=( $( find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' | grep -vP $EXCLUDE_DIRS | - grep -v -x -e $ROOT_PATH/tests/queries/query_test.py | xargs grep --with-filename $(printf -- "-e %s " "${tables_with_database_column[@]}") | cut -d: -f1 | sort -u ) ) for test_case in "${tests_with_database_column[@]}"; do @@ -299,6 +298,20 @@ for src in "${sources_with_std_cerr_cout[@]}"; do fi done +# Queries with event_date should have yesterday() not today() +# +# NOTE: it is not that accuate, but at least something. +tests_with_event_time_date=( $( + find $ROOT_PATH/tests/queries -iname '*.sql' -or -iname '*.sh' -or -iname '*.py' -or -iname '*.j2' | + grep -vP $EXCLUDE_DIRS | + xargs grep --with-filename -e event_time -e event_date | cut -d: -f1 | sort -u +) ) +for test_case in "${tests_with_event_time_date[@]}"; do + cat "$test_case" | tr '\n' ' ' | grep -q -i -e 'WHERE.*event_date[ ]*=[ ]*today()' -e 'WHERE.*event_date[ ]*=[ ]*today()' && { + echo "event_time/event_date should be filtered using >=yesterday() in $test_case (to avoid flakiness)" + } +done + # Conflict markers find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' 
&& echo "Conflict markers are found in files" diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index 200b55d112d..d3a7586647c 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -6,7 +6,7 @@ nd ect pullrequest pullrequests -thenn ths offsett numer +ue diff --git a/utils/ci/jobs/quick-build/README.md b/utils/ci/jobs/quick-build/README.md deleted file mode 100644 index 803acae0f93..00000000000 --- a/utils/ci/jobs/quick-build/README.md +++ /dev/null @@ -1,5 +0,0 @@ -## Build with debug mode and without many libraries - -This job is intended as first check that build is not broken on wide variety of platforms. - -Results of this build are not intended for production usage. diff --git a/utils/ci/jobs/quick-build/run.sh b/utils/ci/jobs/quick-build/run.sh deleted file mode 100755 index af977d14465..00000000000 --- a/utils/ci/jobs/quick-build/run.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -set -e -x - -# How to run: -# From "ci" directory: -# jobs/quick-build/run.sh -# or: -# ./run-with-docker.sh ubuntu:bionic jobs/quick-build/run.sh - -cd "$(dirname $0)"/../.. - -. default-config - -SOURCES_METHOD=local -COMPILER=clang -COMPILER_INSTALL_METHOD=packages -COMPILER_PACKAGE_VERSION=6.0 -BUILD_METHOD=normal -BUILD_TARGETS=clickhouse -BUILD_TYPE=Debug -ENABLE_EMBEDDED_COMPILER=0 - -CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_REDIS=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_CASSANDRA=0 -D ENABLE_LDAP=0" - -[[ $(uname) == "FreeBSD" ]] && COMPILER_PACKAGE_VERSION=devel && export COMPILER_PATH=/usr/local/bin - -. get-sources.sh -. prepare-toolchain.sh -. install-libraries.sh -. 
build-normal.sh diff --git a/utils/ci/vagrant-freebsd/.gitignore b/utils/ci/vagrant-freebsd/.gitignore deleted file mode 100644 index 8000dd9db47..00000000000 --- a/utils/ci/vagrant-freebsd/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.vagrant diff --git a/utils/ci/vagrant-freebsd/Vagrantfile b/utils/ci/vagrant-freebsd/Vagrantfile deleted file mode 100644 index c01ae5fa6e2..00000000000 --- a/utils/ci/vagrant-freebsd/Vagrantfile +++ /dev/null @@ -1,3 +0,0 @@ -Vagrant.configure("2") do |config| - config.vm.box = "generic/freebsd11" -end diff --git a/utils/clickhouse-diagnostics/README.md b/utils/clickhouse-diagnostics/README.md new file mode 100644 index 00000000000..991efefdf5a --- /dev/null +++ b/utils/clickhouse-diagnostics/README.md @@ -0,0 +1,2657 @@ +## Installation + +``` +python3 -m pip install -r requirements.txt +``` + +## Usage + +``` +./clickhouse-diagnostics +``` + +Example output: + +### Diagnostics data for host clickhouse01.test_net_3697 +Version: **21.11.8.4** +Timestamp: **2021-12-25 15:34:02** +Uptime: **13 minutes and 51 seconds** +#### ClickHouse configuration +**result** +```XML + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + 1 + + 8123 + 9000 + 9004 + 9005 + 9009 + 4096 + 3 + + false + /path/to/ssl_cert_file + /path/to/ssl_key_file + false + /path/to/ssl_ca_cert_file + deflate + medium + -1 + -1 + false + + + + /etc/clickhouse-server/server.crt + /etc/clickhouse-server/server.key + + none + true + true + sslv2,sslv3 + true + + + true + true + sslv2,sslv3,tlsv1,tlsv1_1 + true + + RejectCertificateHandler + + /etc/clickhouse-server/server.crt + /etc/clickhouse-server/server.key + /etc/clickhouse-server/allCAs.pem + + + 100 + 0 + 10000 + 0.9 + 4194304 + 0 + 8589934592 + 5368709120 + 1000 + 134217728 + 10000 + /var/lib/clickhouse/ + /var/lib/clickhouse/tmp/ + /var/lib/clickhouse/user_files/ + + + + users.xml + + + /var/lib/clickhouse/access/ + + + default + + default + true + false + + + + + localhost + 9000 + + + + + + + localhost + 9000 + + + + + localhost + 9000 + + + + + + + 127.0.0.1 + 9000 + + + + + 127.0.0.2 + 9000 + + + + + + true + + 127.0.0.1 + 9000 + + + + true + + 127.0.0.2 + 9000 + + + + + + + localhost + 9440 + 1 + + + + + + + localhost + 9000 + + + + + localhost + 1 + + + + + + + clickhouse01.test_net_3697 + 9000 + + + + + 3600 + 3600 + 60 + + system + query_log
+ toYYYYMM(event_date) + 7500 +
+ + system + trace_log
+ toYYYYMM(event_date) + 7500 +
+ + system + query_thread_log
+ toYYYYMM(event_date) + 7500 +
+ + system + query_views_log
+ toYYYYMM(event_date) + 7500 +
+ + system + part_log
+ toYYYYMM(event_date) + 7500 +
+ + system + metric_log
+ 7500 + 1000 +
+ + system + asynchronous_metric_log
+ 7000 +
+ + engine MergeTree + partition by toYYYYMM(finish_date) + order by (finish_date, finish_time_us, trace_id) + system + opentelemetry_span_log
+ 7500 +
+ + system + crash_log
+ + 1000 +
+ + system + session_log
+ toYYYYMM(event_date) + 7500 +
+ + *_dictionary.xml + *_function.xml + + + /clickhouse/task_queue/ddl + + + + click_cost + any + + 0 + 3600 + + + 86400 + 60 + + + + max + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + /var/lib/clickhouse/format_schemas/ + + + hide encrypt/decrypt arguments + ((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\) + \1(???) + + + + false + false + https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 + + 0.0.0.0 + 8443 + 9440 + + + zookeeper01.test_net_3697 + 2281 + 1 + + 3000 + /clickhouse01 + ***** + + + clickhouse01 + shard1 + + 0 + + + + /hdd1/ + + + /hdd2/ + + + s3 + http://minio01:9000/cloud-storage-01/data/ + bB5vT2M8yaRv9J14SnAP + ***** + true + + + + + +
+ default +
+ + hdd1 + + + hdd2 + +
+ 0.0 +
+ + +
+ s3 +
+ + default + +
+ 0.0 +
+ + +
+ default +
+ + s3 + +
+ 0.0 +
+
+
+``` +#### Access configuration +**query** +```sql +SHOW ACCESS +``` +**result** +``` +CREATE USER default IDENTIFIED WITH plaintext_password SETTINGS PROFILE default +CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = 'random' +CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1 +CREATE QUOTA default KEYED BY user_name FOR INTERVAL 1 hour TRACKING ONLY TO default +GRANT ALL ON *.* TO default WITH GRANT OPTION +``` +#### Quotas +**query** +```sql +SHOW QUOTA +``` +**result** +``` +Row 1: +────── +quota_name: default +quota_key: default +start_time: 2021-12-25 15:00:00 +end_time: 2021-12-25 16:00:00 +duration: 3600 +queries: 49 +max_queries: ᴺᵁᴸᴸ +query_selects: 49 +max_query_selects: ᴺᵁᴸᴸ +query_inserts: 0 +max_query_inserts: ᴺᵁᴸᴸ +errors: 6 +max_errors: ᴺᵁᴸᴸ +result_rows: 607 +max_result_rows: ᴺᵁᴸᴸ +result_bytes: 237632 +max_result_bytes: ᴺᵁᴸᴸ +read_rows: 1256 +max_read_rows: ᴺᵁᴸᴸ +read_bytes: 778936 +max_read_bytes: ᴺᵁᴸᴸ +execution_time: 0 +max_execution_time: ᴺᵁᴸᴸ +``` +#### Schema +##### Database engines +**query** +```sql +SELECT + engine, + count() "count" +FROM system.databases +GROUP BY engine +``` +**result** +``` +┌─engine─┬─count─┐ +│ Memory │ 2 │ +│ Atomic │ 2 │ +└────────┴───────┘ +``` +##### Databases (top 10 by size) +**query** +```sql +SELECT + name, + engine, + tables, + partitions, + parts, + formatReadableSize(bytes_on_disk) "disk_size" +FROM system.databases db +LEFT JOIN +( + SELECT + database, + uniq(table) "tables", + uniq(table, partition) "partitions", + count() AS parts, + sum(bytes_on_disk) "bytes_on_disk" + FROM system.parts + WHERE active + GROUP BY database +) AS db_stats ON db.name = db_stats.database +ORDER BY bytes_on_disk DESC +LIMIT 10 +``` +**result** +``` +┌─name───────────────┬─engine─┬─tables─┬─partitions─┬─parts─┬─disk_size──┐ +│ system │ Atomic │ 6 │ 6 │ 22 │ 716.29 KiB │ +│ INFORMATION_SCHEMA │ Memory │ 0 │ 0 │ 0 │ 0.00 B │ +│ default │ Atomic │ 0 │ 0 │ 0 │ 0.00 B │ +│ information_schema │ Memory │ 0 │ 0 │ 0 │ 0.00 B │ +└────────────────────┴────────┴────────┴────────────┴───────┴────────────┘ +``` +##### Table engines +**query** +```sql +SELECT + engine, + count() "count" +FROM system.tables +WHERE database != 'system' +GROUP BY engine +``` +**result** +``` +┌─engine─┬─count─┐ +│ View │ 8 │ +└────────┴───────┘ +``` +##### Dictionaries +**query** +```sql +SELECT + source, + type, + status, + count() "count" +FROM system.dictionaries +GROUP BY source, type, status +ORDER BY status DESC, source +``` +**result** +``` + +``` +#### Replication +##### Replicated tables (top 10 by absolute delay) +**query** +```sql +SELECT + database, + table, + is_leader, + is_readonly, + absolute_delay, + queue_size, + inserts_in_queue, + merges_in_queue +FROM system.replicas +ORDER BY absolute_delay DESC +LIMIT 10 +``` +**result** +``` + +``` +##### Replication queue (top 20 oldest tasks) +**query** +```sql +SELECT + database, + table, + replica_name, + position, + node_name, + type, + source_replica, + parts_to_merge, + new_part_name, + create_time, + required_quorum, + is_detach, + is_currently_executing, + num_tries, + last_attempt_time, + last_exception, + concat('time: ', toString(last_postpone_time), ', number: ', toString(num_postponed), ', reason: ', postpone_reason) postpone +FROM system.replication_queue +ORDER BY create_time ASC +LIMIT 20 +``` +**result** +``` + +``` +##### Replicated fetches +**query** +```sql +SELECT + database, + table, + round(elapsed, 1) "elapsed", + round(100 * progress, 1) "progress", + 
partition_id, + result_part_name, + result_part_path, + total_size_bytes_compressed, + bytes_read_compressed, + source_replica_path, + source_replica_hostname, + source_replica_port, + interserver_scheme, + to_detached, + thread_id +FROM system.replicated_fetches +``` +**result** +``` + +``` +#### Top 10 tables by max parts per partition +**query** +```sql +SELECT + database, + table, + count() "partitions", + sum(part_count) "parts", + max(part_count) "max_parts_per_partition" +FROM +( + SELECT + database, + table, + partition, + count() "part_count" + FROM system.parts + WHERE active + GROUP BY database, table, partition +) partitions +GROUP BY database, table +ORDER BY max_parts_per_partition DESC +LIMIT 10 +``` +**result** +``` +┌─database─┬─table───────────────────┬─partitions─┬─parts─┬─max_parts_per_partition─┐ +│ system │ metric_log │ 1 │ 5 │ 5 │ +│ system │ trace_log │ 1 │ 5 │ 5 │ +│ system │ query_thread_log │ 1 │ 3 │ 3 │ +│ system │ query_log │ 1 │ 3 │ 3 │ +│ system │ asynchronous_metric_log │ 1 │ 3 │ 3 │ +│ system │ session_log │ 1 │ 3 │ 3 │ +└──────────┴─────────────────────────┴────────────┴───────┴─────────────────────────┘ +``` +#### Merges in progress +**query** +```sql +SELECT + database, + table, + round(elapsed, 1) "elapsed", + round(100 * progress, 1) "progress", + is_mutation, + partition_id, +result_part_path, + source_part_paths, +num_parts, + formatReadableSize(total_size_bytes_compressed) "total_size_compressed", + formatReadableSize(bytes_read_uncompressed) "read_uncompressed", + formatReadableSize(bytes_written_uncompressed) "written_uncompressed", + columns_written, +formatReadableSize(memory_usage) "memory_usage", + thread_id +FROM system.merges +``` +**result** +``` + +``` +#### Mutations in progress +**query** +```sql +SELECT + database, + table, + mutation_id, + command, + create_time, +parts_to_do_names, +parts_to_do, + is_done, + latest_failed_part, + latest_fail_time, + latest_fail_reason +FROM system.mutations +WHERE NOT is_done +ORDER BY create_time DESC +``` +**result** +``` + +``` +#### Recent data parts (modification time within last 3 minutes) +**query** +```sql +SELECT + database, + table, + engine, + partition_id, + name, +part_type, +active, + level, +disk_name, +path, + marks, + rows, + bytes_on_disk, + data_compressed_bytes, + data_uncompressed_bytes, + marks_bytes, + modification_time, + remove_time, + refcount, + is_frozen, + min_date, + max_date, + min_time, + max_time, + min_block_number, + max_block_number +FROM system.parts +WHERE modification_time > now() - INTERVAL 3 MINUTE +ORDER BY modification_time DESC +``` +**result** +``` +Row 1: +────── +database: system +table: metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_110_110_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/9a2/9a2fb3b4-8ced-4c0b-9a2f-b3b48ced4c0b/202112_110_110_0/ +marks: 2 +rows: 8 +bytes_on_disk: 21752 +data_compressed_bytes: 11699 +data_uncompressed_bytes: 19952 +marks_bytes: 10032 +modification_time: 2021-12-25 15:33:59 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 110 +max_block_number: 110 + +Row 2: +────── +database: system +table: asynchronous_metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_118_118_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: 
/var/lib/clickhouse/store/78e/78e6eec8-3f71-4724-b8e6-eec83f71a724/202112_118_118_0/ +marks: 2 +rows: 4767 +bytes_on_disk: 10856 +data_compressed_bytes: 10656 +data_uncompressed_bytes: 128675 +marks_bytes: 176 +modification_time: 2021-12-25 15:33:58 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 118 +max_block_number: 118 + +Row 3: +────── +database: system +table: asynchronous_metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_117_117_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/78e/78e6eec8-3f71-4724-b8e6-eec83f71a724/202112_117_117_0/ +marks: 2 +rows: 4767 +bytes_on_disk: 11028 +data_compressed_bytes: 10828 +data_uncompressed_bytes: 128675 +marks_bytes: 176 +modification_time: 2021-12-25 15:33:51 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 117 +max_block_number: 117 + +Row 4: +────── +database: system +table: metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_109_109_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/9a2/9a2fb3b4-8ced-4c0b-9a2f-b3b48ced4c0b/202112_109_109_0/ +marks: 2 +rows: 7 +bytes_on_disk: 21802 +data_compressed_bytes: 11749 +data_uncompressed_bytes: 17458 +marks_bytes: 10032 +modification_time: 2021-12-25 15:33:51 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 109 +max_block_number: 109 + +Row 5: +────── +database: system +table: trace_log +engine: MergeTree +partition_id: 202112 +name: 202112_53_53_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/c0b/c0bc3be3-22d7-45a3-80bc-3be322d7b5a3/202112_53_53_0/ +marks: 2 +rows: 6 +bytes_on_disk: 1057 +data_compressed_bytes: 700 +data_uncompressed_bytes: 1894 +marks_bytes: 336 +modification_time: 2021-12-25 15:33:49 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 53 +max_block_number: 53 + +Row 6: +────── +database: system +table: asynchronous_metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_116_116_0 +part_type: Compact +active: 0 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/78e/78e6eec8-3f71-4724-b8e6-eec83f71a724/202112_116_116_0/ +marks: 2 +rows: 4767 +bytes_on_disk: 10911 +data_compressed_bytes: 10711 +data_uncompressed_bytes: 128675 +marks_bytes: 176 +modification_time: 2021-12-25 15:33:44 +remove_time: 2021-12-25 15:33:44 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 116 +max_block_number: 116 + +Row 7: +────── +database: system +table: asynchronous_metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_1_116_23 +part_type: Wide +active: 1 +level: 23 +disk_name: default +path: /var/lib/clickhouse/store/78e/78e6eec8-3f71-4724-b8e6-eec83f71a724/202112_1_116_23/ +marks: 69 +rows: 553071 +bytes_on_disk: 435279 +data_compressed_bytes: 424915 +data_uncompressed_bytes: 13289123 +marks_bytes: 9936 +modification_time: 2021-12-25 15:33:44 +remove_time: 
1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 1 +max_block_number: 116 + +Row 8: +────── +database: system +table: metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_108_108_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/9a2/9a2fb3b4-8ced-4c0b-9a2f-b3b48ced4c0b/202112_108_108_0/ +marks: 2 +rows: 8 +bytes_on_disk: 21833 +data_compressed_bytes: 11780 +data_uncompressed_bytes: 19952 +marks_bytes: 10032 +modification_time: 2021-12-25 15:33:44 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 108 +max_block_number: 108 + +Row 9: +─────── +database: system +table: asynchronous_metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_115_115_0 +part_type: Compact +active: 0 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/78e/78e6eec8-3f71-4724-b8e6-eec83f71a724/202112_115_115_0/ +marks: 2 +rows: 4767 +bytes_on_disk: 11146 +data_compressed_bytes: 10946 +data_uncompressed_bytes: 128675 +marks_bytes: 176 +modification_time: 2021-12-25 15:33:37 +remove_time: 2021-12-25 15:33:44 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 115 +max_block_number: 115 + +Row 10: +─────── +database: system +table: metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_107_107_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/9a2/9a2fb3b4-8ced-4c0b-9a2f-b3b48ced4c0b/202112_107_107_0/ +marks: 2 +rows: 7 +bytes_on_disk: 21996 +data_compressed_bytes: 11943 +data_uncompressed_bytes: 17458 +marks_bytes: 10032 +modification_time: 2021-12-25 15:33:36 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 107 +max_block_number: 107 + +Row 11: +─────── +database: system +table: session_log +engine: MergeTree +partition_id: 202112 +name: 202112_3_3_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/9f3/9f3dd592-781c-48d8-9f3d-d592781c48d8/202112_3_3_0/ +marks: 2 +rows: 44 +bytes_on_disk: 2208 +data_compressed_bytes: 1498 +data_uncompressed_bytes: 5130 +marks_bytes: 688 +modification_time: 2021-12-25 15:33:34 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 3 +max_block_number: 3 + +Row 12: +─────── +database: system +table: query_log +engine: MergeTree +partition_id: 202112 +name: 202112_3_3_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/1a3/1a3ec308-d42e-4f3c-9a3e-c308d42e2f3c/202112_3_3_0/ +marks: 2 +rows: 43 +bytes_on_disk: 17843 +data_compressed_bytes: 15725 +data_uncompressed_bytes: 61869 +marks_bytes: 2096 +modification_time: 2021-12-25 15:33:34 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 3 +max_block_number: 3 + +Row 13: +─────── +database: system +table: query_thread_log +engine: MergeTree +partition_id: 202112 +name: 
202112_3_3_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/afa/afa652ef-f91d-4a48-afa6-52eff91daa48/202112_3_3_0/ +marks: 2 +rows: 43 +bytes_on_disk: 11878 +data_compressed_bytes: 10432 +data_uncompressed_bytes: 52339 +marks_bytes: 1424 +modification_time: 2021-12-25 15:33:34 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 3 +max_block_number: 3 + +Row 14: +─────── +database: system +table: trace_log +engine: MergeTree +partition_id: 202112 +name: 202112_52_52_0 +part_type: Compact +active: 1 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/c0b/c0bc3be3-22d7-45a3-80bc-3be322d7b5a3/202112_52_52_0/ +marks: 2 +rows: 4 +bytes_on_disk: 1078 +data_compressed_bytes: 721 +data_uncompressed_bytes: 1252 +marks_bytes: 336 +modification_time: 2021-12-25 15:33:34 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 52 +max_block_number: 52 + +Row 15: +─────── +database: system +table: asynchronous_metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_114_114_0 +part_type: Compact +active: 0 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/78e/78e6eec8-3f71-4724-b8e6-eec83f71a724/202112_114_114_0/ +marks: 2 +rows: 4767 +bytes_on_disk: 11447 +data_compressed_bytes: 11247 +data_uncompressed_bytes: 128675 +marks_bytes: 176 +modification_time: 2021-12-25 15:33:30 +remove_time: 2021-12-25 15:33:44 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 114 +max_block_number: 114 + +Row 16: +─────── +database: system +table: metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_1_106_21 +part_type: Compact +active: 1 +level: 21 +disk_name: default +path: /var/lib/clickhouse/store/9a2/9a2fb3b4-8ced-4c0b-9a2f-b3b48ced4c0b/202112_1_106_21/ +marks: 2 +rows: 798 +bytes_on_disk: 84853 +data_compressed_bytes: 74798 +data_uncompressed_bytes: 1990212 +marks_bytes: 10032 +modification_time: 2021-12-25 15:33:29 +remove_time: 1970-01-01 03:00:00 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 1 +max_block_number: 106 + +Row 17: +─────── +database: system +table: metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_106_106_0 +part_type: Compact +active: 0 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/9a2/9a2fb3b4-8ced-4c0b-9a2f-b3b48ced4c0b/202112_106_106_0/ +marks: 2 +rows: 8 +bytes_on_disk: 21863 +data_compressed_bytes: 11810 +data_uncompressed_bytes: 19952 +marks_bytes: 10032 +modification_time: 2021-12-25 15:33:28 +remove_time: 2021-12-25 15:33:29 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 106 +max_block_number: 106 + +Row 18: +─────── +database: system +table: asynchronous_metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_113_113_0 +part_type: Compact +active: 0 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/78e/78e6eec8-3f71-4724-b8e6-eec83f71a724/202112_113_113_0/ +marks: 2 +rows: 4767 +bytes_on_disk: 11191 +data_compressed_bytes: 10991 +data_uncompressed_bytes: 128675 +marks_bytes: 176 
+modification_time: 2021-12-25 15:33:23 +remove_time: 2021-12-25 15:33:44 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 113 +max_block_number: 113 + +Row 19: +─────── +database: system +table: metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_105_105_0 +part_type: Compact +active: 0 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/9a2/9a2fb3b4-8ced-4c0b-9a2f-b3b48ced4c0b/202112_105_105_0/ +marks: 2 +rows: 7 +bytes_on_disk: 21786 +data_compressed_bytes: 11733 +data_uncompressed_bytes: 17458 +marks_bytes: 10032 +modification_time: 2021-12-25 15:33:21 +remove_time: 2021-12-25 15:33:29 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 105 +max_block_number: 105 + +Row 20: +─────── +database: system +table: asynchronous_metric_log +engine: MergeTree +partition_id: 202112 +name: 202112_112_112_0 +part_type: Compact +active: 0 +level: 0 +disk_name: default +path: /var/lib/clickhouse/store/78e/78e6eec8-3f71-4724-b8e6-eec83f71a724/202112_112_112_0/ +marks: 2 +rows: 4767 +bytes_on_disk: 11281 +data_compressed_bytes: 11081 +data_uncompressed_bytes: 128675 +marks_bytes: 176 +modification_time: 2021-12-25 15:33:16 +remove_time: 2021-12-25 15:33:44 +refcount: 1 +is_frozen: 0 +min_date: 2021-12-25 +max_date: 2021-12-25 +min_time: 1970-01-01 03:00:00 +max_time: 1970-01-01 03:00:00 +min_block_number: 112 +max_block_number: 112 +``` +#### Detached data +##### system.detached_parts +**query** +```sql +SELECT + database, + table, + partition_id, + name, + disk, + reason, + min_block_number, + max_block_number, + level +FROM system.detached_parts +``` +**result** +``` +┌─database─┬─table─┬─partition_id─┬─name─┬─disk─┬─reason─┬─min_block_number─┬─max_block_number─┬─level─┐ +└──────────┴───────┴──────────────┴──────┴──────┴────────┴──────────────────┴──────────────────┴───────┘ +``` +##### Disk space usage +**command** +``` +du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh +``` +**result** +``` +0 total + +``` +#### Queries +##### Queries in progress (process list) +**query** +```sql +SELECT + elapsed, + query_id, + query, + is_cancelled, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + formatReadableSize(memory_usage) AS "memory usage", + user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + thread_ids, + ProfileEvents, + Settings + FROM system.processes +ORDER BY elapsed DESC +``` +**result** +``` +Row 1: +────── +elapsed: 0.000785246 +query_id: b51cbc7a-2260-4c9b-b26c-6307b10ad948 +query: SELECT + elapsed, + query_id, + query, + is_cancelled, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + formatReadableSize(memory_usage) AS "memory usage", + user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + thread_ids, + ProfileEvents, + Settings + FROM system.processes +ORDER BY elapsed DESC FORMAT Vertical + +is_cancelled: 0 +read: 0 
rows / 0.00 B +written: 0 rows / 0.00 B +memory usage: 0.00 B +user: default +client: python-requests/2.26.0 +thread_ids: [66] +ProfileEvents: {'Query':1,'SelectQuery':1,'ContextLock':38,'RWLockAcquiredReadLocks':1} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} +``` +##### Top 10 queries by duration +**query** +```sql +SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY +ORDER BY query_duration_ms DESC +LIMIT 10 +``` +**result** +``` +Row 1: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:25:01 +query_duration_ms: 60 +query_id: f72e1120-cc66-434c-9809-3a99077ed842 +query_kind: Select +is_initial_query: 1 +query: SELECT + database, + table, + count() "partitions", + sum(part_count) "parts", + max(part_count) "max_parts_per_partition" +FROM +( + SELECT + database, + table, + partition, + count() "part_count" + FROM system.parts + WHERE active + GROUP BY database, table, partition +) partitions +GROUP BY database, table +ORDER BY max_parts_per_partition DESC +LIMIT 10 FORMAT PrettyCompactNoEscapes + +read: 5 rows / 262.00 B +written: 0 rows / 0.00 B +result: 3 rows / 488.00 B +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.parts'] +columns: ['system.parts.active','system.parts.database','system.parts.partition','system.parts.table'] +used_aggregate_functions: ['count','max','sum'] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['PrettyCompactNoEscapes'] +used_functions: [] +used_storages: [] +used_table_functions: [] +thread_ids: [66] +ProfileEvents: {'Query':1,'SelectQuery':1,'ArenaAllocChunks':2,'ArenaAllocBytes':8192,'CompileFunction':1,'CompileExpressionsMicroseconds':52574,'CompileExpressionsBytes':8192,'SelectedRows':5,'SelectedBytes':262,'ContextLock':58,'RWLockAcquiredReadLocks':6,'RealTimeMicroseconds':61493,'UserTimeMicroseconds':34154,'SystemTimeMicroseconds':9874,'SoftPageFaults':170,'HardPageFaults':33,'OSIOWaitMicroseconds':10000,'OSCPUWaitMicroseconds':2433,'OSCPUVirtualTimeMicroseconds':43706,'OSReadBytes':3080192,'OSWriteBytes':4096,'OSReadChars':863,'OSWriteChars':5334} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 2: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:26:26 +query_duration_ms: 12 +query_id: 
eabd7483-70df-4d60-a668-d8961416e3fb +query_kind: Select +is_initial_query: 1 +query: SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY +ORDER BY query_duration_ms DESC +LIMIT 10 FORMAT Vertical + +read: 40 rows / 67.42 KiB +written: 0 rows / 0.00 B +result: 10 rows / 41.23 KiB +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.query_log'] +columns: ['system.query_log.ProfileEvents','system.query_log.Settings','system.query_log.client_hostname','system.query_log.client_name','system.query_log.client_version_major','system.query_log.client_version_minor','system.query_log.client_version_patch','system.query_log.columns','system.query_log.databases','system.query_log.event_date','system.query_log.event_time','system.query_log.exception','system.query_log.http_user_agent','system.query_log.initial_user','system.query_log.is_initial_query','system.query_log.memory_usage','system.query_log.query','system.query_log.query_duration_ms','system.query_log.query_id','system.query_log.query_kind','system.query_log.query_start_time','system.query_log.read_bytes','system.query_log.read_rows','system.query_log.result_bytes','system.query_log.result_rows','system.query_log.stack_trace','system.query_log.tables','system.query_log.thread_ids','system.query_log.type','system.query_log.used_aggregate_function_combinators','system.query_log.used_aggregate_functions','system.query_log.used_data_type_families','system.query_log.used_database_engines','system.query_log.used_dictionaries','system.query_log.used_formats','system.query_log.used_functions','system.query_log.used_storages','system.query_log.used_table_functions','system.query_log.user','system.query_log.written_bytes','system.query_log.written_rows'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['Vertical'] +used_functions: ['empty','and','now','concat','today','toIntervalDay','formatReadableSize','minus','greaterOrEquals','multiIf','toString','subtractDays','notEquals'] +used_storages: [] +used_table_functions: [] +thread_ids: [66] +ProfileEvents: 
{'Query':1,'SelectQuery':1,'FileOpen':2,'Seek':3,'ReadBufferFromFileDescriptorRead':10,'ReadBufferFromFileDescriptorReadBytes':16873,'ReadCompressedBytes':12855,'CompressedReadBufferBlocks':41,'CompressedReadBufferBytes':61376,'IOBufferAllocs':5,'IOBufferAllocBytes':26594,'FunctionExecute':28,'MarkCacheHits':1,'MarkCacheMisses':1,'CreatedReadBufferOrdinary':3,'DiskReadElapsedMicroseconds':30,'SelectedParts':1,'SelectedRanges':1,'SelectedMarks':1,'SelectedRows':40,'SelectedBytes':69039,'ContextLock':342,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':14451,'UserTimeMicroseconds':10009,'SystemTimeMicroseconds':1515,'SoftPageFaults':44,'OSCPUWaitMicroseconds':3050,'OSCPUVirtualTimeMicroseconds':11523,'OSReadChars':17311,'OSWriteChars':7288} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 3: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 12 +query_id: d9557845-5b5e-44ef-befa-55f837065d00 +query_kind: Select +is_initial_query: 1 +query: SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY +ORDER BY query_duration_ms DESC +LIMIT 10 FORMAT Vertical + +read: 83 rows / 130.00 KiB +written: 0 rows / 0.00 B +result: 10 rows / 183.10 KiB +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.query_log'] +columns: 
['system.query_log.ProfileEvents','system.query_log.Settings','system.query_log.client_hostname','system.query_log.client_name','system.query_log.client_version_major','system.query_log.client_version_minor','system.query_log.client_version_patch','system.query_log.columns','system.query_log.databases','system.query_log.event_date','system.query_log.event_time','system.query_log.exception','system.query_log.http_user_agent','system.query_log.initial_user','system.query_log.is_initial_query','system.query_log.memory_usage','system.query_log.query','system.query_log.query_duration_ms','system.query_log.query_id','system.query_log.query_kind','system.query_log.query_start_time','system.query_log.read_bytes','system.query_log.read_rows','system.query_log.result_bytes','system.query_log.result_rows','system.query_log.stack_trace','system.query_log.tables','system.query_log.thread_ids','system.query_log.type','system.query_log.used_aggregate_function_combinators','system.query_log.used_aggregate_functions','system.query_log.used_data_type_families','system.query_log.used_database_engines','system.query_log.used_dictionaries','system.query_log.used_formats','system.query_log.used_functions','system.query_log.used_storages','system.query_log.used_table_functions','system.query_log.user','system.query_log.written_bytes','system.query_log.written_rows'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['Vertical'] +used_functions: ['empty','and','now','concat','today','toIntervalDay','formatReadableSize','minus','greaterOrEquals','multiIf','toString','subtractDays','notEquals'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,283,225,281,282] +ProfileEvents: {'Query':1,'SelectQuery':1,'FileOpen':3,'Seek':6,'ReadBufferFromFileDescriptorRead':18,'ReadBufferFromFileDescriptorReadBytes':32140,'ReadCompressedBytes':25892,'CompressedReadBufferBlocks':82,'CompressedReadBufferBytes':116215,'IOBufferAllocs':9,'IOBufferAllocBytes':47368,'FunctionExecute':51,'MarkCacheHits':3,'MarkCacheMisses':1,'CreatedReadBufferOrdinary':5,'DiskReadElapsedMicroseconds':13,'SelectedParts':2,'SelectedRanges':2,'SelectedMarks':2,'SelectedRows':83,'SelectedBytes':133125,'ContextLock':351,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':19368,'UserTimeMicroseconds':12036,'SystemTimeMicroseconds':2047,'SoftPageFaults':42,'OSCPUWaitMicroseconds':710,'OSCPUVirtualTimeMicroseconds':13623,'OSWriteBytes':4096,'OSReadChars':34225,'OSWriteChars':8142} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 4: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 11 +query_id: bae8a338-eee9-406b-80d2-4596af2ba31f +query_kind: Select +is_initial_query: 1 +query: SELECT + name, + engine, + tables, + partitions, + parts, + formatReadableSize(bytes_on_disk) "disk_size" +FROM system.databases db +LEFT JOIN +( + SELECT + database, + uniq(table) "tables", + uniq(table, partition) "partitions", + count() AS parts, + sum(bytes_on_disk) "bytes_on_disk" + FROM system.parts + WHERE active + GROUP BY database +) AS db_stats ON db.name = db_stats.database +ORDER BY bytes_on_disk DESC +LIMIT 10 FORMAT PrettyCompactNoEscapes + +read: 17 rows / 1.31 KiB +written: 0 rows / 0.00 B +result: 4 rows / 640.00 B +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: 
['system.databases','system.parts'] +columns: ['system.databases.engine','system.databases.name','system.parts.active','system.parts.bytes_on_disk','system.parts.database','system.parts.partition','system.parts.table'] +used_aggregate_functions: ['count','sum','uniq'] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['PrettyCompactNoEscapes'] +used_functions: ['formatReadableSize'] +used_storages: [] +used_table_functions: [] +thread_ids: [66] +ProfileEvents: {'Query':1,'SelectQuery':1,'ArenaAllocChunks':5,'ArenaAllocBytes':20480,'FunctionExecute':1,'SelectedRows':17,'SelectedBytes':1345,'ContextLock':69,'RWLockAcquiredReadLocks':9,'RealTimeMicroseconds':12225,'UserTimeMicroseconds':10731,'SystemTimeMicroseconds':1146,'SoftPageFaults':2,'OSCPUWaitMicroseconds':720,'OSCPUVirtualTimeMicroseconds':11876,'OSWriteBytes':4096,'OSReadChars':438,'OSWriteChars':8938} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 5: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:26:26 +query_duration_ms: 9 +query_id: f0c62bc7-36da-4542-a3d5-68a40c1c4b48 +query_kind: Select +is_initial_query: 1 +query: SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY + AND exception != '' +ORDER BY query_start_time DESC +LIMIT 10 FORMAT Vertical + +read: 40 rows / 67.42 KiB +written: 0 rows / 0.00 B +result: 4 rows / 43.13 KiB +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.query_log'] +columns: 
['system.query_log.ProfileEvents','system.query_log.Settings','system.query_log.client_hostname','system.query_log.client_name','system.query_log.client_version_major','system.query_log.client_version_minor','system.query_log.client_version_patch','system.query_log.columns','system.query_log.databases','system.query_log.event_date','system.query_log.event_time','system.query_log.exception','system.query_log.http_user_agent','system.query_log.initial_user','system.query_log.is_initial_query','system.query_log.memory_usage','system.query_log.query','system.query_log.query_duration_ms','system.query_log.query_id','system.query_log.query_kind','system.query_log.query_start_time','system.query_log.read_bytes','system.query_log.read_rows','system.query_log.result_bytes','system.query_log.result_rows','system.query_log.stack_trace','system.query_log.tables','system.query_log.thread_ids','system.query_log.type','system.query_log.used_aggregate_function_combinators','system.query_log.used_aggregate_functions','system.query_log.used_data_type_families','system.query_log.used_database_engines','system.query_log.used_dictionaries','system.query_log.used_formats','system.query_log.used_functions','system.query_log.used_storages','system.query_log.used_table_functions','system.query_log.user','system.query_log.written_bytes','system.query_log.written_rows'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['Vertical'] +used_functions: ['empty','and','now','concat','today','toIntervalDay','formatReadableSize','minus','greaterOrEquals','multiIf','toString','subtractDays','notEquals'] +used_storages: [] +used_table_functions: [] +thread_ids: [66] +ProfileEvents: {'Query':1,'SelectQuery':1,'FileOpen':1,'Seek':3,'ReadBufferFromFileDescriptorRead':8,'ReadBufferFromFileDescriptorReadBytes':15561,'ReadCompressedBytes':12855,'CompressedReadBufferBlocks':41,'CompressedReadBufferBytes':61376,'IOBufferAllocs':4,'IOBufferAllocBytes':25506,'FunctionExecute':31,'MarkCacheHits':2,'CreatedReadBufferOrdinary':2,'DiskReadElapsedMicroseconds':16,'SelectedParts':1,'SelectedRanges':1,'SelectedMarks':1,'SelectedRows':40,'SelectedBytes':69039,'ContextLock':361,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':11353,'UserTimeMicroseconds':8910,'SystemTimeMicroseconds':533,'SoftPageFaults':7,'HardPageFaults':2,'OSCPUWaitMicroseconds':1117,'OSCPUVirtualTimeMicroseconds':9443,'OSReadBytes':16384,'OSWriteBytes':4096,'OSReadChars':15999,'OSWriteChars':7714,'QueryProfilerRuns':1} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 6: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 8 +query_id: 72f3f9de-d17c-456b-8316-d494bea2096a +query_kind: Select +is_initial_query: 1 +query: SELECT name FROM system.tables WHERE database = 'system' FORMAT JSONCompact + +read: 74 rows / 2.61 KiB +written: 0 rows / 0.00 B +result: 74 rows / 2.00 KiB +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.tables'] +columns: ['system.tables.database','system.tables.name'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['JSONCompact'] +used_functions: ['equals'] +used_storages: [] +used_table_functions: [] +thread_ids: [66] 
+ProfileEvents: {'Query':1,'SelectQuery':1,'IOBufferAllocs':2,'IOBufferAllocBytes':8192,'FunctionExecute':4,'SelectedRows':74,'SelectedBytes':2675,'ContextLock':23,'RWLockAcquiredReadLocks':75,'RealTimeMicroseconds':9190,'UserTimeMicroseconds':6468,'SystemTimeMicroseconds':517,'OSCPUWaitMicroseconds':2237,'OSCPUVirtualTimeMicroseconds':6984,'OSReadChars':438,'OSWriteChars':1270} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 7: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 8 +query_id: d55da87f-b030-4b5d-95fc-f9103ce58601 +query_kind: Select +is_initial_query: 1 +query: SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY +ORDER BY memory_usage DESC +LIMIT 10 FORMAT Vertical + +read: 83 rows / 130.00 KiB +written: 0 rows / 0.00 B +result: 10 rows / 178.41 KiB +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.query_log'] +columns: ['system.query_log.ProfileEvents','system.query_log.Settings','system.query_log.client_hostname','system.query_log.client_name','system.query_log.client_version_major','system.query_log.client_version_minor','system.query_log.client_version_patch','system.query_log.columns','system.query_log.databases','system.query_log.event_date','system.query_log.event_time','system.query_log.exception','system.query_log.http_user_agent','system.query_log.initial_user','system.query_log.is_initial_query','system.query_log.memory_usage','system.query_log.query','system.query_log.query_duration_ms','system.query_log.query_id','system.query_log.query_kind','system.query_log.query_start_time','system.query_log.read_bytes','system.query_log.read_rows','system.query_log.result_bytes','system.query_log.result_rows','system.query_log.stack_trace','system.query_log.tables','system.query_log.thread_ids','system.query_log.type','system.query_log.used_aggregate_function_combinators','system.query_log.used_aggregate_functions','system.query_log.used_data_type_families','system.query_log.used_database_engines','system.query_log.used_dictionaries','system.query_log.used_formats','system.query_log.used_functions','system.query_log.used_storages','system.query_log.used_table_functions','system.query_log.user','system.query_log.written_bytes','system.query_log.written_rows'] +used_aggregate_functions: [] +used_aggregate_function_combinators: 
[] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['Vertical'] +used_functions: ['empty','and','now','concat','today','toIntervalDay','formatReadableSize','minus','greaterOrEquals','multiIf','toString','subtractDays','notEquals'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,283,283,225,282] +ProfileEvents: {'Query':1,'SelectQuery':1,'FileOpen':2,'Seek':6,'ReadBufferFromFileDescriptorRead':16,'ReadBufferFromFileDescriptorReadBytes':30044,'ReadCompressedBytes':25892,'CompressedReadBufferBlocks':82,'CompressedReadBufferBytes':116215,'IOBufferAllocs':8,'IOBufferAllocBytes':45272,'FunctionExecute':51,'MarkCacheHits':4,'CreatedReadBufferOrdinary':4,'SelectedParts':2,'SelectedRanges':2,'SelectedMarks':2,'SelectedRows':83,'SelectedBytes':133125,'ContextLock':351,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':12416,'UserTimeMicroseconds':7727,'SystemTimeMicroseconds':1247,'SoftPageFaults':41,'OSCPUWaitMicroseconds':1058,'OSCPUVirtualTimeMicroseconds':9018,'OSWriteBytes':4096,'OSReadChars':32137,'OSWriteChars':8108} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 8: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 8 +query_id: cc2a0e7a-3b9b-47d2-9255-009c62584bc4 +query_kind: Select +is_initial_query: 1 +query: SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY + AND exception != '' +ORDER BY query_start_time DESC +LIMIT 10 FORMAT Vertical + +read: 83 rows / 130.00 KiB +written: 0 rows / 0.00 B +result: 5 rows / 57.80 KiB +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.query_log'] +columns: 
['system.query_log.ProfileEvents','system.query_log.Settings','system.query_log.client_hostname','system.query_log.client_name','system.query_log.client_version_major','system.query_log.client_version_minor','system.query_log.client_version_patch','system.query_log.columns','system.query_log.databases','system.query_log.event_date','system.query_log.event_time','system.query_log.exception','system.query_log.http_user_agent','system.query_log.initial_user','system.query_log.is_initial_query','system.query_log.memory_usage','system.query_log.query','system.query_log.query_duration_ms','system.query_log.query_id','system.query_log.query_kind','system.query_log.query_start_time','system.query_log.read_bytes','system.query_log.read_rows','system.query_log.result_bytes','system.query_log.result_rows','system.query_log.stack_trace','system.query_log.tables','system.query_log.thread_ids','system.query_log.type','system.query_log.used_aggregate_function_combinators','system.query_log.used_aggregate_functions','system.query_log.used_data_type_families','system.query_log.used_database_engines','system.query_log.used_dictionaries','system.query_log.used_formats','system.query_log.used_functions','system.query_log.used_storages','system.query_log.used_table_functions','system.query_log.user','system.query_log.written_bytes','system.query_log.written_rows'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['Vertical'] +used_functions: ['empty','and','now','concat','today','toIntervalDay','formatReadableSize','minus','greaterOrEquals','multiIf','toString','subtractDays','notEquals'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,281,283,282,225] +ProfileEvents: {'Query':1,'SelectQuery':1,'FileOpen':2,'Seek':6,'ReadBufferFromFileDescriptorRead':16,'ReadBufferFromFileDescriptorReadBytes':31464,'ReadCompressedBytes':25892,'CompressedReadBufferBlocks':82,'CompressedReadBufferBytes':116215,'IOBufferAllocs':8,'IOBufferAllocBytes':46860,'FunctionExecute':56,'MarkCacheHits':4,'CreatedReadBufferOrdinary':4,'SelectedParts':2,'SelectedRanges':2,'SelectedMarks':2,'SelectedRows':83,'SelectedBytes':133125,'ContextLock':370,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':13096,'UserTimeMicroseconds':9503,'SystemTimeMicroseconds':195,'SoftPageFaults':23,'OSCPUWaitMicroseconds':1380,'OSCPUVirtualTimeMicroseconds':9661,'OSWriteBytes':4096,'OSReadChars':33567,'OSWriteChars':8310} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 9: +─────── +type: QueryFinish +query_start_time: 2021-12-25 15:25:01 +query_duration_ms: 8 +query_id: a3d717fd-c43f-4723-a18d-557c733299f6 +query_kind: Select +is_initial_query: 1 +query: SELECT + name, + engine, + tables, + partitions, + parts, + formatReadableSize(bytes_on_disk) "disk_size" +FROM system.databases db +LEFT JOIN +( + SELECT + database, + uniq(table) "tables", + uniq(table, partition) "partitions", + count() AS parts, + sum(bytes_on_disk) "bytes_on_disk" + FROM system.parts + WHERE active + GROUP BY database +) AS db_stats ON db.name = db_stats.database +ORDER BY bytes_on_disk DESC +LIMIT 10 FORMAT PrettyCompactNoEscapes + +read: 9 rows / 845.00 B +written: 0 rows / 0.00 B +result: 4 rows / 640.00 B +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.databases','system.parts'] +columns: 
['system.databases.engine','system.databases.name','system.parts.active','system.parts.bytes_on_disk','system.parts.database','system.parts.partition','system.parts.table'] +used_aggregate_functions: ['count','sum','uniq'] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['PrettyCompactNoEscapes'] +used_functions: ['formatReadableSize'] +used_storages: [] +used_table_functions: [] +thread_ids: [66] +ProfileEvents: {'Query':1,'SelectQuery':1,'ArenaAllocChunks':5,'ArenaAllocBytes':20480,'FunctionExecute':1,'SelectedRows':9,'SelectedBytes':845,'ContextLock':69,'RWLockAcquiredReadLocks':6,'RealTimeMicroseconds':9090,'UserTimeMicroseconds':4654,'SystemTimeMicroseconds':1171,'SoftPageFaults':8,'HardPageFaults':2,'OSCPUWaitMicroseconds':2126,'OSCPUVirtualTimeMicroseconds':5824,'OSReadBytes':212992,'OSWriteBytes':4096,'OSReadChars':427,'OSWriteChars':8936} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 10: +─────── +type: QueryFinish +query_start_time: 2021-12-25 15:26:26 +query_duration_ms: 7 +query_id: 49305759-0f08-4d5a-81d8-c1a11cfc0eb4 +query_kind: Select +is_initial_query: 1 +query: SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY +ORDER BY memory_usage DESC +LIMIT 10 FORMAT Vertical + +read: 40 rows / 67.42 KiB +written: 0 rows / 0.00 B +result: 10 rows / 57.95 KiB +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.query_log'] +columns: 
['system.query_log.ProfileEvents','system.query_log.Settings','system.query_log.client_hostname','system.query_log.client_name','system.query_log.client_version_major','system.query_log.client_version_minor','system.query_log.client_version_patch','system.query_log.columns','system.query_log.databases','system.query_log.event_date','system.query_log.event_time','system.query_log.exception','system.query_log.http_user_agent','system.query_log.initial_user','system.query_log.is_initial_query','system.query_log.memory_usage','system.query_log.query','system.query_log.query_duration_ms','system.query_log.query_id','system.query_log.query_kind','system.query_log.query_start_time','system.query_log.read_bytes','system.query_log.read_rows','system.query_log.result_bytes','system.query_log.result_rows','system.query_log.stack_trace','system.query_log.tables','system.query_log.thread_ids','system.query_log.type','system.query_log.used_aggregate_function_combinators','system.query_log.used_aggregate_functions','system.query_log.used_data_type_families','system.query_log.used_database_engines','system.query_log.used_dictionaries','system.query_log.used_formats','system.query_log.used_functions','system.query_log.used_storages','system.query_log.used_table_functions','system.query_log.user','system.query_log.written_bytes','system.query_log.written_rows'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['Vertical'] +used_functions: ['empty','and','now','concat','today','toIntervalDay','formatReadableSize','minus','greaterOrEquals','multiIf','toString','subtractDays','notEquals'] +used_storages: [] +used_table_functions: [] +thread_ids: [66] +ProfileEvents: {'Query':1,'SelectQuery':1,'FileOpen':1,'Seek':3,'ReadBufferFromFileDescriptorRead':8,'ReadBufferFromFileDescriptorReadBytes':14777,'ReadCompressedBytes':12855,'CompressedReadBufferBlocks':41,'CompressedReadBufferBytes':61376,'IOBufferAllocs':4,'IOBufferAllocBytes':24498,'FunctionExecute':28,'MarkCacheHits':2,'CreatedReadBufferOrdinary':2,'DiskReadElapsedMicroseconds':16,'SelectedParts':1,'SelectedRanges':1,'SelectedMarks':1,'SelectedRows':40,'SelectedBytes':69039,'ContextLock':342,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':9159,'UserTimeMicroseconds':4713,'SystemTimeMicroseconds':1942,'SoftPageFaults':19,'OSCPUWaitMicroseconds':2421,'OSCPUVirtualTimeMicroseconds':6655,'OSWriteBytes':4096,'OSReadChars':15215,'OSWriteChars':7278} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} +``` +##### Top 10 queries by memory usage +**query** +```sql +SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + 
used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY +ORDER BY memory_usage DESC +LIMIT 10 +``` +**result** +``` +Row 1: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:26:25 +query_duration_ms: 0 +query_id: c6b6a96c-d5c5-4406-98cd-80857a8412d4 +query_kind: +is_initial_query: 1 +query: SHOW ACCESS FORMAT TSVRaw + +read: 5 rows / 405.00 B +written: 0 rows / 0.00 B +result: 5 rows / 4.50 KiB +memory usage: 1.82 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: [] +tables: [] +columns: [] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TSVRaw'] +used_functions: [] +used_storages: [] +used_table_functions: [] +thread_ids: [66,283,225,281] +ProfileEvents: {'Query':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':5,'SelectedBytes':405,'ContextLock':8,'RealTimeMicroseconds':959,'UserTimeMicroseconds':452,'SystemTimeMicroseconds':238,'OSCPUWaitMicroseconds':90,'OSCPUVirtualTimeMicroseconds':690,'OSWriteBytes':4096,'OSReadChars':846,'OSWriteChars':880} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 2: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 2 +query_id: 253362ba-40a1-4593-a4cc-30d3dfdfe0ab +query_kind: +is_initial_query: 1 +query: SHOW ACCESS FORMAT TSVRaw + +read: 5 rows / 405.00 B +written: 0 rows / 0.00 B +result: 5 rows / 4.50 KiB +memory usage: 1.82 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: [] +tables: [] +columns: [] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TSVRaw'] +used_functions: [] +used_storages: [] +used_table_functions: [] +thread_ids: [66,225,283,282] +ProfileEvents: {'Query':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':5,'SelectedBytes':405,'ContextLock':8,'RealTimeMicroseconds':4687,'UserTimeMicroseconds':2171,'SystemTimeMicroseconds':1264,'OSCPUWaitMicroseconds':513,'OSCPUVirtualTimeMicroseconds':3335,'OSReadChars':848,'OSWriteChars':880} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 3: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:25:01 +query_duration_ms: 1 +query_id: 61b20c8c-ca63-4384-adb4-ce7765d77389 +query_kind: +is_initial_query: 1 +query: SHOW ACCESS FORMAT TSVRaw + +read: 5 rows / 405.00 B +written: 0 rows / 0.00 B +result: 5 rows / 4.50 KiB +memory usage: 1.82 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: [] +tables: [] +columns: [] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TSVRaw'] +used_functions: [] +used_storages: [] +used_table_functions: [] +thread_ids: [66,225,281,283] +ProfileEvents: 
{'Query':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':5,'SelectedBytes':405,'ContextLock':8,'RealTimeMicroseconds':3442,'UserTimeMicroseconds':715,'SystemTimeMicroseconds':485,'SoftPageFaults':1,'OSCPUWaitMicroseconds':443,'OSCPUVirtualTimeMicroseconds':1170,'OSReadChars':833,'OSWriteChars':880} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 4: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:26:25 +query_duration_ms: 1 +query_id: 13ebdab7-e368-4f9f-b47e-023dbd9e91ce +query_kind: Select +is_initial_query: 1 +query: +SELECT formatReadableTimeDelta(uptime()) + + +read: 1 rows / 1.00 B +written: 0 rows / 0.00 B +result: 1 rows / 128.00 B +memory usage: 1.49 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.one'] +columns: ['system.one.dummy'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TabSeparated'] +used_functions: ['uptime','formatReadableTimeDelta'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,283,282,225,281] +ProfileEvents: {'Query':1,'SelectQuery':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':1,'SelectedBytes':1,'ContextLock':17,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':1613,'UserTimeMicroseconds':708,'SystemTimeMicroseconds':274,'SoftPageFaults':3,'OSCPUWaitMicroseconds':2,'OSCPUVirtualTimeMicroseconds':980,'OSReadChars':846,'OSWriteChars':1190} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 5: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 2 +query_id: ff330183-854b-46bc-a548-30e12a7bee9c +query_kind: Select +is_initial_query: 1 +query: +SELECT formatReadableTimeDelta(uptime()) + + +read: 1 rows / 1.00 B +written: 0 rows / 0.00 B +result: 1 rows / 128.00 B +memory usage: 1.49 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.one'] +columns: ['system.one.dummy'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TabSeparated'] +used_functions: ['formatReadableTimeDelta','uptime'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,225,283,281,282] +ProfileEvents: {'Query':1,'SelectQuery':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':1,'SelectedBytes':1,'ContextLock':17,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':4372,'UserTimeMicroseconds':1022,'SystemTimeMicroseconds':177,'OSCPUWaitMicroseconds':2070,'OSCPUVirtualTimeMicroseconds':1198,'OSWriteBytes':4096,'OSReadChars':848,'OSWriteChars':1190} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 6: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:25:01 +query_duration_ms: 3 +query_id: b763c2f9-6234-47f7-8b30-43d619909289 +query_kind: Select +is_initial_query: 1 +query: +SELECT formatReadableTimeDelta(uptime()) + + +read: 1 rows / 1.00 B +written: 0 rows / 0.00 B +result: 1 rows / 128.00 B +memory usage: 1.49 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.one'] +columns: ['system.one.dummy'] +used_aggregate_functions: [] 
+used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TabSeparated'] +used_functions: ['uptime','formatReadableTimeDelta'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,225,281,283,282] +ProfileEvents: {'Query':1,'SelectQuery':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':1,'SelectedBytes':1,'ContextLock':17,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':6367,'UserTimeMicroseconds':3329,'SystemTimeMicroseconds':531,'SoftPageFaults':6,'HardPageFaults':1,'OSCPUWaitMicroseconds':1090,'OSCPUVirtualTimeMicroseconds':3859,'OSReadBytes':102400,'OSReadChars':830,'OSWriteChars':1190} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 7: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:26:25 +query_duration_ms: 1 +query_id: e9c25bd1-00d3-4239-9611-1c3d391178da +query_kind: Select +is_initial_query: 1 +query: SELECT version() + +read: 1 rows / 1.00 B +written: 0 rows / 0.00 B +result: 1 rows / 128.00 B +memory usage: 1.45 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.one'] +columns: ['system.one.dummy'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TabSeparated'] +used_functions: ['version'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,283,225,282] +ProfileEvents: {'Query':1,'SelectQuery':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':1,'SelectedBytes':1,'ContextLock':15,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':2720,'UserTimeMicroseconds':648,'SystemTimeMicroseconds':1144,'OSCPUWaitMicroseconds':110,'OSCPUVirtualTimeMicroseconds':1790,'OSReadChars':845,'OSWriteChars':1140} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 8: +────── +type: QueryFinish +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 4 +query_id: 69762642-8a75-4149-aaf5-bc1969558747 +query_kind: Select +is_initial_query: 1 +query: SELECT version() + +read: 1 rows / 1.00 B +written: 0 rows / 0.00 B +result: 1 rows / 128.00 B +memory usage: 1.45 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.one'] +columns: ['system.one.dummy'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TabSeparated'] +used_functions: ['version'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,282,283] +ProfileEvents: {'Query':1,'SelectQuery':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':1,'SelectedBytes':1,'ContextLock':15,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':10137,'UserTimeMicroseconds':6289,'SystemTimeMicroseconds':47,'SoftPageFaults':2,'HardPageFaults':1,'OSCPUWaitMicroseconds':859,'OSCPUVirtualTimeMicroseconds':6336,'OSReadBytes':12288,'OSReadChars':845,'OSWriteChars':1140} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 9: +─────── +type: QueryFinish +query_start_time: 2021-12-25 15:25:01 +query_duration_ms: 4 +query_id: 9e31242c-62c5-4bb1-9a3e-f96e99f3bddf +query_kind: Select +is_initial_query: 1 +query: SELECT version() + +read: 1 rows / 1.00 B +written: 0 rows / 0.00 B +result: 1 
rows / 128.00 B +memory usage: 1.45 KiB +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.one'] +columns: ['system.one.dummy'] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['TabSeparated'] +used_functions: ['version'] +used_storages: [] +used_table_functions: [] +thread_ids: [66,225,282,281,283] +ProfileEvents: {'Query':1,'SelectQuery':1,'IOBufferAllocs':3,'IOBufferAllocBytes':3145728,'SelectedRows':1,'SelectedBytes':1,'ContextLock':15,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':8688,'UserTimeMicroseconds':3598,'SystemTimeMicroseconds':1288,'SoftPageFaults':42,'HardPageFaults':1,'OSCPUWaitMicroseconds':214,'OSCPUVirtualTimeMicroseconds':4885,'OSReadBytes':98304,'OSReadChars':818,'OSWriteChars':1140} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} + +Row 10: +─────── +type: QueryFinish +query_start_time: 2021-12-25 15:26:26 +query_duration_ms: 2 +query_id: de1fc64c-09c3-420a-8801-a2f9f04407cd +query_kind: Select +is_initial_query: 1 +query: SELECT + database, + table, + count() "partitions", + sum(part_count) "parts", + max(part_count) "max_parts_per_partition" +FROM +( + SELECT + database, + table, + partition, + count() "part_count" + FROM system.parts + WHERE active + GROUP BY database, table, partition +) partitions +GROUP BY database, table +ORDER BY max_parts_per_partition DESC +LIMIT 10 FORMAT PrettyCompactNoEscapes + +read: 12 rows / 643.00 B +written: 0 rows / 0.00 B +result: 6 rows / 752.00 B +memory usage: 0.00 B +exception: +stack_trace: + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: ['system'] +tables: ['system.parts'] +columns: ['system.parts.active','system.parts.database','system.parts.partition','system.parts.table'] +used_aggregate_functions: ['count','max','sum'] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: ['PrettyCompactNoEscapes'] +used_functions: [] +used_storages: [] +used_table_functions: [] +thread_ids: [66] +ProfileEvents: {'Query':1,'SelectQuery':1,'ArenaAllocChunks':2,'ArenaAllocBytes':8192,'SelectedRows':12,'SelectedBytes':643,'ContextLock':58,'RWLockAcquiredReadLocks':9,'RWLockReadersWaitMilliseconds':1,'RealTimeMicroseconds':2924,'UserTimeMicroseconds':1583,'SystemTimeMicroseconds':892,'SoftPageFaults':6,'OSCPUVirtualTimeMicroseconds':3423,'OSReadChars':438,'OSWriteChars':5086} +Settings: {'load_balancing':'random','max_memory_usage':'10000000000'} +``` +##### Last 10 failed queries +**query** +```sql +SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + query, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + databases, + tables, + columns, + used_aggregate_functions, + 
used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + ProfileEvents, + Settings + FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY + AND exception != '' +ORDER BY query_start_time DESC +LIMIT 10 +``` +**result** +``` +Row 1: +────── +type: ExceptionBeforeStart +query_start_time: 2021-12-25 15:33:29 +query_duration_ms: 0 +query_id: 323743ef-4dff-4ed3-9559-f405c64fbd4a +query_kind: Select +is_initial_query: 1 +query: SELECT + '\n' || arrayStringConcat( + arrayMap( + x, + y -> concat(x, ': ', y), + arrayMap(x -> addressToLine(x), trace), + arrayMap(x -> demangle(addressToSymbol(x)), trace)), + '\n') AS trace +FROM system.stack_trace FORMAT Vertical + +read: 0 rows / 0.00 B +written: 0 rows / 0.00 B +result: 0 rows / 0.00 B +memory usage: 0.00 B +exception: Code: 446. DB::Exception: default: Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0: While processing concat('\n', arrayStringConcat(arrayMap((x, y) -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n')) AS trace. (FUNCTION_NOT_ALLOWED) (version 21.11.8.4 (official build)) +stack_trace: +0. DB::Exception::Exception(std::__1::basic_string, std::__1::allocator > const&, int, bool) @ 0x9b682d4 in /usr/bin/clickhouse +1. bool DB::ContextAccess::checkAccessImplHelper(DB::AccessFlags const&) const::'lambda'(std::__1::basic_string, std::__1::allocator > const&, int)::operator()(std::__1::basic_string, std::__1::allocator > const&, int) const @ 0x119786bc in /usr/bin/clickhouse +2. bool DB::ContextAccess::checkAccessImplHelper(DB::AccessFlags const&) const @ 0x11977416 in /usr/bin/clickhouse +3. DB::Context::checkAccess(DB::AccessFlags const&) const @ 0x11eb2f08 in /usr/bin/clickhouse +4. ? @ 0xf96aefb in /usr/bin/clickhouse +5. DB::FunctionFactory::tryGetImpl(std::__1::basic_string, std::__1::allocator > const&, std::__1::shared_ptr) const @ 0x118f74b4 in /usr/bin/clickhouse +6. DB::FunctionFactory::getImpl(std::__1::basic_string, std::__1::allocator > const&, std::__1::shared_ptr) const @ 0x118f71fc in /usr/bin/clickhouse +7. DB::ActionsMatcher::visit(DB::ASTFunction const&, std::__1::shared_ptr const&, DB::ActionsMatcher::Data&) @ 0x120c3abf in /usr/bin/clickhouse +8. DB::ActionsMatcher::visit(DB::ASTFunction const&, std::__1::shared_ptr const&, DB::ActionsMatcher::Data&) @ 0x120c6b9f in /usr/bin/clickhouse +9. DB::ActionsMatcher::visit(DB::ASTFunction const&, std::__1::shared_ptr const&, DB::ActionsMatcher::Data&) @ 0x120c41ed in /usr/bin/clickhouse +10. DB::ActionsMatcher::visit(DB::ASTFunction const&, std::__1::shared_ptr const&, DB::ActionsMatcher::Data&) @ 0x120c41ed in /usr/bin/clickhouse +11. DB::ActionsMatcher::visit(DB::ASTFunction const&, std::__1::shared_ptr const&, DB::ActionsMatcher::Data&) @ 0x120c41ed in /usr/bin/clickhouse +12. DB::ActionsMatcher::visit(DB::ASTExpressionList&, std::__1::shared_ptr const&, DB::ActionsMatcher::Data&) @ 0x120ca818 in /usr/bin/clickhouse +13. DB::InDepthNodeVisitor const>::visit(std::__1::shared_ptr const&) @ 0x12099bb7 in /usr/bin/clickhouse +14. DB::ExpressionAnalyzer::getRootActions(std::__1::shared_ptr const&, bool, std::__1::shared_ptr&, bool) @ 0x120999cb in /usr/bin/clickhouse +15. 
DB::SelectQueryExpressionAnalyzer::appendSelect(DB::ExpressionActionsChain&, bool) @ 0x120a4409 in /usr/bin/clickhouse +16. DB::ExpressionAnalysisResult::ExpressionAnalysisResult(DB::SelectQueryExpressionAnalyzer&, std::__1::shared_ptr const&, bool, bool, bool, std::__1::shared_ptr const&, DB::Block const&) @ 0x120a9070 in /usr/bin/clickhouse +17. DB::InterpreterSelectQuery::getSampleBlockImpl() @ 0x1232fd0d in /usr/bin/clickhouse +18. ? @ 0x12328864 in /usr/bin/clickhouse +19. DB::InterpreterSelectQuery::InterpreterSelectQuery(std::__1::shared_ptr const&, std::__1::shared_ptr, std::__1::optional, std::__1::shared_ptr const&, DB::SelectQueryOptions const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&, std::__1::shared_ptr const&, std::__1::unordered_map, DB::PreparedSetKey::Hash, std::__1::equal_to, std::__1::allocator > > >) @ 0x123232c7 in /usr/bin/clickhouse +20. DB::InterpreterSelectQuery::InterpreterSelectQuery(std::__1::shared_ptr const&, std::__1::shared_ptr, DB::SelectQueryOptions const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) @ 0x12321c54 in /usr/bin/clickhouse +21. DB::InterpreterSelectWithUnionQuery::buildCurrentChildInterpreter(std::__1::shared_ptr const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) @ 0x12547fa2 in /usr/bin/clickhouse +22. DB::InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(std::__1::shared_ptr const&, std::__1::shared_ptr, DB::SelectQueryOptions const&, std::__1::vector, std::__1::allocator >, std::__1::allocator, std::__1::allocator > > > const&) @ 0x12546680 in /usr/bin/clickhouse +23. DB::InterpreterFactory::get(std::__1::shared_ptr&, std::__1::shared_ptr, DB::SelectQueryOptions const&) @ 0x122c6216 in /usr/bin/clickhouse +24. ? @ 0x1277dd26 in /usr/bin/clickhouse +25. DB::executeQuery(DB::ReadBuffer&, DB::WriteBuffer&, bool, std::__1::shared_ptr, std::__1::function, std::__1::allocator > const&, std::__1::basic_string, std::__1::allocator > const&, std::__1::basic_string, std::__1::allocator > const&, std::__1::basic_string, std::__1::allocator > const&)>, std::__1::optional const&) @ 0x12781319 in /usr/bin/clickhouse +26. DB::HTTPHandler::processQuery(DB::HTTPServerRequest&, DB::HTMLForm&, DB::HTTPServerResponse&, DB::HTTPHandler::Output&, std::__1::optional&) @ 0x130c20fa in /usr/bin/clickhouse +27. DB::HTTPHandler::handleRequest(DB::HTTPServerRequest&, DB::HTTPServerResponse&) @ 0x130c6760 in /usr/bin/clickhouse +28. DB::HTTPServerConnection::run() @ 0x1312b5e8 in /usr/bin/clickhouse +29. Poco::Net::TCPServerConnection::start() @ 0x15d682cf in /usr/bin/clickhouse +30. Poco::Net::TCPServerDispatcher::run() @ 0x15d6a6c1 in /usr/bin/clickhouse +31. 
Poco::PooledThread::run() @ 0x15e7f069 in /usr/bin/clickhouse + +user: default +initial_user: default +client: python-requests/2.26.0 +client_hostname: +databases: [] +tables: [] +columns: [] +used_aggregate_functions: [] +used_aggregate_function_combinators: [] +used_database_engines: [] +used_data_type_families: [] +used_dictionaries: [] +used_formats: [] +used_functions: [] +used_storages: [] +used_table_functions: [] +thread_ids: [] +ProfileEvents: {} +Settings: {} + +``` +#### Stack traces +**query** +```sql +SELECT + '\n' || arrayStringConcat( + arrayMap( + x, + y -> concat(x, ': ', y), + arrayMap(x -> addressToLine(x), trace), + arrayMap(x -> demangle(addressToSymbol(x)), trace)), + '\n') AS trace +FROM system.stack_trace +``` +**result** +``` +ClickhouseError("Code: 446. DB::Exception: default: Introspection functions are disabled, because setting 'allow_introspection_functions' is set to 0: While processing concat('\\n', arrayStringConcat(arrayMap((x, y) -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\\n')) AS trace. (FUNCTION_NOT_ALLOWED) (version 21.11.8.4 (official build))",) +``` +#### uname +**command** +``` +uname -a +``` +**result** +``` +Linux clickhouse01 5.10.76-linuxkit #1 SMP Mon Nov 8 10:21:19 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux +``` diff --git a/utils/clickhouse-diagnostics/clickhouse-diagnostics b/utils/clickhouse-diagnostics/clickhouse-diagnostics new file mode 100644 index 00000000000..ffddee0bdc4 --- /dev/null +++ b/utils/clickhouse-diagnostics/clickhouse-diagnostics @@ -0,0 +1,960 @@ +#!/usr/bin/env python3 + +import argparse +import gzip +import io +import json +import socket +import subprocess +import sys +from copy import deepcopy +from datetime import datetime +from typing import MutableMapping + +import jinja2 +import requests +import sqlparse +import tenacity +import xmltodict +import yaml + +SELECT_VERSION = r'SELECT version()' + +SELECT_UPTIME = r''' +{% if version_ge('21.3') -%} +SELECT formatReadableTimeDelta(uptime()) +{% else -%} +SELECT + toString(floor(uptime() / 3600 / 24)) || ' days ' || + toString(floor(uptime() % (24 * 3600) / 3600, 1)) || ' hours' +{% endif -%} +''' + +SELECT_SYSTEM_TABLES = "SELECT name FROM system.tables WHERE database = 'system'" + +SELECT_DATABASE_ENGINES = r'''SELECT + engine, + count() "count" +FROM system.databases +GROUP BY engine +''' + +SELECT_DATABASES = r'''SELECT + name, + engine, + tables, + partitions, + parts, + formatReadableSize(bytes_on_disk) "disk_size" +FROM system.databases db +LEFT JOIN +( + SELECT + database, + uniq(table) "tables", + uniq(table, partition) "partitions", + count() AS parts, + sum(bytes_on_disk) "bytes_on_disk" + FROM system.parts + WHERE active + GROUP BY database +) AS db_stats ON db.name = db_stats.database +ORDER BY bytes_on_disk DESC +LIMIT 10 +''' + +SELECT_TABLE_ENGINES = r'''SELECT + engine, + count() "count" +FROM system.tables +WHERE database != 'system' +GROUP BY engine +''' + +SELECT_DICTIONARIES = r'''SELECT + source, + type, + status, + count() "count" +FROM system.dictionaries +GROUP BY source, type, status +ORDER BY status DESC, source +''' + +SELECT_ACCESS = "SHOW ACCESS" + +SELECT_QUOTA_USAGE = "SHOW QUOTA" + +SELECT_REPLICAS = r'''SELECT + database, + table, + is_leader, + is_readonly, + absolute_delay, + queue_size, + inserts_in_queue, + merges_in_queue +FROM system.replicas +ORDER BY absolute_delay DESC +LIMIT 10 +''' + +SELECT_REPLICATION_QUEUE = r'''SELECT + database, + table, + replica_name, 
+ position, + node_name, + type, + source_replica, + parts_to_merge, + new_part_name, + create_time, + required_quorum, + is_detach, + is_currently_executing, + num_tries, + last_attempt_time, + last_exception, + concat('time: ', toString(last_postpone_time), ', number: ', toString(num_postponed), ', reason: ', postpone_reason) postpone +FROM system.replication_queue +ORDER BY create_time ASC +LIMIT 20 +''' + +SELECT_REPLICATED_FETCHES = r'''SELECT + database, + table, + round(elapsed, 1) "elapsed", + round(100 * progress, 1) "progress", + partition_id, + result_part_name, + result_part_path, + total_size_bytes_compressed, + bytes_read_compressed, + source_replica_path, + source_replica_hostname, + source_replica_port, + interserver_scheme, + to_detached, + thread_id +FROM system.replicated_fetches +''' + +SELECT_PARTS_PER_TABLE = r'''SELECT + database, + table, + count() "partitions", + sum(part_count) "parts", + max(part_count) "max_parts_per_partition" +FROM +( + SELECT + database, + table, + partition, + count() "part_count" + FROM system.parts + WHERE active + GROUP BY database, table, partition +) partitions +GROUP BY database, table +ORDER BY max_parts_per_partition DESC +LIMIT 10 +''' + +SELECT_MERGES = r'''SELECT + database, + table, + round(elapsed, 1) "elapsed", + round(100 * progress, 1) "progress", + is_mutation, + partition_id, +{% if version_ge('20.3') -%} + result_part_path, + source_part_paths, +{% endif -%} + num_parts, + formatReadableSize(total_size_bytes_compressed) "total_size_compressed", + formatReadableSize(bytes_read_uncompressed) "read_uncompressed", + formatReadableSize(bytes_written_uncompressed) "written_uncompressed", + columns_written, +{% if version_ge('20.3') -%} + formatReadableSize(memory_usage) "memory_usage", + thread_id +{% else -%} + formatReadableSize(memory_usage) "memory_usage" +{% endif -%} +FROM system.merges +''' + +SELECT_MUTATIONS = r'''SELECT + database, + table, + mutation_id, + command, + create_time, +{% if version_ge('20.3') -%} + parts_to_do_names, +{% endif -%} + parts_to_do, + is_done, + latest_failed_part, + latest_fail_time, + latest_fail_reason +FROM system.mutations +WHERE NOT is_done +ORDER BY create_time DESC +''' + +SELECT_RECENT_DATA_PARTS = r'''SELECT + database, + table, + engine, + partition_id, + name, +{% if version_ge('20.3') -%} + part_type, +{% endif -%} + active, + level, +{% if version_ge('20.3') -%} + disk_name, +{% endif -%} + path, + marks, + rows, + bytes_on_disk, + data_compressed_bytes, + data_uncompressed_bytes, + marks_bytes, + modification_time, + remove_time, + refcount, + is_frozen, + min_date, + max_date, + min_time, + max_time, + min_block_number, + max_block_number +FROM system.parts +WHERE modification_time > now() - INTERVAL 3 MINUTE +ORDER BY modification_time DESC +''' + +SELECT_DETACHED_DATA_PARTS = r'''SELECT + database, + table, + partition_id, + name, + disk, + reason, + min_block_number, + max_block_number, + level +FROM system.detached_parts +''' + +SELECT_PROCESSES = r'''SELECT + elapsed, + query_id, + {% if normalize_queries -%} + normalizeQuery(query) AS normalized_query, + {% else -%} + query, + {% endif -%} + is_cancelled, + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + formatReadableSize(memory_usage) AS "memory usage", + user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', 
toString(client_version_minor), '.', toString(client_version_patch))) AS client, + {% if version_ge('21.3') -%} + thread_ids, + {% endif -%} + {% if version_ge('21.8') -%} + ProfileEvents, + Settings + {% else -%} + ProfileEvents.Names, + ProfileEvents.Values, + Settings.Names, + Settings.Values + {% endif -%} +FROM system.processes +ORDER BY elapsed DESC +''' + +SELECT_TOP_QUERIES_BY_DURATION = r'''SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + {% if normalize_queries -%} + normalizeQuery(query) AS normalized_query, + {% else -%} + query, + {% endif -%} + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + {% if version_ge('21.3') -%} + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + {% endif -%} + {% if version_ge('21.8') -%} + ProfileEvents, + Settings + {% else -%} + ProfileEvents.Names, + ProfileEvents.Values, + Settings.Names, + Settings.Values + {% endif -%} +FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY +ORDER BY query_duration_ms DESC +LIMIT 10 +''' + +SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r'''SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + {% if normalize_queries -%} + normalizeQuery(query) AS normalized_query, + {% else -%} + query, + {% endif -%} + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + {% if version_ge('21.3') -%} + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + {% endif -%} + {% if version_ge('21.8') -%} + ProfileEvents, + Settings + {% else -%} + ProfileEvents.Names, + ProfileEvents.Values, + Settings.Names, + Settings.Values + {% endif -%} +FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY +ORDER BY memory_usage DESC +LIMIT 10 +''' + +SELECT_FAILED_QUERIES = r'''SELECT + type, + query_start_time, + query_duration_ms, + query_id, + query_kind, + is_initial_query, + {% if normalize_queries -%} + 
normalizeQuery(query) AS normalized_query, + {% else -%} + query, + {% endif -%} + concat(toString(read_rows), ' rows / ', formatReadableSize(read_bytes)) AS read, + concat(toString(written_rows), ' rows / ', formatReadableSize(written_bytes)) AS written, + concat(toString(result_rows), ' rows / ', formatReadableSize(result_bytes)) AS result, + formatReadableSize(memory_usage) AS "memory usage", + exception, + '\n' || stack_trace AS stack_trace, + user, + initial_user, + multiIf(empty(client_name), http_user_agent, concat(client_name, ' ', toString(client_version_major), '.', toString(client_version_minor), '.', toString(client_version_patch))) AS client, + client_hostname, + {% if version_ge('21.3') -%} + databases, + tables, + columns, + used_aggregate_functions, + used_aggregate_function_combinators, + used_database_engines, + used_data_type_families, + used_dictionaries, + used_formats, + used_functions, + used_storages, + used_table_functions, + thread_ids, + {% endif -%} + {% if version_ge('21.8') -%} + ProfileEvents, + Settings + {% else -%} + ProfileEvents.Names, + ProfileEvents.Values, + Settings.Names, + Settings.Values + {% endif -%} +FROM system.query_log +WHERE type != 'QueryStart' + AND event_date >= today() - 1 + AND event_time >= now() - INTERVAL 1 DAY + AND exception != '' +ORDER BY query_start_time DESC +LIMIT 10 +''' + +SELECT_STACK_TRACES = r'''SELECT + '\n' || arrayStringConcat( + arrayMap( + x, + y -> concat(x, ': ', y), + arrayMap(x -> addressToLine(x), trace), + arrayMap(x -> demangle(addressToSymbol(x)), trace)), + '\n') AS trace +FROM system.stack_trace +''' + +SELECT_CRASH_LOG = r'''SELECT + event_time, + signal, + thread_id, + query_id, + '\n' || arrayStringConcat(trace_full, '\n') AS trace, + version +FROM system.crash_log +ORDER BY event_time DESC +''' + + +def retry(exception_types, max_attempts=5, max_interval=5): + """ + Function decorator that retries wrapped function on failures. + """ + return tenacity.retry( + retry=tenacity.retry_if_exception_type(exception_types), + wait=tenacity.wait_random_exponential(multiplier=0.5, max=max_interval), + stop=tenacity.stop_after_attempt(max_attempts), + reraise=True) + + +class ClickhouseError(Exception): + """ + ClickHouse interaction error. + """ + + def __init__(self, response): + self.response = response + super().__init__(self.response.text.strip()) + + +class ClickhouseClient: + """ + ClickHouse client. + """ + + def __init__(self, *, host, port=8123, user=None): + self._session = requests.Session() + if user: + self._session.headers['X-ClickHouse-User'] = user + self._url = f'http://{host}:{port}' + self._timeout = 60 + self._ch_version = None + + @property + def clickhouse_version(self): + if self._ch_version is None: + self._ch_version = self.query(SELECT_VERSION) + + return self._ch_version + + @retry(requests.exceptions.ConnectionError) + def query(self, query, query_args=None, format=None, post_data=None, timeout=None, echo=False, dry_run=False): + """ + Execute query. 
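[Editorial aside, not part of the diff] The retry() helper defined just above is a thin wrapper over tenacity: any callable it decorates is re-invoked on the listed exception types with randomized exponential backoff (capped at max_interval seconds), and because reraise=True the last exception propagates once max_attempts is exhausted. Here it decorates ClickhouseClient.query below for requests.exceptions.ConnectionError. A minimal hypothetical usage sketch, where fetch_ping and its URL are illustrative names only:

import requests

@retry(requests.exceptions.ConnectionError, max_attempts=3, max_interval=2)
def fetch_ping(url='http://localhost:8123/ping'):
    # Each failed attempt sleeps a random exponential interval (at most 2 s) before the
    # next try; after the third failure the ConnectionError is raised to the caller.
    return requests.get(url, timeout=5).text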
+ """ + if query_args: + query = self.render_query(query, **query_args) + + if format: + query += f' FORMAT {format}' + + if timeout is None: + timeout = self._timeout + + if echo: + print(sqlparse.format(query, reindent=True), '\n') + + if dry_run: + return None + + try: + response = self._session.post(self._url, + params={ + 'query': query, + }, + json=post_data, + timeout=timeout) + + response.raise_for_status() + + if format in ('JSON', 'JSONCompact'): + return response.json() + + return response.text.strip() + except requests.exceptions.HTTPError as e: + raise ClickhouseError(e.response) from None + + def render_query(self, query, **kwargs): + env = jinja2.Environment() + + env.globals['version_ge'] = lambda version: version_ge(self.clickhouse_version, version) + + template = env.from_string(query) + return template.render(kwargs) + + +class ClickhouseConfig: + """ + ClickHouse server configuration. + """ + + def __init__(self, config): + self._config = config + + def dump(self, mask_secrets=True): + config = deepcopy(self._config) + if mask_secrets: + self._mask_secrets(config) + + return xmltodict.unparse(config, pretty=True) + + @classmethod + def load(cls): + return ClickhouseConfig(cls._load_config('/var/lib/clickhouse/preprocessed_configs/config.xml')) + + @staticmethod + def _load_config(config_path): + with open(config_path, 'r') as file: + return xmltodict.parse(file.read()) + + @classmethod + def _mask_secrets(cls, config): + if isinstance(config, MutableMapping): + for key, value in list(config.items()): + if isinstance(value, MutableMapping): + cls._mask_secrets(config[key]) + elif key in ('password', 'secret_access_key', 'header', 'identity'): + config[key] = '*****' + + +class DiagnosticsData: + """ + Diagnostics data. + """ + + def __init__(self, args, host): + self.args = args + self.host = host + self._sections = [{'section': None, 'data': {}}] + + def add_string(self, name, value, section=None): + self._section(section)[name] = { + 'type': 'string', + 'value': value, + } + + def add_xml_document(self, name, document, section=None): + self._section(section)[name] = { + 'type': 'xml', + 'value': document, + } + + def add_query(self, name, query, result, section=None): + self._section(section)[name] = { + 'type': 'query', + 'query': query, + 'result': result, + } + + def add_command(self, name, command, result, section=None): + self._section(section)[name] = { + 'type': 'command', + 'command': command, + 'result': result, + } + + def dump(self, format): + if format.startswith('json'): + result = self._dump_json() + elif format.startswith('yaml'): + result = self._dump_yaml() + else: + result = self._dump_wiki() + + if format.endswith('.gz'): + compressor = gzip.GzipFile(mode='wb', fileobj=sys.stdout.buffer) + compressor.write(result.encode()) + else: + print(result) + + def _section(self, name=None): + if self._sections[-1]['section'] != name: + self._sections.append({'section': name, 'data': {}}) + + return self._sections[-1]['data'] + + def _dump_json(self): + """ + Dump diagnostic data in JSON format. + """ + return json.dumps(self._sections, indent=2, ensure_ascii=False) + + def _dump_yaml(self): + """ + Dump diagnostic data in YAML format. + """ + return yaml.dump(self._sections, default_flow_style=False, allow_unicode=True) + + def _dump_wiki(self): + """ + Dump diagnostic data in Yandex wiki format. 
+ """ + + def _write_title(buffer, value): + buffer.write(f'### {value}\n') + + def _write_subtitle(buffer, value): + buffer.write(f'#### {value}\n') + + def _write_string_item(buffer, name, item): + value = item['value'] + if value != '': + value = f'**{value}**' + buffer.write(f'{name}: {value}\n') + + def _write_xml_item(buffer, section_name, name, item): + if section_name: + buffer.write(f'##### {name}\n') + else: + _write_subtitle(buffer, name) + + _write_result(buffer, item['value'], format='XML') + + def _write_query_item(buffer, section_name, name, item): + if section_name: + buffer.write(f'##### {name}\n') + else: + _write_subtitle(buffer, name) + + _write_query(buffer, item['query']) + _write_result(buffer, item['result']) + + def _write_command_item(buffer, section_name, name, item): + if section_name: + buffer.write(f'##### {name}\n') + else: + _write_subtitle(buffer, name) + + _write_command(buffer, item['command']) + _write_result(buffer, item['result']) + + def _write_unknown_item(buffer, section_name, name, item): + if section_name: + buffer.write(f'**{name}**\n') + else: + _write_subtitle(buffer, name) + + json.dump(item, buffer, indent=2) + + def _write_query(buffer, query): + buffer.write('**query**\n') + buffer.write('```sql\n') + buffer.write(query) + buffer.write('\n```\n') + + def _write_command(buffer, command): + buffer.write('**command**\n') + buffer.write('```\n') + buffer.write(command) + buffer.write('\n```\n') + + def _write_result(buffer, result, format=None): + buffer.write('**result**\n') + buffer.write(f'```{format}\n' if format else '```\n') + buffer.write(result) + buffer.write('\n```\n') + + buffer = io.StringIO() + + _write_title(buffer, f'Diagnostics data for host {self.host}') + for section in self._sections: + section_name = section['section'] + if section_name: + _write_subtitle(buffer, section_name) + + for name, item in section['data'].items(): + if item['type'] == 'string': + _write_string_item(buffer, name, item) + elif item['type'] == 'query': + _write_query_item(buffer, section_name, name, item) + elif item['type'] == 'command': + _write_command_item(buffer, section_name, name, item) + elif item['type'] == 'xml': + _write_xml_item(buffer, section_name, name, item) + else: + _write_unknown_item(buffer, section_name, name, item) + + return buffer.getvalue() + + +def main(): + """ + Program entry point. 
+ """ + args = parse_args() + + host = socket.getfqdn() + timestamp = datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S') + client = ClickhouseClient(host=host) + ch_config = ClickhouseConfig.load() + version = client.clickhouse_version + system_tables = [row[0] for row in execute_query(client, SELECT_SYSTEM_TABLES, format='JSONCompact')['data']] + + diagnostics = DiagnosticsData(args, host) + diagnostics.add_string('Version', version) + diagnostics.add_string('Timestamp', timestamp) + diagnostics.add_string('Uptime', execute_query(client, SELECT_UPTIME)) + + diagnostics.add_xml_document('ClickHouse configuration', ch_config.dump()) + + if version_ge(version, '20.8'): + add_query(diagnostics, 'Access configuration', + client=client, + query=SELECT_ACCESS, + format='TSVRaw') + add_query(diagnostics, 'Quotas', + client=client, + query=SELECT_QUOTA_USAGE, + format='Vertical') + + add_query(diagnostics, 'Database engines', + client=client, + query=SELECT_DATABASE_ENGINES, + format='PrettyCompactNoEscapes', + section='Schema') + add_query(diagnostics, 'Databases (top 10 by size)', + client=client, + query=SELECT_DATABASES, + format='PrettyCompactNoEscapes', + section='Schema') + add_query(diagnostics, 'Table engines', + client=client, + query=SELECT_TABLE_ENGINES, + format='PrettyCompactNoEscapes', + section='Schema') + add_query(diagnostics, 'Dictionaries', + client=client, + query=SELECT_DICTIONARIES, + format='PrettyCompactNoEscapes', + section='Schema') + + add_query(diagnostics, 'Replicated tables (top 10 by absolute delay)', + client=client, + query=SELECT_REPLICAS, + format='PrettyCompactNoEscapes', + section='Replication') + add_query(diagnostics, 'Replication queue (top 20 oldest tasks)', + client=client, + query=SELECT_REPLICATION_QUEUE, + format='Vertical', + section='Replication') + if version_ge(version, '21.3'): + add_query(diagnostics, 'Replicated fetches', + client=client, + query=SELECT_REPLICATED_FETCHES, + format='Vertical', + section='Replication') + + add_query(diagnostics, 'Top 10 tables by max parts per partition', + client=client, + query=SELECT_PARTS_PER_TABLE, + format='PrettyCompactNoEscapes') + add_query(diagnostics, 'Merges in progress', + client=client, + query=SELECT_MERGES, + format='Vertical') + add_query(diagnostics, 'Mutations in progress', + client=client, + query=SELECT_MUTATIONS, + format='Vertical') + add_query(diagnostics, 'Recent data parts (modification time within last 3 minutes)', + client=client, + query=SELECT_RECENT_DATA_PARTS, + format='Vertical') + + add_query(diagnostics, 'system.detached_parts', + client=client, + query=SELECT_DETACHED_DATA_PARTS, + format='PrettyCompactNoEscapes', + section='Detached data') + add_command(diagnostics, 'Disk space usage', + command='du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh', + section='Detached data') + + add_query(diagnostics, 'Queries in progress (process list)', + client=client, + query=SELECT_PROCESSES, + format='Vertical', + section='Queries') + add_query(diagnostics, 'Top 10 queries by duration', + client=client, + query=SELECT_TOP_QUERIES_BY_DURATION, + format='Vertical', + section='Queries') + add_query(diagnostics, 'Top 10 queries by memory usage', + client=client, + query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE, + format='Vertical', + section='Queries') + add_query(diagnostics, 'Last 10 failed queries', + client=client, + query=SELECT_FAILED_QUERIES, + format='Vertical', + section='Queries') + + add_query(diagnostics, 'Stack traces', + client=client, + query=SELECT_STACK_TRACES, + 
format='Vertical') + + if 'crash_log' in system_tables: + add_query(diagnostics, 'Crash log', + client=client, + query=SELECT_CRASH_LOG, + format='Vertical') + + add_command(diagnostics, 'uname', 'uname -a') + + diagnostics.dump(args.format) + + +def parse_args(): + """ + Parse command-line arguments. + """ + parser = argparse.ArgumentParser() + parser.add_argument('--format', + choices=['json', 'yaml', 'json.gz', 'yaml.gz', 'wiki', 'wiki.gz'], + default='wiki') + parser.add_argument('--normalize-queries', + action='store_true', + default=False) + return parser.parse_args() + + +def add_query(diagnostics, name, client, query, format, section=None): + query_args = { + 'normalize_queries': diagnostics.args.normalize_queries, + } + query = client.render_query(query, **query_args) + diagnostics.add_query( + name=name, + query=query, + result=execute_query(client, query, render_query=False, format=format), + section=section) + + +def execute_query(client, query, render_query=True, format=None): + if render_query: + query = client.render_query(query) + + try: + return client.query(query, format=format) + except Exception as e: + return repr(e) + + +def add_command(diagnostics, name, command, section=None): + diagnostics.add_command( + name=name, + command=command, + result=execute_command(command), + section=section) + + +def execute_command(command, input=None): + proc = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + if isinstance(input, str): + input = input.encode() + + stdout, stderr = proc.communicate(input=input) + + if proc.returncode: + return f'failed with exit code {proc.returncode}\n{stderr.decode()}' + + return stdout.decode() + + +def version_ge(version1, version2): + """ + Return True if version1 is greater or equal than version2. + """ + return parse_version(version1) >= parse_version(version2) + + +def parse_version(version): + """ + Parse version string. 
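[Editorial aside, not part of the diff] version_ge() above, together with parse_version() whose one-line body follows, backs every version_ge('...') guard in the query templates (render_query() registers it as a Jinja2 global bound to the server version). The comparison is ordinary Python list comparison over the dot-separated integer components; a few illustrative checks that assume nothing beyond these two helpers:

assert parse_version('21.8.3') == [21, 8, 3]
assert version_ge('21.8.3', '21.3')      # [21, 8, 3] >= [21, 3]
assert version_ge('21.3', '21.3')        # equal versions pass the >= check
assert not version_ge('20.8', '21.3')    # [20, 8] < [21, 3]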
+ """ + return [int(x) for x in version.strip().split('.')] + + +if __name__ == '__main__': + main() diff --git a/utils/clickhouse-diagnostics/requirements.txt b/utils/clickhouse-diagnostics/requirements.txt new file mode 100644 index 00000000000..1d2b6ef3916 --- /dev/null +++ b/utils/clickhouse-diagnostics/requirements.txt @@ -0,0 +1,6 @@ +Jinja2 +PyYAML +requests +sqlparse +tenacity +xmltodict diff --git a/utils/grammar-fuzzer/ClickHouseUnlexer.py b/utils/grammar-fuzzer/ClickHouseUnlexer.py deleted file mode 100644 index c91522bd7be..00000000000 --- a/utils/grammar-fuzzer/ClickHouseUnlexer.py +++ /dev/null @@ -1,1771 +0,0 @@ -# Generated by Grammarinator 19.3 - -from itertools import chain -from grammarinator.runtime import * - -charset_0 = list(chain(*multirange_diff(printable_unicode_ranges, [(39, 40),(92, 93)]))) -charset_1 = list(chain(range(97, 98), range(65, 66))) -charset_2 = list(chain(range(98, 99), range(66, 67))) -charset_3 = list(chain(range(99, 100), range(67, 68))) -charset_4 = list(chain(range(100, 101), range(68, 69))) -charset_5 = list(chain(range(101, 102), range(69, 70))) -charset_6 = list(chain(range(102, 103), range(70, 71))) -charset_7 = list(chain(range(103, 104), range(71, 72))) -charset_8 = list(chain(range(104, 105), range(72, 73))) -charset_9 = list(chain(range(105, 106), range(73, 74))) -charset_10 = list(chain(range(106, 107), range(74, 75))) -charset_11 = list(chain(range(107, 108), range(75, 76))) -charset_12 = list(chain(range(108, 109), range(76, 77))) -charset_13 = list(chain(range(109, 110), range(77, 78))) -charset_14 = list(chain(range(110, 111), range(78, 79))) -charset_15 = list(chain(range(111, 112), range(79, 80))) -charset_16 = list(chain(range(112, 113), range(80, 81))) -charset_17 = list(chain(range(113, 114), range(81, 82))) -charset_18 = list(chain(range(114, 115), range(82, 83))) -charset_19 = list(chain(range(115, 116), range(83, 84))) -charset_20 = list(chain(range(116, 117), range(84, 85))) -charset_21 = list(chain(range(117, 118), range(85, 86))) -charset_22 = list(chain(range(118, 119), range(86, 87))) -charset_23 = list(chain(range(119, 120), range(87, 88))) -charset_24 = list(chain(range(120, 121), range(88, 89))) -charset_25 = list(chain(range(121, 122), range(89, 90))) -charset_26 = list(chain(range(122, 123), range(90, 91))) -charset_27 = list(chain(range(97, 123), range(65, 91))) -charset_28 = list(chain(range(48, 58))) -charset_29 = list(chain(range(48, 58), range(97, 103), range(65, 71))) -charset_30 = list(chain(*multirange_diff(printable_unicode_ranges, [(92, 93),(92, 93)]))) -charset_31 = list(chain(range(32, 33), range(11, 12), range(12, 13), range(9, 10), range(13, 14), range(10, 11))) - - -class ClickHouseUnlexer(Grammarinator): - - def __init__(self, *, max_depth=float('inf'), weights=None, cooldown=1.0): - super(ClickHouseUnlexer, self).__init__() - self.unlexer = self - self.max_depth = max_depth - self.weights = weights or dict() - self.cooldown = cooldown - - def EOF(self, *args, **kwargs): - pass - - @depthcontrol - def INTERVAL_TYPE(self): - current = self.create_node(UnlexerRule(name='INTERVAL_TYPE')) - choice = self.choice([0 if [2, 2, 2, 2, 2, 2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_0', i), 1) for i, w in enumerate([1, 1, 1, 1, 1, 1, 1, 1])]) - self.unlexer.weights[('alt_0', choice)] = self.unlexer.weights.get(('alt_0', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.SECOND() - elif choice == 1: - current += self.unlexer.MINUTE() - elif choice 
== 2: - current += self.unlexer.HOUR() - elif choice == 3: - current += self.unlexer.DAY() - elif choice == 4: - current += self.unlexer.WEEK() - elif choice == 5: - current += self.unlexer.MONTH() - elif choice == 6: - current += self.unlexer.QUARTER() - elif choice == 7: - current += self.unlexer.YEAR() - return current - INTERVAL_TYPE.min_depth = 2 - - @depthcontrol - def ALIAS(self): - current = self.create_node(UnlexerRule(name='ALIAS')) - current += self.unlexer.A() - current += self.unlexer.L() - current += self.unlexer.I() - current += self.unlexer.A() - current += self.unlexer.S() - return current - ALIAS.min_depth = 1 - - @depthcontrol - def ALL(self): - current = self.create_node(UnlexerRule(name='ALL')) - current += self.unlexer.A() - current += self.unlexer.L() - current += self.unlexer.L() - return current - ALL.min_depth = 1 - - @depthcontrol - def AND(self): - current = self.create_node(UnlexerRule(name='AND')) - current += self.unlexer.A() - current += self.unlexer.N() - current += self.unlexer.D() - return current - AND.min_depth = 1 - - @depthcontrol - def ANTI(self): - current = self.create_node(UnlexerRule(name='ANTI')) - current += self.unlexer.A() - current += self.unlexer.N() - current += self.unlexer.T() - current += self.unlexer.I() - return current - ANTI.min_depth = 1 - - @depthcontrol - def ANY(self): - current = self.create_node(UnlexerRule(name='ANY')) - current += self.unlexer.A() - current += self.unlexer.N() - current += self.unlexer.Y() - return current - ANY.min_depth = 1 - - @depthcontrol - def ARRAY(self): - current = self.create_node(UnlexerRule(name='ARRAY')) - current += self.unlexer.A() - current += self.unlexer.R() - current += self.unlexer.R() - current += self.unlexer.A() - current += self.unlexer.Y() - return current - ARRAY.min_depth = 1 - - @depthcontrol - def AS(self): - current = self.create_node(UnlexerRule(name='AS')) - current += self.unlexer.A() - current += self.unlexer.S() - return current - AS.min_depth = 1 - - @depthcontrol - def ASCENDING(self): - current = self.create_node(UnlexerRule(name='ASCENDING')) - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_9', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_9', choice)] = self.unlexer.weights.get(('alt_9', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.A() - current += self.unlexer.S() - current += self.unlexer.C() - elif choice == 1: - current += self.unlexer.A() - current += self.unlexer.S() - current += self.unlexer.C() - current += self.unlexer.E() - current += self.unlexer.N() - current += self.unlexer.D() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.G() - return current - ASCENDING.min_depth = 1 - - @depthcontrol - def ASOF(self): - current = self.create_node(UnlexerRule(name='ASOF')) - current += self.unlexer.A() - current += self.unlexer.S() - current += self.unlexer.O() - current += self.unlexer.F() - return current - ASOF.min_depth = 1 - - @depthcontrol - def BETWEEN(self): - current = self.create_node(UnlexerRule(name='BETWEEN')) - current += self.unlexer.B() - current += self.unlexer.E() - current += self.unlexer.T() - current += self.unlexer.W() - current += self.unlexer.E() - current += self.unlexer.E() - current += self.unlexer.N() - return current - BETWEEN.min_depth = 1 - - @depthcontrol - def BOTH(self): - current = self.create_node(UnlexerRule(name='BOTH')) - current += self.unlexer.B() - current += self.unlexer.O() - 
current += self.unlexer.T() - current += self.unlexer.H() - return current - BOTH.min_depth = 1 - - @depthcontrol - def BY(self): - current = self.create_node(UnlexerRule(name='BY')) - current += self.unlexer.B() - current += self.unlexer.Y() - return current - BY.min_depth = 1 - - @depthcontrol - def CASE(self): - current = self.create_node(UnlexerRule(name='CASE')) - current += self.unlexer.C() - current += self.unlexer.A() - current += self.unlexer.S() - current += self.unlexer.E() - return current - CASE.min_depth = 1 - - @depthcontrol - def CAST(self): - current = self.create_node(UnlexerRule(name='CAST')) - current += self.unlexer.C() - current += self.unlexer.A() - current += self.unlexer.S() - current += self.unlexer.T() - return current - CAST.min_depth = 1 - - @depthcontrol - def CLUSTER(self): - current = self.create_node(UnlexerRule(name='CLUSTER')) - current += self.unlexer.C() - current += self.unlexer.L() - current += self.unlexer.U() - current += self.unlexer.S() - current += self.unlexer.T() - current += self.unlexer.E() - current += self.unlexer.R() - return current - CLUSTER.min_depth = 1 - - @depthcontrol - def COLLATE(self): - current = self.create_node(UnlexerRule(name='COLLATE')) - current += self.unlexer.C() - current += self.unlexer.O() - current += self.unlexer.L() - current += self.unlexer.L() - current += self.unlexer.A() - current += self.unlexer.T() - current += self.unlexer.E() - return current - COLLATE.min_depth = 1 - - @depthcontrol - def CREATE(self): - current = self.create_node(UnlexerRule(name='CREATE')) - current += self.unlexer.C() - current += self.unlexer.R() - current += self.unlexer.E() - current += self.unlexer.A() - current += self.unlexer.T() - current += self.unlexer.E() - return current - CREATE.min_depth = 1 - - @depthcontrol - def CROSS(self): - current = self.create_node(UnlexerRule(name='CROSS')) - current += self.unlexer.C() - current += self.unlexer.R() - current += self.unlexer.O() - current += self.unlexer.S() - current += self.unlexer.S() - return current - CROSS.min_depth = 1 - - @depthcontrol - def DATABASE(self): - current = self.create_node(UnlexerRule(name='DATABASE')) - current += self.unlexer.D() - current += self.unlexer.A() - current += self.unlexer.T() - current += self.unlexer.A() - current += self.unlexer.B() - current += self.unlexer.A() - current += self.unlexer.S() - current += self.unlexer.E() - return current - DATABASE.min_depth = 1 - - @depthcontrol - def DAY(self): - current = self.create_node(UnlexerRule(name='DAY')) - current += self.unlexer.D() - current += self.unlexer.A() - current += self.unlexer.Y() - return current - DAY.min_depth = 1 - - @depthcontrol - def DEFAULT(self): - current = self.create_node(UnlexerRule(name='DEFAULT')) - current += self.unlexer.D() - current += self.unlexer.E() - current += self.unlexer.F() - current += self.unlexer.A() - current += self.unlexer.U() - current += self.unlexer.L() - current += self.unlexer.T() - return current - DEFAULT.min_depth = 1 - - @depthcontrol - def DELETE(self): - current = self.create_node(UnlexerRule(name='DELETE')) - current += self.unlexer.D() - current += self.unlexer.E() - current += self.unlexer.L() - current += self.unlexer.E() - current += self.unlexer.T() - current += self.unlexer.E() - return current - DELETE.min_depth = 1 - - @depthcontrol - def DESCENDING(self): - current = self.create_node(UnlexerRule(name='DESCENDING')) - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_12', i), 1) for 
i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_12', choice)] = self.unlexer.weights.get(('alt_12', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.D() - current += self.unlexer.E() - current += self.unlexer.S() - current += self.unlexer.C() - elif choice == 1: - current += self.unlexer.D() - current += self.unlexer.E() - current += self.unlexer.S() - current += self.unlexer.C() - current += self.unlexer.E() - current += self.unlexer.N() - current += self.unlexer.D() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.G() - return current - DESCENDING.min_depth = 1 - - @depthcontrol - def DISK(self): - current = self.create_node(UnlexerRule(name='DISK')) - current += self.unlexer.D() - current += self.unlexer.I() - current += self.unlexer.S() - current += self.unlexer.K() - return current - DISK.min_depth = 1 - - @depthcontrol - def DISTINCT(self): - current = self.create_node(UnlexerRule(name='DISTINCT')) - current += self.unlexer.D() - current += self.unlexer.I() - current += self.unlexer.S() - current += self.unlexer.T() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.C() - current += self.unlexer.T() - return current - DISTINCT.min_depth = 1 - - @depthcontrol - def DROP(self): - current = self.create_node(UnlexerRule(name='DROP')) - current += self.unlexer.D() - current += self.unlexer.R() - current += self.unlexer.O() - current += self.unlexer.P() - return current - DROP.min_depth = 1 - - @depthcontrol - def ELSE(self): - current = self.create_node(UnlexerRule(name='ELSE')) - current += self.unlexer.E() - current += self.unlexer.L() - current += self.unlexer.S() - current += self.unlexer.E() - return current - ELSE.min_depth = 1 - - @depthcontrol - def END(self): - current = self.create_node(UnlexerRule(name='END')) - current += self.unlexer.E() - current += self.unlexer.N() - current += self.unlexer.D() - return current - END.min_depth = 1 - - @depthcontrol - def ENGINE(self): - current = self.create_node(UnlexerRule(name='ENGINE')) - current += self.unlexer.E() - current += self.unlexer.N() - current += self.unlexer.G() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.E() - return current - ENGINE.min_depth = 1 - - @depthcontrol - def EXISTS(self): - current = self.create_node(UnlexerRule(name='EXISTS')) - current += self.unlexer.E() - current += self.unlexer.X() - current += self.unlexer.I() - current += self.unlexer.S() - current += self.unlexer.T() - current += self.unlexer.S() - return current - EXISTS.min_depth = 1 - - @depthcontrol - def EXTRACT(self): - current = self.create_node(UnlexerRule(name='EXTRACT')) - current += self.unlexer.E() - current += self.unlexer.X() - current += self.unlexer.T() - current += self.unlexer.R() - current += self.unlexer.A() - current += self.unlexer.C() - current += self.unlexer.T() - return current - EXTRACT.min_depth = 1 - - @depthcontrol - def FINAL(self): - current = self.create_node(UnlexerRule(name='FINAL')) - current += self.unlexer.F() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.A() - current += self.unlexer.L() - return current - FINAL.min_depth = 1 - - @depthcontrol - def FIRST(self): - current = self.create_node(UnlexerRule(name='FIRST')) - current += self.unlexer.F() - current += self.unlexer.I() - current += self.unlexer.R() - current += self.unlexer.S() - current += self.unlexer.T() - return current - FIRST.min_depth = 1 - - @depthcontrol - 
def FORMAT(self): - current = self.create_node(UnlexerRule(name='FORMAT')) - current += self.unlexer.F() - current += self.unlexer.O() - current += self.unlexer.R() - current += self.unlexer.M() - current += self.unlexer.A() - current += self.unlexer.T() - return current - FORMAT.min_depth = 1 - - @depthcontrol - def FROM(self): - current = self.create_node(UnlexerRule(name='FROM')) - current += self.unlexer.F() - current += self.unlexer.R() - current += self.unlexer.O() - current += self.unlexer.M() - return current - FROM.min_depth = 1 - - @depthcontrol - def FULL(self): - current = self.create_node(UnlexerRule(name='FULL')) - current += self.unlexer.F() - current += self.unlexer.U() - current += self.unlexer.L() - current += self.unlexer.L() - return current - FULL.min_depth = 1 - - @depthcontrol - def GLOBAL(self): - current = self.create_node(UnlexerRule(name='GLOBAL')) - current += self.unlexer.G() - current += self.unlexer.L() - current += self.unlexer.O() - current += self.unlexer.B() - current += self.unlexer.A() - current += self.unlexer.L() - return current - GLOBAL.min_depth = 1 - - @depthcontrol - def GROUP(self): - current = self.create_node(UnlexerRule(name='GROUP')) - current += self.unlexer.G() - current += self.unlexer.R() - current += self.unlexer.O() - current += self.unlexer.U() - current += self.unlexer.P() - return current - GROUP.min_depth = 1 - - @depthcontrol - def HAVING(self): - current = self.create_node(UnlexerRule(name='HAVING')) - current += self.unlexer.H() - current += self.unlexer.A() - current += self.unlexer.V() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.G() - return current - HAVING.min_depth = 1 - - @depthcontrol - def HOUR(self): - current = self.create_node(UnlexerRule(name='HOUR')) - current += self.unlexer.H() - current += self.unlexer.O() - current += self.unlexer.U() - current += self.unlexer.R() - return current - HOUR.min_depth = 1 - - @depthcontrol - def IF(self): - current = self.create_node(UnlexerRule(name='IF')) - current += self.unlexer.I() - current += self.unlexer.F() - return current - IF.min_depth = 1 - - @depthcontrol - def IN(self): - current = self.create_node(UnlexerRule(name='IN')) - current += self.unlexer.I() - current += self.unlexer.N() - return current - IN.min_depth = 1 - - @depthcontrol - def INF(self): - current = self.create_node(UnlexerRule(name='INF')) - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_15', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_15', choice)] = self.unlexer.weights.get(('alt_15', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.F() - elif choice == 1: - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.F() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.I() - current += self.unlexer.T() - current += self.unlexer.Y() - return current - INF.min_depth = 1 - - @depthcontrol - def INNER(self): - current = self.create_node(UnlexerRule(name='INNER')) - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.N() - current += self.unlexer.E() - current += self.unlexer.R() - return current - INNER.min_depth = 1 - - @depthcontrol - def INSERT(self): - current = self.create_node(UnlexerRule(name='INSERT')) - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.S() - 
current += self.unlexer.E() - current += self.unlexer.R() - current += self.unlexer.T() - return current - INSERT.min_depth = 1 - - @depthcontrol - def INTERVAL(self): - current = self.create_node(UnlexerRule(name='INTERVAL')) - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.T() - current += self.unlexer.E() - current += self.unlexer.R() - current += self.unlexer.V() - current += self.unlexer.A() - current += self.unlexer.L() - return current - INTERVAL.min_depth = 1 - - @depthcontrol - def INTO(self): - current = self.create_node(UnlexerRule(name='INTO')) - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.T() - current += self.unlexer.O() - return current - INTO.min_depth = 1 - - @depthcontrol - def IS(self): - current = self.create_node(UnlexerRule(name='IS')) - current += self.unlexer.I() - current += self.unlexer.S() - return current - IS.min_depth = 1 - - @depthcontrol - def JOIN(self): - current = self.create_node(UnlexerRule(name='JOIN')) - current += self.unlexer.J() - current += self.unlexer.O() - current += self.unlexer.I() - current += self.unlexer.N() - return current - JOIN.min_depth = 1 - - @depthcontrol - def KEY(self): - current = self.create_node(UnlexerRule(name='KEY')) - current += self.unlexer.K() - current += self.unlexer.E() - current += self.unlexer.Y() - return current - KEY.min_depth = 1 - - @depthcontrol - def LAST(self): - current = self.create_node(UnlexerRule(name='LAST')) - current += self.unlexer.L() - current += self.unlexer.A() - current += self.unlexer.S() - current += self.unlexer.T() - return current - LAST.min_depth = 1 - - @depthcontrol - def LEADING(self): - current = self.create_node(UnlexerRule(name='LEADING')) - current += self.unlexer.L() - current += self.unlexer.E() - current += self.unlexer.A() - current += self.unlexer.D() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.G() - return current - LEADING.min_depth = 1 - - @depthcontrol - def LEFT(self): - current = self.create_node(UnlexerRule(name='LEFT')) - current += self.unlexer.L() - current += self.unlexer.E() - current += self.unlexer.F() - current += self.unlexer.T() - return current - LEFT.min_depth = 1 - - @depthcontrol - def LIKE(self): - current = self.create_node(UnlexerRule(name='LIKE')) - current += self.unlexer.L() - current += self.unlexer.I() - current += self.unlexer.K() - current += self.unlexer.E() - return current - LIKE.min_depth = 1 - - @depthcontrol - def LIMIT(self): - current = self.create_node(UnlexerRule(name='LIMIT')) - current += self.unlexer.L() - current += self.unlexer.I() - current += self.unlexer.M() - current += self.unlexer.I() - current += self.unlexer.T() - return current - LIMIT.min_depth = 1 - - @depthcontrol - def LOCAL(self): - current = self.create_node(UnlexerRule(name='LOCAL')) - current += self.unlexer.L() - current += self.unlexer.O() - current += self.unlexer.C() - current += self.unlexer.A() - current += self.unlexer.L() - return current - LOCAL.min_depth = 1 - - @depthcontrol - def MATERIALIZED(self): - current = self.create_node(UnlexerRule(name='MATERIALIZED')) - current += self.unlexer.M() - current += self.unlexer.A() - current += self.unlexer.T() - current += self.unlexer.E() - current += self.unlexer.R() - current += self.unlexer.I() - current += self.unlexer.A() - current += self.unlexer.L() - current += self.unlexer.I() - current += self.unlexer.Z() - current += self.unlexer.E() - current += self.unlexer.D() - return current - 
MATERIALIZED.min_depth = 1 - - @depthcontrol - def MINUTE(self): - current = self.create_node(UnlexerRule(name='MINUTE')) - current += self.unlexer.M() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.U() - current += self.unlexer.T() - current += self.unlexer.E() - return current - MINUTE.min_depth = 1 - - @depthcontrol - def MONTH(self): - current = self.create_node(UnlexerRule(name='MONTH')) - current += self.unlexer.M() - current += self.unlexer.O() - current += self.unlexer.N() - current += self.unlexer.T() - current += self.unlexer.H() - return current - MONTH.min_depth = 1 - - @depthcontrol - def NAN_SQL(self): - current = self.create_node(UnlexerRule(name='NAN_SQL')) - current += self.unlexer.N() - current += self.unlexer.A() - current += self.unlexer.N() - return current - NAN_SQL.min_depth = 1 - - @depthcontrol - def NOT(self): - current = self.create_node(UnlexerRule(name='NOT')) - current += self.unlexer.N() - current += self.unlexer.O() - current += self.unlexer.T() - return current - NOT.min_depth = 1 - - @depthcontrol - def NULL_SQL(self): - current = self.create_node(UnlexerRule(name='NULL_SQL')) - current += self.unlexer.N() - current += self.unlexer.U() - current += self.unlexer.L() - current += self.unlexer.L() - return current - NULL_SQL.min_depth = 1 - - @depthcontrol - def NULLS(self): - current = self.create_node(UnlexerRule(name='NULLS')) - current += self.unlexer.N() - current += self.unlexer.U() - current += self.unlexer.L() - current += self.unlexer.L() - current += self.unlexer.S() - return current - NULLS.min_depth = 1 - - @depthcontrol - def OFFSET(self): - current = self.create_node(UnlexerRule(name='OFFSET')) - current += self.unlexer.O() - current += self.unlexer.F() - current += self.unlexer.F() - current += self.unlexer.S() - current += self.unlexer.E() - current += self.unlexer.T() - return current - OFFSET.min_depth = 1 - - @depthcontrol - def ON(self): - current = self.create_node(UnlexerRule(name='ON')) - current += self.unlexer.O() - current += self.unlexer.N() - return current - ON.min_depth = 1 - - @depthcontrol - def OR(self): - current = self.create_node(UnlexerRule(name='OR')) - current += self.unlexer.O() - current += self.unlexer.R() - return current - OR.min_depth = 1 - - @depthcontrol - def ORDER(self): - current = self.create_node(UnlexerRule(name='ORDER')) - current += self.unlexer.O() - current += self.unlexer.R() - current += self.unlexer.D() - current += self.unlexer.E() - current += self.unlexer.R() - return current - ORDER.min_depth = 1 - - @depthcontrol - def OUTER(self): - current = self.create_node(UnlexerRule(name='OUTER')) - current += self.unlexer.O() - current += self.unlexer.U() - current += self.unlexer.T() - current += self.unlexer.E() - current += self.unlexer.R() - return current - OUTER.min_depth = 1 - - @depthcontrol - def OUTFILE(self): - current = self.create_node(UnlexerRule(name='OUTFILE')) - current += self.unlexer.O() - current += self.unlexer.U() - current += self.unlexer.T() - current += self.unlexer.F() - current += self.unlexer.I() - current += self.unlexer.L() - current += self.unlexer.E() - return current - OUTFILE.min_depth = 1 - - @depthcontrol - def PARTITION(self): - current = self.create_node(UnlexerRule(name='PARTITION')) - current += self.unlexer.P() - current += self.unlexer.A() - current += self.unlexer.R() - current += self.unlexer.T() - current += self.unlexer.I() - current += self.unlexer.T() - current += self.unlexer.I() - current += self.unlexer.O() - current 
+= self.unlexer.N() - return current - PARTITION.min_depth = 1 - - @depthcontrol - def PREWHERE(self): - current = self.create_node(UnlexerRule(name='PREWHERE')) - current += self.unlexer.P() - current += self.unlexer.R() - current += self.unlexer.E() - current += self.unlexer.W() - current += self.unlexer.H() - current += self.unlexer.E() - current += self.unlexer.R() - current += self.unlexer.E() - return current - PREWHERE.min_depth = 1 - - @depthcontrol - def PRIMARY(self): - current = self.create_node(UnlexerRule(name='PRIMARY')) - current += self.unlexer.P() - current += self.unlexer.R() - current += self.unlexer.I() - current += self.unlexer.M() - current += self.unlexer.A() - current += self.unlexer.R() - current += self.unlexer.Y() - return current - PRIMARY.min_depth = 1 - - @depthcontrol - def QUARTER(self): - current = self.create_node(UnlexerRule(name='QUARTER')) - current += self.unlexer.Q() - current += self.unlexer.U() - current += self.unlexer.A() - current += self.unlexer.R() - current += self.unlexer.T() - current += self.unlexer.E() - current += self.unlexer.R() - return current - QUARTER.min_depth = 1 - - @depthcontrol - def RIGHT(self): - current = self.create_node(UnlexerRule(name='RIGHT')) - current += self.unlexer.R() - current += self.unlexer.I() - current += self.unlexer.G() - current += self.unlexer.H() - current += self.unlexer.T() - return current - RIGHT.min_depth = 1 - - @depthcontrol - def SAMPLE(self): - current = self.create_node(UnlexerRule(name='SAMPLE')) - current += self.unlexer.S() - current += self.unlexer.A() - current += self.unlexer.M() - current += self.unlexer.P() - current += self.unlexer.L() - current += self.unlexer.E() - return current - SAMPLE.min_depth = 1 - - @depthcontrol - def SECOND(self): - current = self.create_node(UnlexerRule(name='SECOND')) - current += self.unlexer.S() - current += self.unlexer.E() - current += self.unlexer.C() - current += self.unlexer.O() - current += self.unlexer.N() - current += self.unlexer.D() - return current - SECOND.min_depth = 1 - - @depthcontrol - def SELECT(self): - current = self.create_node(UnlexerRule(name='SELECT')) - current += self.unlexer.S() - current += self.unlexer.E() - current += self.unlexer.L() - current += self.unlexer.E() - current += self.unlexer.C() - current += self.unlexer.T() - return current - SELECT.min_depth = 1 - - @depthcontrol - def SEMI(self): - current = self.create_node(UnlexerRule(name='SEMI')) - current += self.unlexer.S() - current += self.unlexer.E() - current += self.unlexer.M() - current += self.unlexer.I() - return current - SEMI.min_depth = 1 - - @depthcontrol - def SET(self): - current = self.create_node(UnlexerRule(name='SET')) - current += self.unlexer.S() - current += self.unlexer.E() - current += self.unlexer.T() - return current - SET.min_depth = 1 - - @depthcontrol - def SETTINGS(self): - current = self.create_node(UnlexerRule(name='SETTINGS')) - current += self.unlexer.S() - current += self.unlexer.E() - current += self.unlexer.T() - current += self.unlexer.T() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.G() - current += self.unlexer.S() - return current - SETTINGS.min_depth = 1 - - @depthcontrol - def TABLE(self): - current = self.create_node(UnlexerRule(name='TABLE')) - current += self.unlexer.T() - current += self.unlexer.A() - current += self.unlexer.B() - current += self.unlexer.L() - current += self.unlexer.E() - return current - TABLE.min_depth = 1 - - @depthcontrol - def TEMPORARY(self): - current = 
self.create_node(UnlexerRule(name='TEMPORARY')) - current += self.unlexer.T() - current += self.unlexer.E() - current += self.unlexer.M() - current += self.unlexer.P() - current += self.unlexer.O() - current += self.unlexer.R() - current += self.unlexer.A() - current += self.unlexer.R() - current += self.unlexer.Y() - return current - TEMPORARY.min_depth = 1 - - @depthcontrol - def THEN(self): - current = self.create_node(UnlexerRule(name='THEN')) - current += self.unlexer.T() - current += self.unlexer.H() - current += self.unlexer.E() - current += self.unlexer.N() - return current - THEN.min_depth = 1 - - @depthcontrol - def TO(self): - current = self.create_node(UnlexerRule(name='TO')) - current += self.unlexer.T() - current += self.unlexer.O() - return current - TO.min_depth = 1 - - @depthcontrol - def TOTALS(self): - current = self.create_node(UnlexerRule(name='TOTALS')) - current += self.unlexer.T() - current += self.unlexer.O() - current += self.unlexer.T() - current += self.unlexer.A() - current += self.unlexer.L() - current += self.unlexer.S() - return current - TOTALS.min_depth = 1 - - @depthcontrol - def TRAILING(self): - current = self.create_node(UnlexerRule(name='TRAILING')) - current += self.unlexer.T() - current += self.unlexer.R() - current += self.unlexer.A() - current += self.unlexer.I() - current += self.unlexer.L() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.G() - return current - TRAILING.min_depth = 1 - - @depthcontrol - def TRIM(self): - current = self.create_node(UnlexerRule(name='TRIM')) - current += self.unlexer.T() - current += self.unlexer.R() - current += self.unlexer.I() - current += self.unlexer.M() - return current - TRIM.min_depth = 1 - - @depthcontrol - def TTL(self): - current = self.create_node(UnlexerRule(name='TTL')) - current += self.unlexer.T() - current += self.unlexer.T() - current += self.unlexer.L() - return current - TTL.min_depth = 1 - - @depthcontrol - def UNION(self): - current = self.create_node(UnlexerRule(name='UNION')) - current += self.unlexer.U() - current += self.unlexer.N() - current += self.unlexer.I() - current += self.unlexer.O() - current += self.unlexer.N() - return current - UNION.min_depth = 1 - - @depthcontrol - def USING(self): - current = self.create_node(UnlexerRule(name='USING')) - current += self.unlexer.U() - current += self.unlexer.S() - current += self.unlexer.I() - current += self.unlexer.N() - current += self.unlexer.G() - return current - USING.min_depth = 1 - - @depthcontrol - def VALUES(self): - current = self.create_node(UnlexerRule(name='VALUES')) - current += self.unlexer.V() - current += self.unlexer.A() - current += self.unlexer.L() - current += self.unlexer.U() - current += self.unlexer.E() - current += self.unlexer.S() - return current - VALUES.min_depth = 1 - - @depthcontrol - def VOLUME(self): - current = self.create_node(UnlexerRule(name='VOLUME')) - current += self.unlexer.V() - current += self.unlexer.O() - current += self.unlexer.L() - current += self.unlexer.U() - current += self.unlexer.M() - current += self.unlexer.E() - return current - VOLUME.min_depth = 1 - - @depthcontrol - def WEEK(self): - current = self.create_node(UnlexerRule(name='WEEK')) - current += self.unlexer.W() - current += self.unlexer.E() - current += self.unlexer.E() - current += self.unlexer.K() - return current - WEEK.min_depth = 1 - - @depthcontrol - def WHEN(self): - current = self.create_node(UnlexerRule(name='WHEN')) - current += self.unlexer.W() - current += self.unlexer.H() - 
current += self.unlexer.E() - current += self.unlexer.N() - return current - WHEN.min_depth = 1 - - @depthcontrol - def WHERE(self): - current = self.create_node(UnlexerRule(name='WHERE')) - current += self.unlexer.W() - current += self.unlexer.H() - current += self.unlexer.E() - current += self.unlexer.R() - current += self.unlexer.E() - return current - WHERE.min_depth = 1 - - @depthcontrol - def WITH(self): - current = self.create_node(UnlexerRule(name='WITH')) - current += self.unlexer.W() - current += self.unlexer.I() - current += self.unlexer.T() - current += self.unlexer.H() - return current - WITH.min_depth = 1 - - @depthcontrol - def YEAR(self): - current = self.create_node(UnlexerRule(name='YEAR')) - current += self.unlexer.Y() - current += self.unlexer.E() - current += self.unlexer.A() - current += self.unlexer.R() - return current - YEAR.min_depth = 1 - - @depthcontrol - def IDENTIFIER(self): - current = self.create_node(UnlexerRule(name='IDENTIFIER')) - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_18', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_18', choice)] = self.unlexer.weights.get(('alt_18', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.LETTER() - elif choice == 1: - current += self.unlexer.UNDERSCORE() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_more(): - choice = self.choice([0 if [1, 1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_22', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_22', choice)] = self.unlexer.weights.get(('alt_22', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.LETTER() - elif choice == 1: - current += self.unlexer.UNDERSCORE() - elif choice == 2: - current += self.unlexer.DEC_DIGIT() - - return current - IDENTIFIER.min_depth = 1 - - @depthcontrol - def FLOATING_LITERAL(self): - current = self.create_node(UnlexerRule(name='FLOATING_LITERAL')) - choice = self.choice([0 if [2, 2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_26', i), 1) for i, w in enumerate([1, 1, 1, 1])]) - self.unlexer.weights[('alt_26', choice)] = self.unlexer.weights.get(('alt_26', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.HEXADECIMAL_LITERAL() - current += self.unlexer.DOT() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_more(): - current += self.unlexer.HEX_DIGIT() - - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_33', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_33', choice)] = self.unlexer.weights.get(('alt_33', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.P() - elif choice == 1: - current += self.unlexer.E() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_37', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_37', choice)] = self.unlexer.weights.get(('alt_37', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.PLUS() - elif choice == 1: - current += self.unlexer.DASH() - - if self.unlexer.max_depth >= 0: - for _ in self.one_or_more(): - current += self.unlexer.DEC_DIGIT() - - - elif choice == 1: - current += self.unlexer.HEXADECIMAL_LITERAL() - choice = 
self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_40', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_40', choice)] = self.unlexer.weights.get(('alt_40', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.P() - elif choice == 1: - current += self.unlexer.E() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_44', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_44', choice)] = self.unlexer.weights.get(('alt_44', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.PLUS() - elif choice == 1: - current += self.unlexer.DASH() - - if self.unlexer.max_depth >= 0: - for _ in self.one_or_more(): - current += self.unlexer.DEC_DIGIT() - - elif choice == 2: - current += self.unlexer.INTEGER_LITERAL() - current += self.unlexer.DOT() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_more(): - current += self.unlexer.DEC_DIGIT() - - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - current += self.unlexer.E() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_50', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_50', choice)] = self.unlexer.weights.get(('alt_50', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.PLUS() - elif choice == 1: - current += self.unlexer.DASH() - - if self.unlexer.max_depth >= 0: - for _ in self.one_or_more(): - current += self.unlexer.DEC_DIGIT() - - - elif choice == 3: - current += self.unlexer.INTEGER_LITERAL() - current += self.unlexer.E() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_54', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_54', choice)] = self.unlexer.weights.get(('alt_54', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.PLUS() - elif choice == 1: - current += self.unlexer.DASH() - - if self.unlexer.max_depth >= 0: - for _ in self.one_or_more(): - current += self.unlexer.DEC_DIGIT() - - return current - FLOATING_LITERAL.min_depth = 2 - - @depthcontrol - def HEXADECIMAL_LITERAL(self): - current = self.create_node(UnlexerRule(name='HEXADECIMAL_LITERAL')) - current += self.create_node(UnlexerRule(src='0')) - current += self.unlexer.X() - if self.unlexer.max_depth >= 0: - for _ in self.one_or_more(): - current += self.unlexer.HEX_DIGIT() - - return current - HEXADECIMAL_LITERAL.min_depth = 1 - - @depthcontrol - def INTEGER_LITERAL(self): - current = self.create_node(UnlexerRule(name='INTEGER_LITERAL')) - if self.unlexer.max_depth >= 0: - for _ in self.one_or_more(): - current += self.unlexer.DEC_DIGIT() - - return current - INTEGER_LITERAL.min_depth = 1 - - @depthcontrol - def STRING_LITERAL(self): - current = self.create_node(UnlexerRule(name='STRING_LITERAL')) - current += self.unlexer.QUOTE_SINGLE() - if self.unlexer.max_depth >= 0: - for _ in self.zero_or_more(): - choice = self.choice([0 if [0, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_59', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_59', choice)] = self.unlexer.weights.get(('alt_59', choice), 1) * self.unlexer.cooldown - if choice == 0: - 
current += UnlexerRule(src=self.char_from_list(charset_0)) - elif choice == 1: - current += self.unlexer.BACKSLASH() - current += UnlexerRule(src=self.any_char()) - - current += self.unlexer.QUOTE_SINGLE() - return current - STRING_LITERAL.min_depth = 1 - - @depthcontrol - def A(self): - current = self.create_node(UnlexerRule(name='A')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_1))) - return current - A.min_depth = 0 - - @depthcontrol - def B(self): - current = self.create_node(UnlexerRule(name='B')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_2))) - return current - B.min_depth = 0 - - @depthcontrol - def C(self): - current = self.create_node(UnlexerRule(name='C')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_3))) - return current - C.min_depth = 0 - - @depthcontrol - def D(self): - current = self.create_node(UnlexerRule(name='D')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_4))) - return current - D.min_depth = 0 - - @depthcontrol - def E(self): - current = self.create_node(UnlexerRule(name='E')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_5))) - return current - E.min_depth = 0 - - @depthcontrol - def F(self): - current = self.create_node(UnlexerRule(name='F')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_6))) - return current - F.min_depth = 0 - - @depthcontrol - def G(self): - current = self.create_node(UnlexerRule(name='G')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_7))) - return current - G.min_depth = 0 - - @depthcontrol - def H(self): - current = self.create_node(UnlexerRule(name='H')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_8))) - return current - H.min_depth = 0 - - @depthcontrol - def I(self): - current = self.create_node(UnlexerRule(name='I')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_9))) - return current - I.min_depth = 0 - - @depthcontrol - def J(self): - current = self.create_node(UnlexerRule(name='J')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_10))) - return current - J.min_depth = 0 - - @depthcontrol - def K(self): - current = self.create_node(UnlexerRule(name='K')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_11))) - return current - K.min_depth = 0 - - @depthcontrol - def L(self): - current = self.create_node(UnlexerRule(name='L')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_12))) - return current - L.min_depth = 0 - - @depthcontrol - def M(self): - current = self.create_node(UnlexerRule(name='M')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_13))) - return current - M.min_depth = 0 - - @depthcontrol - def N(self): - current = self.create_node(UnlexerRule(name='N')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_14))) - return current - N.min_depth = 0 - - @depthcontrol - def O(self): - current = self.create_node(UnlexerRule(name='O')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_15))) - return current - O.min_depth = 0 - - @depthcontrol - def P(self): - current = self.create_node(UnlexerRule(name='P')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_16))) - return current - P.min_depth = 0 - - @depthcontrol - def Q(self): - current = self.create_node(UnlexerRule(name='Q')) - current += 
self.create_node(UnlexerRule(src=self.char_from_list(charset_17))) - return current - Q.min_depth = 0 - - @depthcontrol - def R(self): - current = self.create_node(UnlexerRule(name='R')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_18))) - return current - R.min_depth = 0 - - @depthcontrol - def S(self): - current = self.create_node(UnlexerRule(name='S')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_19))) - return current - S.min_depth = 0 - - @depthcontrol - def T(self): - current = self.create_node(UnlexerRule(name='T')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_20))) - return current - T.min_depth = 0 - - @depthcontrol - def U(self): - current = self.create_node(UnlexerRule(name='U')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_21))) - return current - U.min_depth = 0 - - @depthcontrol - def V(self): - current = self.create_node(UnlexerRule(name='V')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_22))) - return current - V.min_depth = 0 - - @depthcontrol - def W(self): - current = self.create_node(UnlexerRule(name='W')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_23))) - return current - W.min_depth = 0 - - @depthcontrol - def X(self): - current = self.create_node(UnlexerRule(name='X')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_24))) - return current - X.min_depth = 0 - - @depthcontrol - def Y(self): - current = self.create_node(UnlexerRule(name='Y')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_25))) - return current - Y.min_depth = 0 - - @depthcontrol - def Z(self): - current = self.create_node(UnlexerRule(name='Z')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_26))) - return current - Z.min_depth = 0 - - @depthcontrol - def LETTER(self): - current = self.create_node(UnlexerRule(name='LETTER')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_27))) - return current - LETTER.min_depth = 0 - - @depthcontrol - def DEC_DIGIT(self): - current = self.create_node(UnlexerRule(name='DEC_DIGIT')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_28))) - return current - DEC_DIGIT.min_depth = 0 - - @depthcontrol - def HEX_DIGIT(self): - current = self.create_node(UnlexerRule(name='HEX_DIGIT')) - current += self.create_node(UnlexerRule(src=self.char_from_list(charset_29))) - return current - HEX_DIGIT.min_depth = 0 - - @depthcontrol - def ARROW(self): - current = self.create_node(UnlexerRule(name='ARROW')) - current += self.create_node(UnlexerRule(src='->')) - return current - ARROW.min_depth = 0 - - @depthcontrol - def ASTERISK(self): - current = self.create_node(UnlexerRule(name='ASTERISK')) - current += self.create_node(UnlexerRule(src='*')) - return current - ASTERISK.min_depth = 0 - - @depthcontrol - def BACKQUOTE(self): - current = self.create_node(UnlexerRule(name='BACKQUOTE')) - current += self.create_node(UnlexerRule(src='`')) - return current - BACKQUOTE.min_depth = 0 - - @depthcontrol - def BACKSLASH(self): - current = self.create_node(UnlexerRule(name='BACKSLASH')) - current += self.create_node(UnlexerRule(src='\\')) - return current - BACKSLASH.min_depth = 0 - - @depthcontrol - def COLON(self): - current = self.create_node(UnlexerRule(name='COLON')) - current += self.create_node(UnlexerRule(src=':')) - return current - COLON.min_depth = 0 - - @depthcontrol - def 
COMMA(self): - current = self.create_node(UnlexerRule(name='COMMA')) - current += self.create_node(UnlexerRule(src=',')) - return current - COMMA.min_depth = 0 - - @depthcontrol - def CONCAT(self): - current = self.create_node(UnlexerRule(name='CONCAT')) - current += self.create_node(UnlexerRule(src='||')) - return current - CONCAT.min_depth = 0 - - @depthcontrol - def DASH(self): - current = self.create_node(UnlexerRule(name='DASH')) - current += self.create_node(UnlexerRule(src='-')) - return current - DASH.min_depth = 0 - - @depthcontrol - def DOT(self): - current = self.create_node(UnlexerRule(name='DOT')) - current += self.create_node(UnlexerRule(src='.')) - return current - DOT.min_depth = 0 - - @depthcontrol - def EQ_DOUBLE(self): - current = self.create_node(UnlexerRule(name='EQ_DOUBLE')) - current += self.create_node(UnlexerRule(src='==')) - return current - EQ_DOUBLE.min_depth = 0 - - @depthcontrol - def EQ_SINGLE(self): - current = self.create_node(UnlexerRule(name='EQ_SINGLE')) - current += self.create_node(UnlexerRule(src='=')) - return current - EQ_SINGLE.min_depth = 0 - - @depthcontrol - def GE(self): - current = self.create_node(UnlexerRule(name='GE')) - current += self.create_node(UnlexerRule(src='>=')) - return current - GE.min_depth = 0 - - @depthcontrol - def GT(self): - current = self.create_node(UnlexerRule(name='GT')) - current += self.create_node(UnlexerRule(src='>')) - return current - GT.min_depth = 0 - - @depthcontrol - def LBRACKET(self): - current = self.create_node(UnlexerRule(name='LBRACKET')) - current += self.create_node(UnlexerRule(src='[')) - return current - LBRACKET.min_depth = 0 - - @depthcontrol - def LE(self): - current = self.create_node(UnlexerRule(name='LE')) - current += self.create_node(UnlexerRule(src='<=')) - return current - LE.min_depth = 0 - - @depthcontrol - def LPAREN(self): - current = self.create_node(UnlexerRule(name='LPAREN')) - current += self.create_node(UnlexerRule(src='(')) - return current - LPAREN.min_depth = 0 - - @depthcontrol - def LT(self): - current = self.create_node(UnlexerRule(name='LT')) - current += self.create_node(UnlexerRule(src='<')) - return current - LT.min_depth = 0 - - @depthcontrol - def NOT_EQ(self): - current = self.create_node(UnlexerRule(name='NOT_EQ')) - choice = self.choice([0 if [0, 0][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_79', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_79', choice)] = self.unlexer.weights.get(('alt_79', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.create_node(UnlexerRule(src='!=')) - elif choice == 1: - current += self.create_node(UnlexerRule(src='<>')) - return current - NOT_EQ.min_depth = 0 - - @depthcontrol - def PERCENT(self): - current = self.create_node(UnlexerRule(name='PERCENT')) - current += self.create_node(UnlexerRule(src='%')) - return current - PERCENT.min_depth = 0 - - @depthcontrol - def PLUS(self): - current = self.create_node(UnlexerRule(name='PLUS')) - current += self.create_node(UnlexerRule(src='+')) - return current - PLUS.min_depth = 0 - - @depthcontrol - def QUERY(self): - current = self.create_node(UnlexerRule(name='QUERY')) - current += self.create_node(UnlexerRule(src='?')) - return current - QUERY.min_depth = 0 - - @depthcontrol - def QUOTE_SINGLE(self): - current = self.create_node(UnlexerRule(name='QUOTE_SINGLE')) - current += self.create_node(UnlexerRule(src='\'')) - return current - QUOTE_SINGLE.min_depth = 0 - - @depthcontrol - def RBRACKET(self): - current = 
self.create_node(UnlexerRule(name='RBRACKET'))
-        current += self.create_node(UnlexerRule(src=']'))
-        return current
-    RBRACKET.min_depth = 0
-
-    @depthcontrol
-    def RPAREN(self):
-        current = self.create_node(UnlexerRule(name='RPAREN'))
-        current += self.create_node(UnlexerRule(src=')'))
-        return current
-    RPAREN.min_depth = 0
-
-    @depthcontrol
-    def SEMICOLON(self):
-        current = self.create_node(UnlexerRule(name='SEMICOLON'))
-        current += self.create_node(UnlexerRule(src=';'))
-        return current
-    SEMICOLON.min_depth = 0
-
-    @depthcontrol
-    def SLASH(self):
-        current = self.create_node(UnlexerRule(name='SLASH'))
-        current += self.create_node(UnlexerRule(src='/'))
-        return current
-    SLASH.min_depth = 0
-
-    @depthcontrol
-    def UNDERSCORE(self):
-        current = self.create_node(UnlexerRule(name='UNDERSCORE'))
-        current += self.create_node(UnlexerRule(src='_'))
-        return current
-    UNDERSCORE.min_depth = 0
-
-    @depthcontrol
-    def SINGLE_LINE_COMMENT(self):
-        current = self.create_node(UnlexerRule(name='SINGLE_LINE_COMMENT'))
-        current += self.create_node(UnlexerRule(src='--'))
-        if self.unlexer.max_depth >= 0:
-            for _ in self.zero_or_more():
-                current += UnlexerRule(src=self.char_from_list(charset_30))
-
-        choice = self.choice([0 if [0, 0, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_95', i), 1) for i, w in enumerate([1, 1, 1])])
-        self.unlexer.weights[('alt_95', choice)] = self.unlexer.weights.get(('alt_95', choice), 1) * self.unlexer.cooldown
-        if choice == 0:
-            current += self.create_node(UnlexerRule(src='\n'))
-        elif choice == 1:
-            current += self.create_node(UnlexerRule(src='\r'))
-        elif choice == 2:
-            current += self.unlexer.EOF()
-        return current
-    SINGLE_LINE_COMMENT.min_depth = 0
-
-    @depthcontrol
-    def MULTI_LINE_COMMENT(self):
-        current = self.create_node(UnlexerRule(name='MULTI_LINE_COMMENT'))
-        current += self.create_node(UnlexerRule(src='/*'))
-        if self.unlexer.max_depth >= 0:
-            for _ in self.zero_or_more():
-                current += UnlexerRule(src=self.any_char())
-
-        current += self.create_node(UnlexerRule(src='*/'))
-        return current
-    MULTI_LINE_COMMENT.min_depth = 0
-
-    @depthcontrol
-    def WHITESPACE(self):
-        current = self.create_node(UnlexerRule(name='WHITESPACE'))
-        current += self.create_node(UnlexerRule(src=self.char_from_list(charset_31)))
-        return current
-    WHITESPACE.min_depth = 0
-
diff --git a/utils/grammar-fuzzer/ClickHouseUnparser.py b/utils/grammar-fuzzer/ClickHouseUnparser.py
deleted file mode 100644
index 7fa5eb96d31..00000000000
--- a/utils/grammar-fuzzer/ClickHouseUnparser.py
+++ /dev/null
@@ -1,1815 +0,0 @@
-# Generated by Grammarinator 19.3
-
-from itertools import chain
-from grammarinator.runtime import *
-
-import ClickHouseUnlexer
-
-
-class ClickHouseUnparser(Grammarinator):
-
-    def __init__(self, unlexer):
-        super(ClickHouseUnparser, self).__init__()
-        self.unlexer = unlexer
-    @depthcontrol
-    def queryList(self):
-        current = self.create_node(UnparserRule(name='queryList'))
-        current += self.queryStmt()
-        if self.unlexer.max_depth >= 8:
-            for _ in self.zero_or_more():
-                current += self.unlexer.SEMICOLON()
-                current += self.queryStmt()
-
-        if self.unlexer.max_depth >= 1:
-            for _ in self.zero_or_one():
-                current += self.unlexer.SEMICOLON()
-
-        current += self.unlexer.EOF()
-        return current
-    queryList.min_depth = 8
-
-    @depthcontrol
-    def queryStmt(self):
-        current = self.create_node(UnparserRule(name='queryStmt'))
-        current += self.query()
-        if self.unlexer.max_depth >= 2:
-            for _ in self.zero_or_one():
-                current += self.unlexer.INTO()
-                current +=
self.unlexer.OUTFILE() - current += self.unlexer.STRING_LITERAL() - - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_one(): - current += self.unlexer.FORMAT() - current += self.identifier() - - return current - queryStmt.min_depth = 7 - - @depthcontrol - def query(self): - current = self.create_node(UnparserRule(name='query')) - choice = self.choice([0 if [6, 7, 6, 6][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_108', i), 1) for i, w in enumerate([1, 1, 1, 1])]) - self.unlexer.weights[('alt_108', choice)] = self.unlexer.weights.get(('alt_108', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.distributedStmt() - elif choice == 1: - current += self.insertStmt() - elif choice == 2: - current += self.selectUnionStmt() - elif choice == 3: - current += self.setStmt() - return current - query.min_depth = 6 - - @depthcontrol - def distributedStmt(self): - current = self.create_node(UnparserRule(name='distributedStmt')) - choice = self.choice([0 if [5, 6, 6][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_113', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_113', choice)] = self.unlexer.weights.get(('alt_113', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.createDatabaseStmt() - elif choice == 1: - current += self.createTableStmt() - elif choice == 2: - current += self.dropStmt() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_one(): - current += self.unlexer.ON() - current += self.unlexer.CLUSTER() - current += self.identifier() - - return current - distributedStmt.min_depth = 5 - - @depthcontrol - def createDatabaseStmt(self): - current = self.create_node(UnparserRule(name='createDatabaseStmt')) - current += self.unlexer.CREATE() - current += self.unlexer.DATABASE() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.IF() - current += self.unlexer.NOT() - current += self.unlexer.EXISTS() - - current += self.databaseIdentifier() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.engineExpr() - - return current - createDatabaseStmt.min_depth = 4 - - @depthcontrol - def createTableStmt(self): - current = self.create_node(UnparserRule(name='createTableStmt')) - current += self.unlexer.CREATE() - current += self.unlexer.TABLE() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.IF() - current += self.unlexer.NOT() - current += self.unlexer.EXISTS() - - current += self.tableIdentifier() - current += self.schemaClause() - return current - createTableStmt.min_depth = 5 - - @depthcontrol - def schemaClause(self): - current = self.create_node(UnparserRule(name='schemaClause')) - choice = self.choice([0 if [8, 7, 5, 4][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_121', i), 1) for i, w in enumerate([1, 1, 1, 1])]) - self.unlexer.weights[('alt_121', choice)] = self.unlexer.weights.get(('alt_121', choice), 1) * self.unlexer.cooldown - if choice == 0: - current = self.schemaClause_SchemaDescriptionClause() - elif choice == 1: - current = self.schemaClause_SchemaAsSubqueryClause() - elif choice == 2: - current = self.schemaClause_SchemaAsTableClause() - elif choice == 3: - current = self.schemaClause_SchemaAsFunctionClause() - return current - schemaClause.min_depth = 4 - - @depthcontrol - def schemaClause_SchemaDescriptionClause(self): - current = self.create_node(UnparserRule(name='schemaClause_SchemaDescriptionClause')) - current += 
self.unlexer.LPAREN() - current += self.tableElementExpr() - if self.unlexer.max_depth >= 7: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.tableElementExpr() - - current += self.unlexer.RPAREN() - current += self.engineClause() - return current - schemaClause_SchemaDescriptionClause.min_depth = 7 - - @depthcontrol - def schemaClause_SchemaAsSubqueryClause(self): - current = self.create_node(UnparserRule(name='schemaClause_SchemaAsSubqueryClause')) - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.engineClause() - - current += self.unlexer.AS() - current += self.selectUnionStmt() - return current - schemaClause_SchemaAsSubqueryClause.min_depth = 6 - - @depthcontrol - def schemaClause_SchemaAsTableClause(self): - current = self.create_node(UnparserRule(name='schemaClause_SchemaAsTableClause')) - current += self.unlexer.AS() - current += self.tableIdentifier() - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.engineClause() - - return current - schemaClause_SchemaAsTableClause.min_depth = 4 - - @depthcontrol - def schemaClause_SchemaAsFunctionClause(self): - current = self.create_node(UnparserRule(name='schemaClause_SchemaAsFunctionClause')) - current += self.unlexer.AS() - current += self.identifier() - current += self.unlexer.LPAREN() - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.tableArgList() - - current += self.unlexer.RPAREN() - return current - schemaClause_SchemaAsFunctionClause.min_depth = 3 - - @depthcontrol - def engineClause(self): - current = self.create_node(UnparserRule(name='engineClause')) - current += self.engineExpr() - if self.unlexer.max_depth >= 6: - for _ in self.zero_or_one(): - current += self.orderByClause() - - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.partitionByClause() - - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.primaryKeyClause() - - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.sampleByClause() - - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.ttlClause() - - if self.unlexer.max_depth >= 6: - for _ in self.zero_or_one(): - current += self.settingsClause() - - return current - engineClause.min_depth = 4 - - @depthcontrol - def partitionByClause(self): - current = self.create_node(UnparserRule(name='partitionByClause')) - current += self.unlexer.PARTITION() - current += self.unlexer.BY() - current += self.columnExpr() - return current - partitionByClause.min_depth = 3 - - @depthcontrol - def primaryKeyClause(self): - current = self.create_node(UnparserRule(name='primaryKeyClause')) - current += self.unlexer.PRIMARY() - current += self.unlexer.KEY() - current += self.columnExpr() - return current - primaryKeyClause.min_depth = 3 - - @depthcontrol - def sampleByClause(self): - current = self.create_node(UnparserRule(name='sampleByClause')) - current += self.unlexer.SAMPLE() - current += self.unlexer.BY() - current += self.columnExpr() - return current - sampleByClause.min_depth = 3 - - @depthcontrol - def ttlClause(self): - current = self.create_node(UnparserRule(name='ttlClause')) - current += self.unlexer.TTL() - current += self.ttlExpr() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.ttlExpr() - - return current - ttlClause.min_depth = 4 - - @depthcontrol - def engineExpr(self): - current = 
self.create_node(UnparserRule(name='engineExpr')) - current += self.unlexer.ENGINE() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - current += self.unlexer.EQ_SINGLE() - - current += self.identifier() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - current += self.unlexer.LPAREN() - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.tableArgList() - - current += self.unlexer.RPAREN() - - return current - engineExpr.min_depth = 3 - - @depthcontrol - def tableElementExpr(self): - current = self.create_node(UnparserRule(name='tableElementExpr')) - current = self.tableElementExpr_TableElementColumn() - return current - tableElementExpr.min_depth = 6 - - @depthcontrol - def tableElementExpr_TableElementColumn(self): - current = self.create_node(UnparserRule(name='tableElementExpr_TableElementColumn')) - current += self.identifier() - current += self.columnTypeExpr() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.tableColumnPropertyExpr() - - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_one(): - current += self.unlexer.TTL() - current += self.columnExpr() - - return current - tableElementExpr_TableElementColumn.min_depth = 5 - - @depthcontrol - def tableColumnPropertyExpr(self): - current = self.create_node(UnparserRule(name='tableColumnPropertyExpr')) - choice = self.choice([0 if [2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_142', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_142', choice)] = self.unlexer.weights.get(('alt_142', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.DEFAULT() - elif choice == 1: - current += self.unlexer.MATERIALIZED() - elif choice == 2: - current += self.unlexer.ALIAS() - current += self.columnExpr() - return current - tableColumnPropertyExpr.min_depth = 3 - - @depthcontrol - def ttlExpr(self): - current = self.create_node(UnparserRule(name='ttlExpr')) - current += self.columnExpr() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_147', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_147', choice)] = self.unlexer.weights.get(('alt_147', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.DELETE() - elif choice == 1: - current += self.unlexer.TO() - current += self.unlexer.DISK() - current += self.unlexer.STRING_LITERAL() - elif choice == 2: - current += self.unlexer.TO() - current += self.unlexer.VOLUME() - current += self.unlexer.STRING_LITERAL() - - return current - ttlExpr.min_depth = 3 - - @depthcontrol - def dropStmt(self): - current = self.create_node(UnparserRule(name='dropStmt')) - choice = self.choice([0 if [5, 5][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_151', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_151', choice)] = self.unlexer.weights.get(('alt_151', choice), 1) * self.unlexer.cooldown - if choice == 0: - current = self.dropStmt_DropDatabaseStmt() - elif choice == 1: - current = self.dropStmt_DropTableStmt() - return current - dropStmt.min_depth = 5 - - @depthcontrol - def dropStmt_DropDatabaseStmt(self): - current = self.create_node(UnparserRule(name='dropStmt_DropDatabaseStmt')) - current += self.unlexer.DROP() - current += self.unlexer.DATABASE() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += 
self.unlexer.IF() - current += self.unlexer.EXISTS() - - current += self.databaseIdentifier() - return current - dropStmt_DropDatabaseStmt.min_depth = 4 - - @depthcontrol - def dropStmt_DropTableStmt(self): - current = self.create_node(UnparserRule(name='dropStmt_DropTableStmt')) - current += self.unlexer.DROP() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.TEMPORARY() - - current += self.unlexer.TABLE() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.IF() - current += self.unlexer.EXISTS() - - current += self.tableIdentifier() - return current - dropStmt_DropTableStmt.min_depth = 4 - - @depthcontrol - def insertStmt(self): - current = self.create_node(UnparserRule(name='insertStmt')) - current += self.unlexer.INSERT() - current += self.unlexer.INTO() - current += self.tableIdentifier() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_one(): - current += self.unlexer.LPAREN() - current += self.identifier() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.identifier() - - current += self.unlexer.RPAREN() - - current += self.valuesClause() - return current - insertStmt.min_depth = 6 - - @depthcontrol - def valuesClause(self): - current = self.create_node(UnparserRule(name='valuesClause')) - choice = self.choice([0 if [5, 6][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_159', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_159', choice)] = self.unlexer.weights.get(('alt_159', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.VALUES() - current += self.valueTupleExpr() - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.valueTupleExpr() - - elif choice == 1: - current += self.selectUnionStmt() - return current - valuesClause.min_depth = 5 - - @depthcontrol - def valueTupleExpr(self): - current = self.create_node(UnparserRule(name='valueTupleExpr')) - current += self.unlexer.LPAREN() - current += self.valueExprList() - current += self.unlexer.RPAREN() - return current - valueTupleExpr.min_depth = 4 - - @depthcontrol - def selectUnionStmt(self): - current = self.create_node(UnparserRule(name='selectUnionStmt')) - current += self.selectStmt() - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_more(): - current += self.unlexer.UNION() - current += self.unlexer.ALL() - current += self.selectStmt() - - return current - selectUnionStmt.min_depth = 5 - - @depthcontrol - def selectStmt(self): - current = self.create_node(UnparserRule(name='selectStmt')) - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.withClause() - - current += self.unlexer.SELECT() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.DISTINCT() - - current += self.columnExprList() - if self.unlexer.max_depth >= 8: - for _ in self.zero_or_one(): - current += self.fromClause() - - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.sampleClause() - - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.arrayJoinClause() - - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.prewhereClause() - - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.whereClause() - - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += 
self.groupByClause() - - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.havingClause() - - if self.unlexer.max_depth >= 6: - for _ in self.zero_or_one(): - current += self.orderByClause() - - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.limitByClause() - - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.limitClause() - - if self.unlexer.max_depth >= 6: - for _ in self.zero_or_one(): - current += self.settingsClause() - - return current - selectStmt.min_depth = 4 - - @depthcontrol - def withClause(self): - current = self.create_node(UnparserRule(name='withClause')) - current += self.unlexer.WITH() - current += self.columnExprList() - return current - withClause.min_depth = 4 - - @depthcontrol - def fromClause(self): - current = self.create_node(UnparserRule(name='fromClause')) - current += self.unlexer.FROM() - current += self.joinExpr() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.FINAL() - - return current - fromClause.min_depth = 7 - - @depthcontrol - def sampleClause(self): - current = self.create_node(UnparserRule(name='sampleClause')) - current += self.unlexer.SAMPLE() - current += self.ratioExpr() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_one(): - current += self.unlexer.OFFSET() - current += self.ratioExpr() - - return current - sampleClause.min_depth = 3 - - @depthcontrol - def arrayJoinClause(self): - current = self.create_node(UnparserRule(name='arrayJoinClause')) - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.LEFT() - - current += self.unlexer.ARRAY() - current += self.unlexer.JOIN() - current += self.columnExprList() - return current - arrayJoinClause.min_depth = 4 - - @depthcontrol - def prewhereClause(self): - current = self.create_node(UnparserRule(name='prewhereClause')) - current += self.unlexer.PREWHERE() - current += self.columnExpr() - return current - prewhereClause.min_depth = 3 - - @depthcontrol - def whereClause(self): - current = self.create_node(UnparserRule(name='whereClause')) - current += self.unlexer.WHERE() - current += self.columnExpr() - return current - whereClause.min_depth = 3 - - @depthcontrol - def groupByClause(self): - current = self.create_node(UnparserRule(name='groupByClause')) - current += self.unlexer.GROUP() - current += self.unlexer.BY() - current += self.columnExprList() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.WITH() - current += self.unlexer.TOTALS() - - return current - groupByClause.min_depth = 4 - - @depthcontrol - def havingClause(self): - current = self.create_node(UnparserRule(name='havingClause')) - current += self.unlexer.HAVING() - current += self.columnExpr() - return current - havingClause.min_depth = 3 - - @depthcontrol - def orderByClause(self): - current = self.create_node(UnparserRule(name='orderByClause')) - current += self.unlexer.ORDER() - current += self.unlexer.BY() - current += self.orderExprList() - return current - orderByClause.min_depth = 5 - - @depthcontrol - def limitByClause(self): - current = self.create_node(UnparserRule(name='limitByClause')) - current += self.unlexer.LIMIT() - current += self.limitExpr() - current += self.unlexer.BY() - current += self.columnExprList() - return current - limitByClause.min_depth = 4 - - @depthcontrol - def limitClause(self): - current = self.create_node(UnparserRule(name='limitClause')) - current += self.unlexer.LIMIT() - 
current += self.limitExpr() - return current - limitClause.min_depth = 3 - - @depthcontrol - def settingsClause(self): - current = self.create_node(UnparserRule(name='settingsClause')) - current += self.unlexer.SETTINGS() - current += self.settingExprList() - return current - settingsClause.min_depth = 5 - - @depthcontrol - def joinExpr(self): - current = self.create_node(UnparserRule(name='joinExpr')) - choice = self.choice([0 if [6, 8, 8, 8][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_181', i), 1) for i, w in enumerate([1, 1, 1, 1])]) - self.unlexer.weights[('alt_181', choice)] = self.unlexer.weights.get(('alt_181', choice), 1) * self.unlexer.cooldown - if choice == 0: - current = self.joinExpr_JoinExprTable() - elif choice == 1: - current = self.joinExpr_JoinExprParens() - elif choice == 2: - current = self.joinExpr_JoinExprOp() - elif choice == 3: - current = self.joinExpr_JoinExprCrossOp() - return current - joinExpr.min_depth = 6 - - @depthcontrol - def joinExpr_JoinExprTable(self): - current = self.create_node(UnparserRule(name='joinExpr_JoinExprTable')) - current += self.tableExpr() - return current - joinExpr_JoinExprTable.min_depth = 5 - - @depthcontrol - def joinExpr_JoinExprParens(self): - current = self.create_node(UnparserRule(name='joinExpr_JoinExprParens')) - current += self.unlexer.LPAREN() - current += self.joinExpr() - current += self.unlexer.RPAREN() - return current - joinExpr_JoinExprParens.min_depth = 7 - - @depthcontrol - def joinExpr_JoinExprOp(self): - current = self.create_node(UnparserRule(name='joinExpr_JoinExprOp')) - current += self.joinExpr() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_187', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_187', choice)] = self.unlexer.weights.get(('alt_187', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.GLOBAL() - elif choice == 1: - current += self.unlexer.LOCAL() - - current += self.joinOp() - current += self.unlexer.JOIN() - current += self.joinExpr() - current += self.joinConstraintClause() - return current - joinExpr_JoinExprOp.min_depth = 7 - - @depthcontrol - def joinExpr_JoinExprCrossOp(self): - current = self.create_node(UnparserRule(name='joinExpr_JoinExprCrossOp')) - current += self.joinExpr() - current += self.joinOpCross() - current += self.joinExpr() - return current - joinExpr_JoinExprCrossOp.min_depth = 7 - - @depthcontrol - def joinOp(self): - current = self.create_node(UnparserRule(name='joinOp')) - choice = self.choice([0 if [3, 3, 3][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_190', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_190', choice)] = self.unlexer.weights.get(('alt_190', choice), 1) * self.unlexer.cooldown - if choice == 0: - current = self.joinOp_JoinOpInner() - elif choice == 1: - current = self.joinOp_JoinOpLeftRight() - elif choice == 2: - current = self.joinOp_JoinOpFull() - return current - joinOp.min_depth = 3 - - @depthcontrol - def joinOp_JoinOpInner(self): - current = self.create_node(UnparserRule(name='joinOp_JoinOpInner')) - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_194', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_194', choice)] = self.unlexer.weights.get(('alt_194', choice), 1) * self.unlexer.cooldown - if choice == 0: - if self.unlexer.max_depth >= 2: 
- for _ in self.zero_or_one(): - current += self.unlexer.ANY() - - current += self.unlexer.INNER() - elif choice == 1: - current += self.unlexer.INNER() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.ANY() - - return current - joinOp_JoinOpInner.min_depth = 2 - - @depthcontrol - def joinOp_JoinOpLeftRight(self): - current = self.create_node(UnparserRule(name='joinOp_JoinOpLeftRight')) - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_199', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_199', choice)] = self.unlexer.weights.get(('alt_199', choice), 1) * self.unlexer.cooldown - if choice == 0: - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [2, 2, 2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_203', i), 1) for i, w in enumerate([1, 1, 1, 1, 1])]) - self.unlexer.weights[('alt_203', choice)] = self.unlexer.weights.get(('alt_203', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.OUTER() - elif choice == 1: - current += self.unlexer.SEMI() - elif choice == 2: - current += self.unlexer.ANTI() - elif choice == 3: - current += self.unlexer.ANY() - elif choice == 4: - current += self.unlexer.ASOF() - - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_209', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_209', choice)] = self.unlexer.weights.get(('alt_209', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.LEFT() - elif choice == 1: - current += self.unlexer.RIGHT() - elif choice == 1: - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_212', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_212', choice)] = self.unlexer.weights.get(('alt_212', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.LEFT() - elif choice == 1: - current += self.unlexer.RIGHT() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [2, 2, 2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_216', i), 1) for i, w in enumerate([1, 1, 1, 1, 1])]) - self.unlexer.weights[('alt_216', choice)] = self.unlexer.weights.get(('alt_216', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.OUTER() - elif choice == 1: - current += self.unlexer.SEMI() - elif choice == 2: - current += self.unlexer.ANTI() - elif choice == 3: - current += self.unlexer.ANY() - elif choice == 4: - current += self.unlexer.ASOF() - - return current - joinOp_JoinOpLeftRight.min_depth = 2 - - @depthcontrol - def joinOp_JoinOpFull(self): - current = self.create_node(UnparserRule(name='joinOp_JoinOpFull')) - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_222', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_222', choice)] = self.unlexer.weights.get(('alt_222', choice), 1) * self.unlexer.cooldown - if choice == 0: - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_226', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_226', choice)] = self.unlexer.weights.get(('alt_226', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += 
self.unlexer.OUTER() - elif choice == 1: - current += self.unlexer.ANY() - - current += self.unlexer.FULL() - elif choice == 1: - current += self.unlexer.FULL() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_230', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_230', choice)] = self.unlexer.weights.get(('alt_230', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.OUTER() - elif choice == 1: - current += self.unlexer.ANY() - - return current - joinOp_JoinOpFull.min_depth = 2 - - @depthcontrol - def joinOpCross(self): - current = self.create_node(UnparserRule(name='joinOpCross')) - choice = self.choice([0 if [2, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_233', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_233', choice)] = self.unlexer.weights.get(('alt_233', choice), 1) * self.unlexer.cooldown - if choice == 0: - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_237', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_237', choice)] = self.unlexer.weights.get(('alt_237', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.GLOBAL() - elif choice == 1: - current += self.unlexer.LOCAL() - - current += self.unlexer.CROSS() - current += self.unlexer.JOIN() - elif choice == 1: - current += self.unlexer.COMMA() - return current - joinOpCross.min_depth = 1 - - @depthcontrol - def joinConstraintClause(self): - current = self.create_node(UnparserRule(name='joinConstraintClause')) - choice = self.choice([0 if [4, 4, 4][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_240', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_240', choice)] = self.unlexer.weights.get(('alt_240', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.ON() - current += self.columnExprList() - elif choice == 1: - current += self.unlexer.USING() - current += self.unlexer.LPAREN() - current += self.columnExprList() - current += self.unlexer.RPAREN() - elif choice == 2: - current += self.unlexer.USING() - current += self.columnExprList() - return current - joinConstraintClause.min_depth = 4 - - @depthcontrol - def limitExpr(self): - current = self.create_node(UnparserRule(name='limitExpr')) - current += self.unlexer.INTEGER_LITERAL() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [1, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_245', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_245', choice)] = self.unlexer.weights.get(('alt_245', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.COMMA() - elif choice == 1: - current += self.unlexer.OFFSET() - current += self.unlexer.INTEGER_LITERAL() - - return current - limitExpr.min_depth = 2 - - @depthcontrol - def orderExprList(self): - current = self.create_node(UnparserRule(name='orderExprList')) - current += self.orderExpr() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.orderExpr() - - return current - orderExprList.min_depth = 4 - - @depthcontrol - def orderExpr(self): - current = self.create_node(UnparserRule(name='orderExpr')) - current += 
self.columnExpr() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_250', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_250', choice)] = self.unlexer.weights.get(('alt_250', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.ASCENDING() - elif choice == 1: - current += self.unlexer.DESCENDING() - - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.NULLS() - choice = self.choice([0 if [2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_254', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_254', choice)] = self.unlexer.weights.get(('alt_254', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.FIRST() - elif choice == 1: - current += self.unlexer.LAST() - - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.COLLATE() - current += self.unlexer.STRING_LITERAL() - - return current - orderExpr.min_depth = 3 - - @depthcontrol - def ratioExpr(self): - current = self.create_node(UnparserRule(name='ratioExpr')) - current += self.unlexer.INTEGER_LITERAL() - current += self.unlexer.SLASH() - current += self.unlexer.INTEGER_LITERAL() - return current - ratioExpr.min_depth = 2 - - @depthcontrol - def settingExprList(self): - current = self.create_node(UnparserRule(name='settingExprList')) - current += self.settingExpr() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.settingExpr() - - return current - settingExprList.min_depth = 4 - - @depthcontrol - def settingExpr(self): - current = self.create_node(UnparserRule(name='settingExpr')) - current += self.identifier() - current += self.unlexer.EQ_SINGLE() - current += self.literal() - return current - settingExpr.min_depth = 3 - - @depthcontrol - def setStmt(self): - current = self.create_node(UnparserRule(name='setStmt')) - current += self.unlexer.SET() - current += self.settingExprList() - return current - setStmt.min_depth = 5 - - @depthcontrol - def valueExprList(self): - current = self.create_node(UnparserRule(name='valueExprList')) - current += self.valueExpr() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.valueExpr() - - return current - valueExprList.min_depth = 3 - - @depthcontrol - def valueExpr(self): - current = self.create_node(UnparserRule(name='valueExpr')) - choice = self.choice([0 if [4, 6, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_260', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_260', choice)] = self.unlexer.weights.get(('alt_260', choice), 1) * self.unlexer.cooldown - if choice == 0: - current = self.valueExpr_ValueExprLiteral() - elif choice == 1: - current = self.valueExpr_ValueExprTuple() - elif choice == 2: - current = self.valueExpr_ValueExprArray() - return current - valueExpr.min_depth = 2 - - @depthcontrol - def valueExpr_ValueExprLiteral(self): - current = self.create_node(UnparserRule(name='valueExpr_ValueExprLiteral')) - current += self.literal() - return current - valueExpr_ValueExprLiteral.min_depth = 3 - - @depthcontrol - def valueExpr_ValueExprTuple(self): - current = self.create_node(UnparserRule(name='valueExpr_ValueExprTuple')) - current += self.valueTupleExpr() - return current - 
valueExpr_ValueExprTuple.min_depth = 5 - - @depthcontrol - def valueExpr_ValueExprArray(self): - current = self.create_node(UnparserRule(name='valueExpr_ValueExprArray')) - current += self.unlexer.LBRACKET() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.valueExprList() - - current += self.unlexer.RBRACKET() - return current - valueExpr_ValueExprArray.min_depth = 1 - - @depthcontrol - def columnTypeExpr(self): - current = self.create_node(UnparserRule(name='columnTypeExpr')) - choice = self.choice([0 if [4, 5, 4, 6][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_265', i), 1) for i, w in enumerate([1, 1, 1, 1])]) - self.unlexer.weights[('alt_265', choice)] = self.unlexer.weights.get(('alt_265', choice), 1) * self.unlexer.cooldown - if choice == 0: - current = self.columnTypeExpr_ColumnTypeExprSimple() - elif choice == 1: - current = self.columnTypeExpr_ColumnTypeExprParam() - elif choice == 2: - current = self.columnTypeExpr_ColumnTypeExprEnum() - elif choice == 3: - current = self.columnTypeExpr_ColumnTypeExprComplex() - return current - columnTypeExpr.min_depth = 4 - - @depthcontrol - def columnTypeExpr_ColumnTypeExprSimple(self): - current = self.create_node(UnparserRule(name='columnTypeExpr_ColumnTypeExprSimple')) - current += self.identifier() - return current - columnTypeExpr_ColumnTypeExprSimple.min_depth = 3 - - @depthcontrol - def columnTypeExpr_ColumnTypeExprParam(self): - current = self.create_node(UnparserRule(name='columnTypeExpr_ColumnTypeExprParam')) - current += self.identifier() - current += self.unlexer.LPAREN() - current += self.columnParamList() - current += self.unlexer.RPAREN() - return current - columnTypeExpr_ColumnTypeExprParam.min_depth = 4 - - @depthcontrol - def columnTypeExpr_ColumnTypeExprEnum(self): - current = self.create_node(UnparserRule(name='columnTypeExpr_ColumnTypeExprEnum')) - current += self.identifier() - current += self.unlexer.LPAREN() - current += self.enumValue() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.enumValue() - - current += self.unlexer.RPAREN() - return current - columnTypeExpr_ColumnTypeExprEnum.min_depth = 3 - - @depthcontrol - def columnTypeExpr_ColumnTypeExprComplex(self): - current = self.create_node(UnparserRule(name='columnTypeExpr_ColumnTypeExprComplex')) - current += self.identifier() - current += self.unlexer.LPAREN() - current += self.columnTypeExpr() - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.columnTypeExpr() - - current += self.unlexer.RPAREN() - return current - columnTypeExpr_ColumnTypeExprComplex.min_depth = 5 - - @depthcontrol - def columnExprList(self): - current = self.create_node(UnparserRule(name='columnExprList')) - current += self.columnExpr() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.columnExpr() - - return current - columnExprList.min_depth = 3 - - @depthcontrol - def columnExpr(self): - current = self.create_node(UnparserRule(name='columnExpr')) - choice = self.choice([0 if [4, 2, 5, 2, 4, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_273', i), 1) for i, w in enumerate([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]) - self.unlexer.weights[('alt_273', choice)] = self.unlexer.weights.get(('alt_273', choice), 1) * self.unlexer.cooldown - if choice == 0: - current = 
self.columnExpr_ColumnExprLiteral() - elif choice == 1: - current = self.columnExpr_ColumnExprAsterisk() - elif choice == 2: - current = self.columnExpr_ColumnExprTuple() - elif choice == 3: - current = self.columnExpr_ColumnExprArray() - elif choice == 4: - current = self.columnExpr_ColumnExprCase() - elif choice == 5: - current = self.columnExpr_ColumnExprExtract() - elif choice == 6: - current = self.columnExpr_ColumnExprTrim() - elif choice == 7: - current = self.columnExpr_ColumnExprInterval() - elif choice == 8: - current = self.columnExpr_ColumnExprIdentifier() - elif choice == 9: - current = self.columnExpr_ColumnExprFunction() - elif choice == 10: - current = self.columnExpr_ColumnExprArrayAccess() - elif choice == 11: - current = self.columnExpr_ColumnExprTupleAccess() - elif choice == 12: - current = self.columnExpr_ColumnExprUnaryOp() - elif choice == 13: - current = self.columnExpr_ColumnExprIsNull() - elif choice == 14: - current = self.columnExpr_ColumnExprBinaryOp() - elif choice == 15: - current = self.columnExpr_ColumnExprTernaryOp() - elif choice == 16: - current = self.columnExpr_ColumnExprBetween() - elif choice == 17: - current = self.columnExpr_ColumnExprAlias() - return current - columnExpr.min_depth = 2 - - @depthcontrol - def columnExpr_ColumnExprLiteral(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprLiteral')) - current += self.literal() - return current - columnExpr_ColumnExprLiteral.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprAsterisk(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprAsterisk')) - current += self.unlexer.ASTERISK() - return current - columnExpr_ColumnExprAsterisk.min_depth = 1 - - @depthcontrol - def columnExpr_ColumnExprTuple(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprTuple')) - current += self.unlexer.LPAREN() - current += self.columnExprList() - current += self.unlexer.RPAREN() - return current - columnExpr_ColumnExprTuple.min_depth = 4 - - @depthcontrol - def columnExpr_ColumnExprArray(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprArray')) - current += self.unlexer.LBRACKET() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.columnExprList() - - current += self.unlexer.RBRACKET() - return current - columnExpr_ColumnExprArray.min_depth = 1 - - @depthcontrol - def columnExpr_ColumnExprCase(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprCase')) - current += self.unlexer.CASE() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_one(): - current += self.columnExpr() - - if self.unlexer.max_depth >= 0: - for _ in self.one_or_more(): - current += self.unlexer.WHEN() - current += self.columnExpr() - current += self.unlexer.THEN() - current += self.columnExpr() - - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_one(): - current += self.unlexer.ELSE() - current += self.columnExpr() - - current += self.unlexer.END() - return current - columnExpr_ColumnExprCase.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprExtract(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprExtract')) - current += self.unlexer.EXTRACT() - current += self.unlexer.LPAREN() - current += self.unlexer.INTERVAL_TYPE() - current += self.unlexer.FROM() - current += self.columnExpr() - current += self.unlexer.RPAREN() - return current - columnExpr_ColumnExprExtract.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprTrim(self): - 
current = self.create_node(UnparserRule(name='columnExpr_ColumnExprTrim')) - current += self.unlexer.TRIM() - current += self.unlexer.LPAREN() - choice = self.choice([0 if [2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_295', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_295', choice)] = self.unlexer.weights.get(('alt_295', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.BOTH() - elif choice == 1: - current += self.unlexer.LEADING() - elif choice == 2: - current += self.unlexer.TRAILING() - current += self.unlexer.STRING_LITERAL() - current += self.unlexer.FROM() - current += self.columnExpr() - current += self.unlexer.RPAREN() - return current - columnExpr_ColumnExprTrim.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprInterval(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprInterval')) - current += self.unlexer.INTERVAL() - current += self.columnExpr() - current += self.unlexer.INTERVAL_TYPE() - return current - columnExpr_ColumnExprInterval.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprIdentifier(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprIdentifier')) - current += self.columnIdentifier() - return current - columnExpr_ColumnExprIdentifier.min_depth = 4 - - @depthcontrol - def columnExpr_ColumnExprFunction(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprFunction')) - current += self.identifier() - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - current += self.unlexer.LPAREN() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.columnParamList() - - current += self.unlexer.RPAREN() - - current += self.unlexer.LPAREN() - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.columnArgList() - - current += self.unlexer.RPAREN() - return current - columnExpr_ColumnExprFunction.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprArrayAccess(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprArrayAccess')) - current += self.columnExpr() - current += self.unlexer.LBRACKET() - current += self.columnExpr() - current += self.unlexer.RBRACKET() - return current - columnExpr_ColumnExprArrayAccess.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprTupleAccess(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprTupleAccess')) - current += self.columnExpr() - current += self.unlexer.DOT() - current += self.unlexer.INTEGER_LITERAL() - return current - columnExpr_ColumnExprTupleAccess.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprUnaryOp(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprUnaryOp')) - current += self.unaryOp() - current += self.columnExpr() - return current - columnExpr_ColumnExprUnaryOp.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprIsNull(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprIsNull')) - current += self.columnExpr() - current += self.unlexer.IS() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.NOT() - - current += self.unlexer.NULL_SQL() - return current - columnExpr_ColumnExprIsNull.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprBinaryOp(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprBinaryOp')) - current += self.columnExpr() - current += self.binaryOp() - current += self.columnExpr() 
- return current - columnExpr_ColumnExprBinaryOp.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprTernaryOp(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprTernaryOp')) - current += self.columnExpr() - current += self.unlexer.QUERY() - current += self.columnExpr() - current += self.unlexer.COLON() - current += self.columnExpr() - return current - columnExpr_ColumnExprTernaryOp.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprBetween(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprBetween')) - current += self.columnExpr() - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.NOT() - - current += self.unlexer.BETWEEN() - current += self.columnExpr() - current += self.unlexer.AND() - current += self.columnExpr() - return current - columnExpr_ColumnExprBetween.min_depth = 3 - - @depthcontrol - def columnExpr_ColumnExprAlias(self): - current = self.create_node(UnparserRule(name='columnExpr_ColumnExprAlias')) - current += self.columnExpr() - current += self.unlexer.AS() - current += self.identifier() - return current - columnExpr_ColumnExprAlias.min_depth = 3 - - @depthcontrol - def columnParamList(self): - current = self.create_node(UnparserRule(name='columnParamList')) - current += self.literal() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.literal() - - return current - columnParamList.min_depth = 3 - - @depthcontrol - def columnArgList(self): - current = self.create_node(UnparserRule(name='columnArgList')) - current += self.columnArgExpr() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.columnArgExpr() - - return current - columnArgList.min_depth = 4 - - @depthcontrol - def columnArgExpr(self): - current = self.create_node(UnparserRule(name='columnArgExpr')) - choice = self.choice([0 if [4, 3][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_306', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_306', choice)] = self.unlexer.weights.get(('alt_306', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.columnLambdaExpr() - elif choice == 1: - current += self.columnExpr() - return current - columnArgExpr.min_depth = 3 - - @depthcontrol - def columnLambdaExpr(self): - current = self.create_node(UnparserRule(name='columnLambdaExpr')) - choice = self.choice([0 if [3, 3][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_309', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_309', choice)] = self.unlexer.weights.get(('alt_309', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.LPAREN() - current += self.identifier() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.identifier() - - current += self.unlexer.RPAREN() - elif choice == 1: - current += self.identifier() - if self.unlexer.max_depth >= 3: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.identifier() - - current += self.unlexer.ARROW() - current += self.columnExpr() - return current - columnLambdaExpr.min_depth = 3 - - @depthcontrol - def columnIdentifier(self): - current = self.create_node(UnparserRule(name='columnIdentifier')) - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.tableIdentifier() - current += self.unlexer.DOT() - - 
current += self.identifier() - return current - columnIdentifier.min_depth = 3 - - @depthcontrol - def tableExpr(self): - current = self.create_node(UnparserRule(name='tableExpr')) - choice = self.choice([0 if [5, 4, 7, 6][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_315', i), 1) for i, w in enumerate([1, 1, 1, 1])]) - self.unlexer.weights[('alt_315', choice)] = self.unlexer.weights.get(('alt_315', choice), 1) * self.unlexer.cooldown - if choice == 0: - current = self.tableExpr_TableExprIdentifier() - elif choice == 1: - current = self.tableExpr_TableExprFunction() - elif choice == 2: - current = self.tableExpr_TableExprSubquery() - elif choice == 3: - current = self.tableExpr_TableExprAlias() - return current - tableExpr.min_depth = 4 - - @depthcontrol - def tableExpr_TableExprIdentifier(self): - current = self.create_node(UnparserRule(name='tableExpr_TableExprIdentifier')) - current += self.tableIdentifier() - return current - tableExpr_TableExprIdentifier.min_depth = 4 - - @depthcontrol - def tableExpr_TableExprFunction(self): - current = self.create_node(UnparserRule(name='tableExpr_TableExprFunction')) - current += self.identifier() - current += self.unlexer.LPAREN() - if self.unlexer.max_depth >= 5: - for _ in self.zero_or_one(): - current += self.tableArgList() - - current += self.unlexer.RPAREN() - return current - tableExpr_TableExprFunction.min_depth = 3 - - @depthcontrol - def tableExpr_TableExprSubquery(self): - current = self.create_node(UnparserRule(name='tableExpr_TableExprSubquery')) - current += self.unlexer.LPAREN() - current += self.selectUnionStmt() - current += self.unlexer.RPAREN() - return current - tableExpr_TableExprSubquery.min_depth = 6 - - @depthcontrol - def tableExpr_TableExprAlias(self): - current = self.create_node(UnparserRule(name='tableExpr_TableExprAlias')) - current += self.tableExpr() - current += self.unlexer.AS() - current += self.identifier() - return current - tableExpr_TableExprAlias.min_depth = 5 - - @depthcontrol - def tableIdentifier(self): - current = self.create_node(UnparserRule(name='tableIdentifier')) - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_one(): - current += self.databaseIdentifier() - current += self.unlexer.DOT() - - current += self.identifier() - return current - tableIdentifier.min_depth = 3 - - @depthcontrol - def tableArgList(self): - current = self.create_node(UnparserRule(name='tableArgList')) - current += self.tableArgExpr() - if self.unlexer.max_depth >= 4: - for _ in self.zero_or_more(): - current += self.unlexer.COMMA() - current += self.tableArgExpr() - - return current - tableArgList.min_depth = 4 - - @depthcontrol - def tableArgExpr(self): - current = self.create_node(UnparserRule(name='tableArgExpr')) - choice = self.choice([0 if [3, 4][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_323', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_323', choice)] = self.unlexer.weights.get(('alt_323', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.literal() - elif choice == 1: - current += self.tableIdentifier() - return current - tableArgExpr.min_depth = 3 - - @depthcontrol - def databaseIdentifier(self): - current = self.create_node(UnparserRule(name='databaseIdentifier')) - current += self.identifier() - return current - databaseIdentifier.min_depth = 3 - - @depthcontrol - def literal(self): - current = self.create_node(UnparserRule(name='literal')) - choice = self.choice([0 if [2, 2, 2][i] > self.unlexer.max_depth else w * 
self.unlexer.weights.get(('alt_326', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_326', choice)] = self.unlexer.weights.get(('alt_326', choice), 1) * self.unlexer.cooldown - if choice == 0: - if self.unlexer.max_depth >= 1: - for _ in self.zero_or_one(): - choice = self.choice([0 if [1, 1][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_331', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_331', choice)] = self.unlexer.weights.get(('alt_331', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.PLUS() - elif choice == 1: - current += self.unlexer.DASH() - - choice = self.choice([0 if [3, 2, 2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_334', i), 1) for i, w in enumerate([1, 1, 1, 1, 1])]) - self.unlexer.weights[('alt_334', choice)] = self.unlexer.weights.get(('alt_334', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.FLOATING_LITERAL() - elif choice == 1: - current += self.unlexer.HEXADECIMAL_LITERAL() - elif choice == 2: - current += self.unlexer.INTEGER_LITERAL() - elif choice == 3: - current += self.unlexer.INF() - elif choice == 4: - current += self.unlexer.NAN_SQL() - elif choice == 1: - current += self.unlexer.STRING_LITERAL() - elif choice == 2: - current += self.unlexer.NULL_SQL() - return current - literal.min_depth = 2 - - @depthcontrol - def keyword(self): - current = self.create_node(UnparserRule(name='keyword')) - choice = self.choice([0 if [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_340', i), 1) for i, w in enumerate([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]) - self.unlexer.weights[('alt_340', choice)] = self.unlexer.weights.get(('alt_340', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.ALIAS() - elif choice == 1: - current += self.unlexer.ALL() - elif choice == 2: - current += self.unlexer.AND() - elif choice == 3: - current += self.unlexer.ANTI() - elif choice == 4: - current += self.unlexer.ANY() - elif choice == 5: - current += self.unlexer.ARRAY() - elif choice == 6: - current += self.unlexer.AS() - elif choice == 7: - current += self.unlexer.ASCENDING() - elif choice == 8: - current += self.unlexer.ASOF() - elif choice == 9: - current += self.unlexer.BETWEEN() - elif choice == 10: - current += self.unlexer.BOTH() - elif choice == 11: - current += self.unlexer.BY() - elif choice == 12: - current += self.unlexer.CASE() - elif choice == 13: - current += self.unlexer.CAST() - elif choice == 14: - current += self.unlexer.CLUSTER() - elif choice == 15: - current += self.unlexer.COLLATE() - elif choice == 16: - current += self.unlexer.CREATE() - elif choice == 17: - current += self.unlexer.CROSS() - elif choice == 18: - current += self.unlexer.DAY() - elif choice == 19: - current += self.unlexer.DATABASE() - elif choice == 20: - current += self.unlexer.DEFAULT() - elif choice == 21: - current += self.unlexer.DELETE() - elif choice == 22: - current += 
self.unlexer.DESCENDING() - elif choice == 23: - current += self.unlexer.DISK() - elif choice == 24: - current += self.unlexer.DISTINCT() - elif choice == 25: - current += self.unlexer.DROP() - elif choice == 26: - current += self.unlexer.ELSE() - elif choice == 27: - current += self.unlexer.END() - elif choice == 28: - current += self.unlexer.ENGINE() - elif choice == 29: - current += self.unlexer.EXISTS() - elif choice == 30: - current += self.unlexer.EXTRACT() - elif choice == 31: - current += self.unlexer.FINAL() - elif choice == 32: - current += self.unlexer.FIRST() - elif choice == 33: - current += self.unlexer.FORMAT() - elif choice == 34: - current += self.unlexer.FROM() - elif choice == 35: - current += self.unlexer.FULL() - elif choice == 36: - current += self.unlexer.GLOBAL() - elif choice == 37: - current += self.unlexer.GROUP() - elif choice == 38: - current += self.unlexer.HAVING() - elif choice == 39: - current += self.unlexer.HOUR() - elif choice == 40: - current += self.unlexer.IF() - elif choice == 41: - current += self.unlexer.IN() - elif choice == 42: - current += self.unlexer.INNER() - elif choice == 43: - current += self.unlexer.INSERT() - elif choice == 44: - current += self.unlexer.INTERVAL() - elif choice == 45: - current += self.unlexer.INTO() - elif choice == 46: - current += self.unlexer.IS() - elif choice == 47: - current += self.unlexer.JOIN() - elif choice == 48: - current += self.unlexer.KEY() - elif choice == 49: - current += self.unlexer.LAST() - elif choice == 50: - current += self.unlexer.LEADING() - elif choice == 51: - current += self.unlexer.LEFT() - elif choice == 52: - current += self.unlexer.LIKE() - elif choice == 53: - current += self.unlexer.LIMIT() - elif choice == 54: - current += self.unlexer.LOCAL() - elif choice == 55: - current += self.unlexer.MATERIALIZED() - elif choice == 56: - current += self.unlexer.MINUTE() - elif choice == 57: - current += self.unlexer.MONTH() - elif choice == 58: - current += self.unlexer.NOT() - elif choice == 59: - current += self.unlexer.NULLS() - elif choice == 60: - current += self.unlexer.OFFSET() - elif choice == 61: - current += self.unlexer.ON() - elif choice == 62: - current += self.unlexer.OR() - elif choice == 63: - current += self.unlexer.ORDER() - elif choice == 64: - current += self.unlexer.OUTER() - elif choice == 65: - current += self.unlexer.OUTFILE() - elif choice == 66: - current += self.unlexer.PARTITION() - elif choice == 67: - current += self.unlexer.PREWHERE() - elif choice == 68: - current += self.unlexer.PRIMARY() - elif choice == 69: - current += self.unlexer.QUARTER() - elif choice == 70: - current += self.unlexer.RIGHT() - elif choice == 71: - current += self.unlexer.SAMPLE() - elif choice == 72: - current += self.unlexer.SECOND() - elif choice == 73: - current += self.unlexer.SEMI() - elif choice == 74: - current += self.unlexer.SET() - elif choice == 75: - current += self.unlexer.SETTINGS() - elif choice == 76: - current += self.unlexer.TABLE() - elif choice == 77: - current += self.unlexer.TEMPORARY() - elif choice == 78: - current += self.unlexer.THEN() - elif choice == 79: - current += self.unlexer.TOTALS() - elif choice == 80: - current += self.unlexer.TRAILING() - elif choice == 81: - current += self.unlexer.TRIM() - elif choice == 82: - current += self.unlexer.TO() - elif choice == 83: - current += self.unlexer.TTL() - elif choice == 84: - current += self.unlexer.UNION() - elif choice == 85: - current += self.unlexer.USING() - elif choice == 86: - current += 
self.unlexer.VALUES() - elif choice == 87: - current += self.unlexer.VOLUME() - elif choice == 88: - current += self.unlexer.WEEK() - elif choice == 89: - current += self.unlexer.WHEN() - elif choice == 90: - current += self.unlexer.WHERE() - elif choice == 91: - current += self.unlexer.WITH() - elif choice == 92: - current += self.unlexer.YEAR() - return current - keyword.min_depth = 2 - - @depthcontrol - def identifier(self): - current = self.create_node(UnparserRule(name='identifier')) - choice = self.choice([0 if [2, 3, 3][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_434', i), 1) for i, w in enumerate([1, 1, 1])]) - self.unlexer.weights[('alt_434', choice)] = self.unlexer.weights.get(('alt_434', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.IDENTIFIER() - elif choice == 1: - current += self.unlexer.INTERVAL_TYPE() - elif choice == 2: - current += self.keyword() - return current - identifier.min_depth = 2 - - @depthcontrol - def unaryOp(self): - current = self.create_node(UnparserRule(name='unaryOp')) - choice = self.choice([0 if [1, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_438', i), 1) for i, w in enumerate([1, 1])]) - self.unlexer.weights[('alt_438', choice)] = self.unlexer.weights.get(('alt_438', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.DASH() - elif choice == 1: - current += self.unlexer.NOT() - return current - unaryOp.min_depth = 1 - - @depthcontrol - def binaryOp(self): - current = self.create_node(UnparserRule(name='binaryOp')) - choice = self.choice([0 if [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2][i] > self.unlexer.max_depth else w * self.unlexer.weights.get(('alt_441', i), 1) for i, w in enumerate([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]) - self.unlexer.weights[('alt_441', choice)] = self.unlexer.weights.get(('alt_441', choice), 1) * self.unlexer.cooldown - if choice == 0: - current += self.unlexer.CONCAT() - elif choice == 1: - current += self.unlexer.ASTERISK() - elif choice == 2: - current += self.unlexer.SLASH() - elif choice == 3: - current += self.unlexer.PLUS() - elif choice == 4: - current += self.unlexer.DASH() - elif choice == 5: - current += self.unlexer.PERCENT() - elif choice == 6: - current += self.unlexer.EQ_DOUBLE() - elif choice == 7: - current += self.unlexer.EQ_SINGLE() - elif choice == 8: - current += self.unlexer.NOT_EQ() - elif choice == 9: - current += self.unlexer.LE() - elif choice == 10: - current += self.unlexer.GE() - elif choice == 11: - current += self.unlexer.LT() - elif choice == 12: - current += self.unlexer.GT() - elif choice == 13: - current += self.unlexer.AND() - elif choice == 14: - current += self.unlexer.OR() - elif choice == 15: - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.NOT() - - current += self.unlexer.LIKE() - elif choice == 16: - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.GLOBAL() - - if self.unlexer.max_depth >= 2: - for _ in self.zero_or_one(): - current += self.unlexer.NOT() - - current += self.unlexer.IN() - return current - binaryOp.min_depth = 1 - - @depthcontrol - def enumValue(self): - current = self.create_node(UnparserRule(name='enumValue')) - current += self.unlexer.STRING_LITERAL() - current += self.unlexer.EQ_SINGLE() - current += self.unlexer.INTEGER_LITERAL() - return current - enumValue.min_depth = 2 - - default_rule = queryList - diff --git a/utils/grammar-fuzzer/README.md 
b/utils/grammar-fuzzer/README.md deleted file mode 100644 index b3f233c8648..00000000000 --- a/utils/grammar-fuzzer/README.md +++ /dev/null @@ -1,41 +0,0 @@ -How to use Fuzzer -=== - -The fuzzer consists of auto-generated files: - - ClickHouseUnlexer.py - ClickHouseUnparser.py - -They are generated from grammar files (.g4) using Grammarinator: - - pip3 install grammarinator - grammarinator-process ClickHouseLexer.g4 ClickHouseParser.g4 -o fuzzer/ - -Then you can generate test input for ClickHouse client: - - cd fuzzer - grammarinator-generate \ - -r query_list \ # top-level rule - -o /tmp/sql_test_%d.sql \ # template for output test names - -n 10 \ # number of tests - -c 0.3 \ - -d 20 \ # depth of recursion - -p ClickHouseUnparser.py -l ClickHouseUnlexer.py \ # auto-generated unparser and unlexer - --test-transformers SpaceTransformer.single_line_whitespace \ # transform function to insert whitespace - -For more details see `grammarinator-generate --help`. As a test-transformer function also can be used `SpaceTransformer.multi_line_transformer` - both functions reside in `fuzzer/SpaceTransformer.py` file. - - -Parsing steps -=== - -1. Replace all operators with corresponding functions. -2. Replace all asterisks with columns - if it's inside function call, then expand it as multiple arguments. Warn about undeterministic invocations when functions have positional arguments. - -Old vs. new parser -=== - -- `a as b [c]` - accessing aliased array expression is not possible. -- `a as b . 1` - accessing aliased tuple expression is not possible. -- `between a is not null and b` - `between` operator should have lower priority than `is null`. -- `*.1` - accessing asterisk tuple expression is not possible. diff --git a/utils/grammar-fuzzer/SpaceTransformer.py b/utils/grammar-fuzzer/SpaceTransformer.py deleted file mode 100644 index ad96845c7e2..00000000000 --- a/utils/grammar-fuzzer/SpaceTransformer.py +++ /dev/null @@ -1,38 +0,0 @@ -# -*- coding: utf-8 -*- - -from grammarinator.runtime.tree import * - -from itertools import tee, islice, zip_longest -import random - - -def single_line_whitespace(node): - return _whitespace(node, ' \t') - - -def multi_line_whitespace(node): - return _whitespace(node, ' \t\r\n') - - -def _whitespace(node, symbols): - for child in node.children: - _whitespace(child, symbols) - - # helper function to look ahead one child - def with_next(iterable): - items, nexts = tee(iterable, 2) - nexts = islice(nexts, 1, None) - return zip_longest(items, nexts) - - if isinstance(node, UnparserRule): - new_children = [] - for child, next_child in with_next(node.children): - if (not next_child or - next_child and isinstance(next_child, UnlexerRule) and next_child.name == 'DOT' or - isinstance(child, UnlexerRule) and child.name == 'DOT'): - new_children.append(child) - else: - new_children.extend([child, UnlexerRule(src=random.choice(symbols))]) - node.children = new_children - - return node diff --git a/utils/grammar-fuzzer/__init__.py b/utils/grammar-fuzzer/__init__.py deleted file mode 100644 index 40a96afc6ff..00000000000 --- a/utils/grammar-fuzzer/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# -*- coding: utf-8 -*- diff --git a/utils/junit_to_html/junit-noframes.xsl b/utils/junit_to_html/junit-noframes.xsl deleted file mode 100644 index ae70e230ef6..00000000000 --- a/utils/junit_to_html/junit-noframes.xsl +++ /dev/null @@ -1,390 +0,0 @@ - - - - - - - - Test Results - - - - - - - - -
diff --git a/utils/junit_to_html/junit_to_html b/utils/junit_to_html/junit_to_html deleted file mode 100755 index 132763c7d4c..00000000000 --- a/utils/junit_to_html/junit_to_html +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import os -import lxml.etree as etree -import json -import argparse - -def export_testcases_json(report, path): - with open(os.path.join(path, "cases.jer"), "w") as testcases_file: - for testsuite in report.getroot(): - for testcase in testsuite: - row = {} - row["hostname"] = testsuite.get("hostname") - row["suite"] = testsuite.get("name") - row["suite_duration"] = testsuite.get("time") - row["timestamp"] = testsuite.get("timestamp") - row["testname"] = testcase.get("name") - row["classname"] = testcase.get("classname") - row["file"] = testcase.get("file") - row["line"] = testcase.get("line") - row["duration"] = testcase.get("time") - for el in testcase: - if el.tag == "system-err": - row["stderr"] = el.text - else: - row["stderr"] = "" - - if el.tag == "system-out": - row["stdout"] = el.text - else: - row["stdout"] = "" - - json.dump(row, testcases_file) - testcases_file.write("\n") - -def export_testsuites_json(report, path): - with open(os.path.join(path, "suites.jer"), "w") as testsuites_file: - for testsuite in report.getroot(): - row = {} - row["suite"] = testsuite.get("name") - row["errors"] = testsuite.get("errors") - row["failures"] = testsuite.get("failures") - row["hostname"] = testsuite.get("hostname") - row["skipped"] = testsuite.get("skipped") - row["duration"] = testsuite.get("time") - row["timestamp"] = testsuite.get("timestamp") - json.dump(row, testsuites_file) - testsuites_file.write("\n") - - -def _convert_junit_to_html(junit_path, result_path, export_cases, export_suites): - with open(os.path.join(os.path.dirname(__file__), "junit-noframes.xsl")) as xslt_file: - junit_to_html_xslt = etree.parse(xslt_file) - if not os.path.exists(result_path): - os.makedirs(result_path) - - with open(junit_path) as junit_file: - junit_xml = etree.parse(junit_file) - - if export_suites: - export_testsuites_json(junit_xml, result_path) - if export_cases: - export_testcases_json(junit_xml, result_path) - transform = etree.XSLT(junit_to_html_xslt) - html = etree.tostring(transform(junit_xml), encoding="utf-8") - - with open(os.path.join(result_path, "result.html"), "w") as html_file: - html_file.write(html) - -if __name__ == "__main__": - - parser = argparse.ArgumentParser(description='Convert JUnit XML.') - parser.add_argument('junit', help='path to junit.xml report') - parser.add_argument('result_dir', nargs='?', help='directory for result files. 
Default to junit.xml directory') - parser.add_argument('--export-cases', help='Export JSONEachRow result for testcases to upload in CI', action='store_true') - parser.add_argument('--export-suites', help='Export JSONEachRow result for testsuites to upload in CI', action='store_true') - - args = parser.parse_args() - - junit_path = args.junit - if args.result_dir: - result_path = args.result_dir - else: - result_path = os.path.dirname(junit_path) - print("junit_path: {}, result_path: {}, export cases:{}, export suites: {}".format(junit_path, result_path, args.export_cases, args.export_suites)) - _convert_junit_to_html(junit_path, result_path, args.export_cases, args.export_suites) diff --git a/utils/link-validate/link-validate.sh b/utils/link-validate/link-validate.sh deleted file mode 100755 index 2d8d57b95fc..00000000000 --- a/utils/link-validate/link-validate.sh +++ /dev/null @@ -1,42 +0,0 @@ -#/bin/sh -# -# This script is used to validate the shared libraries -# -# Authors: FoundationDB team, https://github.com/apple/foundationdb/blame/master/build/link-validate.sh -# License: Apache License 2.0 - -verlte() { - [ "$1" = "`echo -e "$1\n$2" | sort -V | head -n1`" ] -} - -ALLOWED_SHARED_LIBS=("libdl.so.2" "libpthread.so.0" "librt.so.1" "libm.so.6" "libc.so.6" "ld-linux-x86-64.so.2") - -if [ "$#" -lt 1 ]; then - echo "USAGE: link-validate.sh BINNAME GLIBC_VERSION" - exit 1 -fi - -# Step 1: glibc version - -for i in $(objdump -T "$1" | awk '{print $5}' | grep GLIBC | sed 's/ *$//g' | sed 's/GLIBC_//' | sort | uniq); do - if ! verlte "$i" "${2:-2.10}"; then - echo "Dependency on newer libc detected: $i" - exit 1 - fi -done - -# Step 2: Other dynamic dependencies - -for j in $(objdump -p "$1" | grep NEEDED | awk '{print $2}'); do - PRESENT=0 - for k in ${ALLOWED_SHARED_LIBS[@]}; do - if [[ "$k" == "$j" ]]; then - PRESENT=1 - break - fi - done - if ! [[ $PRESENT == 1 ]]; then - echo "Unexpected shared object dependency detected: $j" - exit 1 - fi -done diff --git a/utils/tests-visualizer/index.html b/utils/tests-visualizer/index.html new file mode 100644 index 00000000000..a15b09ea58e --- /dev/null +++ b/utils/tests-visualizer/index.html @@ -0,0 +1,129 @@ + + + + + +

+ + + + + diff --git a/utils/upload_test_results/README.md b/utils/upload_test_results/README.md deleted file mode 100644 index e6b361081a2..00000000000 --- a/utils/upload_test_results/README.md +++ /dev/null @@ -1,34 +0,0 @@ -## Tool to upload results to CI ClickHouse - -Currently allows to upload results from `junit_to_html` tool to ClickHouse CI - -``` -usage: upload_test_results [-h] --sha SHA --pr PR --file FILE --type - {suites,cases} [--user USER] --password PASSWORD - [--ca-cert CA_CERT] [--host HOST] [--db DB] - -Upload test result to CI ClickHouse. - -optional arguments: - -h, --help show this help message and exit - --sha SHA sha of current commit - --pr PR pr of current commit. 0 for master - --file FILE file to upload - --type {suites,cases} - Export type - --user USER user name - --password PASSWORD password - --ca-cert CA_CERT CA certificate path - --host HOST CI ClickHouse host - --db DB CI ClickHouse database name -``` - -$ ./upload_test_results --sha "cf7eaee3301d4634acdacbfa308ddbe0cc6a061d" --pr "0" --file xyz/cases.jer --type cases --password $PASSWD - -CI checks has single commit sha and pr identifier. -While uploading your local results for testing purposes try to use correct sha and pr. - -CA Certificate for ClickHouse CI can be obtained from Yandex.Cloud where CI database is hosted -``` bash -wget "https://storage.yandexcloud.net/cloud-certs/CA.pem" -O YandexInternalRootCA.crt -``` \ No newline at end of file diff --git a/utils/upload_test_results/upload_test_results b/utils/upload_test_results/upload_test_results deleted file mode 100755 index 5916d0d85e8..00000000000 --- a/utils/upload_test_results/upload_test_results +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 -import requests -import argparse - -# CREATE TABLE test_suites -# ( -# sha String, -# pr UInt16, -# suite String, -# errors UInt16, -# failures UInt16, -# hostname String, -# skipped UInt16, -# duration Double, -# timestamp DateTime -# ) ENGINE = MergeTree ORDER BY tuple(timestamp, suite); - -QUERY_SUITES="INSERT INTO test_suites "\ - "SELECT '{sha}' AS sha, "\ - "{pr} AS pr, "\ - "suite, "\ - "errors, "\ - "failures, "\ - "hostname, "\ - "skipped, "\ - "duration, "\ - "timestamp "\ - "FROM input('"\ - "suite String, "\ - "errors UInt16, "\ - "failures UInt16, "\ - "hostname String, "\ - "skipped UInt16, "\ - "duration Double, "\ - "timestamp DateTime"\ - "') FORMAT JSONEachRow" - -# CREATE TABLE test_cases -# ( -# sha String, -# pr UInt16, -# hostname String, -# suite String, -# timestamp DateTime, -# testname String, -# classname String, -# file String, -# line UInt16, -# duration Double, -# suite_duration Double, -# stderr String, -# stdout String -# ) ENGINE = MergeTree ORDER BY tuple(timestamp, testname); - -QUERY_CASES="INSERT INTO test_cases "\ - "SELECT '{sha}' AS sha, "\ - "{pr} AS pr, "\ - "hostname, "\ - "suite, "\ - "timestamp, "\ - "testname, "\ - "classname, "\ - "file, "\ - "line, "\ - "duration, "\ - "suite_duration, "\ - "stderr,"\ - "stdout "\ - "FROM input('"\ - "hostname String, "\ - "suite String, "\ - "timestamp DateTime, "\ - "testname String, "\ - "classname String, "\ - "file String, "\ - "line UInt16, "\ - "duration Double, "\ - "suite_duration Double, "\ - "stderr String, "\ - "stdout String"\ - "') FORMAT JSONEachRow" - - -def upload_request(sha, pr, file, q_type, user, password, ca_cert, host, db): - with open(file) as upload_f: - query = QUERY_SUITES if q_type=="suites" else QUERY_CASES - query = query.format(sha=sha, pr=pr) - url = 
'https://{host}:8443/?database={db}&query={query}&date_time_input_format=best_effort'.format( - host=host, - db=db, - query=query - ) - data=upload_f - auth = { - 'X-ClickHouse-User': user, - 'X-ClickHouse-Key': password, - } - - print(query); - - res = requests.post( - url, - data=data, - headers=auth, - verify=ca_cert) - res.raise_for_status() - return res.text - -if __name__ == "__main__": - - parser = argparse.ArgumentParser(description='Upload test result to CI ClickHouse.') - parser.add_argument('--sha', help='sha of current commit', type=str, required=True) - parser.add_argument('--pr', help='pr of current commit. 0 for master', type=int, required=True) - parser.add_argument('--file', help='file to upload', required=True) - parser.add_argument('--type', help='Export type', choices=['suites', 'cases'] , required=True) - parser.add_argument('--user', help='user name', type=str, default="clickhouse-ci") - parser.add_argument('--password', help='password', type=str, required=True) - parser.add_argument('--ca-cert', help='CA certificate path', type=str, default="/usr/local/share/ca-certificates/YandexInternalRootCA.crt") - parser.add_argument('--host', help='CI ClickHouse host', type=str, default="c1a-ity5agjmuhyu6nu9.mdb.yandexcloud.net") - parser.add_argument('--db', help='CI ClickHouse database name', type=str, default="clickhouse-ci") - - args = parser.parse_args() - - print((upload_request(args.sha, args.pr, args.file, args.type, args.user, args.password, args.ca_cert, args.host, args.db))) - - - diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index 260a928184d..c6b1e2be275 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -82,6 +82,9 @@ Comparison of EBS and EFS is from Ramazan Polat.
Results for Hetzner and Scaleway are from Anthony Najjar Simon (Panelbear).
Results for GCP are from Vy Nguyen Tan.
Results for ThinkPad P15 are from Mikhail Shiryaev.
+Results for RockPi4 are from Kirill Zholnay.
+Results for Xeon 6266C are from David in Shanghai.
+Results for SSDNodes and Cavium are from Lorenzo QXIP.

diff --git a/website/benchmark/hardware/results/cavium_4core.json b/website/benchmark/hardware/results/cavium_4core.json new file mode 100644 index 00000000000..a7cb96b2cd3 --- /dev/null +++ b/website/benchmark/hardware/results/cavium_4core.json @@ -0,0 +1,54 @@ +[ + { + "system": "Cavium ARM64 CPU (4 Core, 1.5 GHz, NVMe SSD)", + "system_full": "Cavium ARM64 CPU (4 Corem 1.5 GHz, NVMe SSD), 16 GiB", + "time": "2021-12-27 00:00:00", + "kind": "server", + "result": + [ +[0.004, 0.004, 0.004], +[0.196, 0.178, 0.180], +[0.495, 0.437, 0.426], +[0.715, 0.499, 0.499], +[0.992, 0.798, 0.795], +[3.958, 3.750, 3.751], +[0.288, 0.274, 0.273], +[0.236, 0.231, 0.239], +[3.129, 2.936, 2.918], +[4.221, 3.924, 3.934], +[2.395, 2.285, 2.226], +[2.832, 2.703, 2.644], +[6.510, 6.301, 6.262], +[7.933, 7.669, 7.704], +[7.397, 7.122, 7.146], +[4.692, 4.537, 4.540], +[15.194, 14.835, 15.051], +[10.446, 10.036, 10.072], +[26.472, 25.655, 25.809], +[0.879, 0.669, 0.694], +[14.614, 13.755, 13.726], +[16.876, 15.675, 15.703], +[34.715, 33.204, 33.250], +[18.850, 15.387, 15.332], +[4.455, 4.025, 4.016], +[3.667, 3.415, 3.457], +[4.507, 4.057, 4.049], +[14.344, 13.394, 13.390], +[17.519, 17.052, 17.067], +[8.606, 8.611, 8.545], +[6.936, 6.491, 6.496], +[10.020, 9.260, 9.233], +[39.793, 39.631, 39.553], +[30.310, 29.604, 29.572], +[30.485, 29.557, 29.649], +[8.539, 8.337, 8.342], +[0.931, 0.912, 0.912], +[0.523, 0.516, 0.507], +[0.460, 0.448, 0.450], +[1.880, 1.817, 1.884], +[0.141, 0.119, 0.117], +[0.116, 0.095, 0.092], +[0.021, 0.017, 0.014] + ] + } +] diff --git a/website/benchmark/hardware/results/rock_pi.json b/website/benchmark/hardware/results/rock_pi.json new file mode 100644 index 00000000000..210dc213a49 --- /dev/null +++ b/website/benchmark/hardware/results/rock_pi.json @@ -0,0 +1,54 @@ +[ + { + "system": "Rock Pi 4, 4GiB, NVMe", + "system_full": "Rock Pi 4, 4GiB C, NVMe", + "time": "2021-12-23 00:00:00", + "kind": "desktop", + "result": + [ +[0.007, 0.014, 0.005], +[0.229, 0.132, 0.215], +[0.489, 0.351, 0.306], +[0.879, 0.774, 0.768], +[1.034, 0.966, 0.879], +[2.491, 2.249, 2.493], +[0.379, 0.212, 0.213], +[0.227, 0.140, 0.152], +[3.944, 3.823, 3.805], +[5.272, 4.985, 5.069], +[2.356, 2.193, 2.254], +[2.819, 2.595, 2.568], +[9.124, 8.306, 8.529], +[11.857, 11.412, 11.290], +[9.796, 9.477, 9.610], +[8.846, 8.867, 8.909], +[null, null, null], +[null, null, null], +[null, null, null], +[1.293, 0.887, 0.980], +[15.018, 14.928, 14.748], +[19.179, 17.889, 18.021], +[45.524, 46.927, 46.909], +[23.904, 23.197, 23.511], +[5.264, 4.891, 4.936], +[4.211, 3.940, 4.047], +[5.113, 4.615, 4.783], +[17.910, 16.800, 16.410], +[23.537, 22.249, 22.172], +[16.549, 16.388, 16.337], +[9.562, 9.006, 9.260], +[17.097, 17.676, 17.585], +[null, null, null], +[null, null, null], +[null, null, null], +[null, null, null], +[1.668, 1.469, 1.342], +[0.463, 0.442, 0.353], +[0.486, 0.410, 0.346], +[2.190, 2.014, 1.878], +[0.263, 0.097, 0.201], +[0.173, 0.082, 0.139], +[0.188, 0.024, 0.016] + ] + } +] diff --git a/website/benchmark/hardware/results/ssdnodes.json b/website/benchmark/hardware/results/ssdnodes.json new file mode 100644 index 00000000000..623f4b49687 --- /dev/null +++ b/website/benchmark/hardware/results/ssdnodes.json @@ -0,0 +1,54 @@ +[ + { + "system": "SSDNodes G6", + "system_full": "G6 Performance+ 48GB RAM, 720GB NVMe, 12x Intel Silver vCPU, KVM", + "time": "2021-12-27 00:00:00", + "kind": "cloud", + "result": + [ +[0.002, 0.002, 0.002], +[0.021, 0.017, 0.017], +[0.053, 0.034, 0.039], +[0.090, 0.053, 0.047], +[0.146, 0.123, 
0.117], +[0.358, 0.325, 0.323], +[0.025, 0.020, 0.021], +[0.042, 0.015, 0.014], +[0.566, 0.511, 0.524], +[0.704, 0.626, 0.591], +[0.229, 0.174, 0.194], +[0.255, 0.210, 0.206], +[0.849, 0.725, 0.701], +[0.984, 0.907, 0.948], +[0.952, 0.886, 0.899], +[0.772, 0.741, 0.738], +[2.945, 2.667, 2.703], +[1.645, 1.646, 1.576], +[5.342, 5.042, 5.306], +[0.088, 0.052, 0.051], +[1.176, 0.825, 0.839], +[1.261, 1.001, 0.933], +[2.977, 2.190, 2.193], +[1.872, 0.991, 0.956], +[0.368, 0.264, 0.275], +[0.300, 0.247, 0.241], +[0.329, 0.272, 0.277], +[1.124, 0.870, 0.824], +[1.545, 1.270, 1.281], +[1.478, 1.399, 1.463], +[0.809, 0.696, 0.677], +[1.095, 0.875, 0.832], +[5.164, 4.841, 4.613], +[3.859, 3.435, 3.396], +[4.054, 3.479, 3.496], +[1.325, 1.274, 1.294], +[0.261, 0.248, 0.266], +[0.102, 0.096, 0.104], +[0.102, 0.090, 0.094], +[0.600, 0.550, 0.566], +[0.041, 0.031, 0.028], +[0.029, 0.021, 0.025], +[0.007, 0.006, 0.005] + ] + } +] diff --git a/website/benchmark/hardware/results/xeon_gold_6266.json b/website/benchmark/hardware/results/xeon_gold_6266.json new file mode 100644 index 00000000000..0e68466a633 --- /dev/null +++ b/website/benchmark/hardware/results/xeon_gold_6266.json @@ -0,0 +1,56 @@ +[ + { + "system": "Huawei Cloud c6.xlarge.4, 4vCPUs, 16 GiB", + "system_full": "Huawei Cloud c6.xlarge.4, Xeon Gold 6266C, 3GHz, 4vCPU, 16GiB RAM, vda1 40GB", + "cpu_vendor": "Intel", + "cpu_model": "Xeon Gold 6266C", + "time": "2021-12-23 00:00:00", + "kind": "cloud", + "result": + [ +[0.001, 0.001, 0.001], +[0.034, 0.023, 0.023], +[0.168, 0.105, 0.104], +[0.745, 0.162, 0.160], +[1.512, 0.328, 0.327], +[2.408, 1.162, 1.155], +[0.069, 0.052, 0.051], +[0.074, 0.027, 0.026], +[2.314, 1.833, 1.796], +[2.749, 2.014, 2.011], +[1.424, 0.618, 0.579], +[1.494, 0.681, 0.677], +[3.208, 2.457, 2.529], +[5.071, 3.329, 3.411], +[3.968, 3.289, 3.330], +[3.142, 2.925, 2.827], +[9.473, 9.034, 8.850], +[6.768, 6.256, 6.115], +[18.388, 17.790, 17.892], +[1.105, 0.195, 0.194], +[20.310, 3.459, 3.416], +[22.772, 3.811, 3.773], +[42.554, 8.738, 8.640], +[30.747, 4.013, 3.967], +[4.707, 0.973, 0.965], +[2.003, 0.845, 0.839], +[4.978, 0.991, 0.974], +[19.726, 3.293, 3.264], +[17.151, 5.171, 5.134], +[3.620, 3.600, 3.600], +[4.693, 2.172, 2.115], +[10.842, 2.686, 2.750], +[17.857, 17.086, 16.907], +[22.926, 13.070, 12.808], +[22.803, 12.727, 12.867], +[4.189, 3.888, 3.893], +[0.227, 0.176, 0.177], +[0.085, 0.068, 0.067], +[0.101, 0.064, 0.067], +[0.493, 0.438, 0.399], +[0.042, 0.022, 0.021], +[0.029, 0.017, 0.015], +[0.007, 0.005, 0.003] + ] + } +] diff --git a/website/blog/en/2021/clickhouse-v21.12-released.md b/website/blog/en/2021/clickhouse-v21.12-released.md index 01450280fa0..d1e6cddbc35 100644 --- a/website/blog/en/2021/clickhouse-v21.12-released.md +++ b/website/blog/en/2021/clickhouse-v21.12-released.md @@ -41,9 +41,9 @@ ClickHouse Keeper development started in Sep 2020, more than a year ago. It was **How does this help you?** -ClickHouse Keeper is a drop-in replacement for ZooKeeper. It implements ZooKeeper wire protocol and data model, but does it better. +ClickHouse Keeper is a drop-in replacement for ZooKeeper. It implements the ZooKeeper wire protocol and data model, but does it better. -In contrast to ZooKeeper, there are no issues with zxid overflow or packet sizes. It has better memory usage and it does not require JVM tuning (because it does not use JVM). Logs and snapshots are compressed (about 10x typical) and checksummed. It can run as a separate process or directly inside clickhouse-server. 
You can use it with ClickHouse or with your Kafkas and Hadoops as well. +In contrast to ZooKeeper, there are no issues with zxid overflow or packet sizes. It has better memory usage and it does not require JVM tuning (because it does not use the JVM). Logs and snapshots are compressed (by about 10x typically) and checksummed. It can run as a separate process or directly inside clickhouse-server. You can use it with ClickHouse or with your Kafkas and Hadoops as well. [More info](http://presentations.clickhouse.tech/meetup54/keeper.pdf). @@ -54,11 +54,11 @@ When using the table engines `File`, `URL`, and `HDFS` ClickHouse now supports p Similarly, when exporting data from ClickHouse using the `file`, `url`, and `hdfs` table functions you can now specify that the data is to be partitioned into multiple files using a `PARTITION BY` clause. For example, `INSERT INTO TABLE FUNCTION file('path/hits_{_partition_id}', 'TSV', 'columns...') PARTITION BY toYYYYMM(EventDate) VALUES ...` will create as many files as there are unique months in the dataset. -The `s3` table function has supported partitioned writes since ClickHouse 21.10. +The `s3` table function has already supported partitioned writes since ClickHouse 21.10. **How does this help you?** -If data is split into multiple files, then `SELECT` query will be automatically parallelized. Example: +If data is split into multiple files, `SELECT` queries will be automatically parallelized. For example: ``` SELECT user_id, count() FROM s3( @@ -68,7 +68,7 @@ SELECT user_id, count() FROM s3( 'user_id UInt64, ...') ``` -You can even parallelize data processing across distributed compute cluster if you use `s3Cluster` table function: +You can even parallelize data processing across a distributed compute cluster if you use the `s3Cluster` table function: ``` SELECT user_id, count() FROM s3Cluster( @@ -79,7 +79,7 @@ SELECT user_id, count() FROM s3Cluster( 'user_id UInt64, ...') ``` -It can also be used for integrations with external data processing tools that consumes data from `s3`. +It can also be used for integration with external data processing tools that consume data from `s3`. ## FROM INFILE in clickhouse-client now supports glob patterns and parallel reading @@ -91,18 +91,18 @@ INSERT INTO my_table FROM INFILE '*.csv.gz' FORMAT CSV ``` Glob patterns support `*`, `?` and `{n..m}` with `{1..10}` or (aligned) `{01..10}` forms. -This query will be automatically parallelized, it will also automatically detect compression format from file extension and decompress transparently. +This query will be automatically parallelized and it will also automatically detect the compression format from the file extension and decompress transparently. This improvement is done by **Arthur Filatenkov**. **How does this help you?** -Now you don't have to recall how to write parallel for loop in your command line shell. clickhouse-client will do everything for you, it works intuitively and fast. +Now you don't have to recall how to write a parallel for loop in your command line shell. clickhouse-client will do everything for you, it works intuitively and fast. ## Support for INTERVAL operator inside WITH FILL modifier for ORDER BY clause -What's the... WITH FILL modifier in ORDER BY clause? Just look at the example. +What's the... `WITH FILL` modifier in the `ORDER BY` clause? Take a look at the example: ``` :) SELECT EventDate, count() FROM test.hits WHERE CounterID = 2841673 GROUP BY EventDate ORDER BY EventDate @@ -115,10 +115,10 @@ What's the... 
WITH FILL modifier in ORDER BY clause? Just look at the example. └────────────┴─────────┘ ``` -We have the report with Mar 17th, 19th, 21th, 22th. But Mar 18th and 20th are missing, because there is no data for these dates. +We have the report with Mar 17th, 19th, 21st, and 22nd. But Mar 18th and 20th are missing, because there is no data for these dates. And this is how it works in all SQL databases. -But ClickHouse also has quite unique and neat `WITH FILL` modifier for `ORDER BY clause`. +But ClickHouse also has a quite unique and neat `WITH FILL` modifier for the `ORDER BY` clause. You just write: ``` @@ -140,12 +140,12 @@ And missing data is automatically filled. You can also add `FROM` and `TO`: ``` -ORDER BY date WITH FILL FROM '2014-03-01'::Date TO '2014-03-31'::Date STEP 1; +ORDER BY EventDate WITH FILL FROM '2014-03-01'::Date TO '2014-03-31'::Date STEP 1; ``` And it will automatically fill missing rows in the report. -The STEP can be arbitrary number. But what to do if you want fill missing dates for report by months? You cannot just write STEP 30 or STEP 31 because months contain different number of days... +The `STEP` can be an arbitrary number. But what can you do if you want to fill missing dates for a report by months? You cannot just write `STEP 30` or `STEP 31` because different months contain different number of days... Since ClickHouse version 21.12 you can do it like this: @@ -155,16 +155,16 @@ ORDER BY EventDate WITH FILL STEP INTERVAL 1 MONTH `INTERVAL` is a standard SQL operator, you can use SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER and YEAR. -This is implemented by **Anton Popov** who is the author of "WITH FILL" feature. +This is implemented by **Anton Popov** who is the author of the "WITH FILL" feature. **How does this help you?** -It allows to avoid postprocessing step for your reports. +It allows you to avoid a postprocessing step for your reports. ## Add Support For "Identifier" Table and Database Query Parameters -ClickHouse has support for parameterized queries. +ClickHouse has support for parameterized queries. For example: ``` SELECT uniq(user_id) FROM table WHERE website = {name:String} @@ -178,7 +178,7 @@ curl https://clickhouse-server:8443/?param_name=upyachka -d 'SELECT uniq(user_id You can even create customized API handlers for clickhouse-server based on prepared queries. -Since version 21.12 we introduce support for using parameters for tables and databases in your queries. This is implemented with `Identifier` table parameter: +In version 21.12 we introduce support for using parameters for tables and databases in your queries. This is implemented with the `Identifier` table parameter: ``` SELECT uniq(user_id) FROM {tbl:Identifier} @@ -188,7 +188,7 @@ Identifier parameters also work for CREATE, DROP and all DDL queries. This is im **How does this help you?** -Let ClickHouse do the heavy-lifting and keep your scripts safe and secure. +Let ClickHouse do the heavy lifting and keep your scripts safe and secure. ## Bool Data Type @@ -220,7 +220,7 @@ CREATE TABLE ) ... ``` -Constraints are checked on INSERT. In this example we validate the URL and check that Domain column actually contains the domain of URL. +Constraints are checked on `INSERT`. In this example we validate the URL and check that the `Domain` column actually contains the domain of the URL. Since version 21.12 constraints can also automatically optimize your queries! 
For example, if you write: @@ -234,19 +234,19 @@ The query can be automatically rewritten to: SELECT count() FROM hits WHERE Domain = 'ghe.clickhouse.tech' ``` -because `Domain` column is smaller, more compressable, will be faster to read and it does not require calculation of the domain from URL. -The only thing you need is to enable the `optimize_using_constraints` and `optimize_substitute_columns` settings. +Because the `Domain` column is smaller and more compressable it will be faster to read and does not require calculation of the domain from the URL. +The only thing you need to do is to enable the `optimize_using_constraints` and `optimize_substitute_columns` settings. -As a bonus, new type of constraints is introduced: `ASSUME`. +As a bonus, we introduced a new type of constraint: `ASSUME`. ``` CONSTRAINT my_constraint ASSUME Domain = domainWithoutWWW(URL) ``` -This type of constraint will not check anything on INSERT, but still use the assumption to optimize the queries. +This type of constraint will not check anything on `INSERT` but still use the assumption to optimize the queries. It can also do logical inference, simplify the conditions and remove the conditions that are proved to be satisfied by constraints. -It is controlled by `convert_query_to_cnf` setting. You can also enable `optimize_append_index` setting. With this setting ClickHouse will derive more consitions on the table primary key. +It is controlled by the `convert_query_to_cnf` setting. You can also enable `optimize_append_index`. With this setting ClickHouse will derive more conditions on the table primary key. The idea is so powerful that we cannot resist adding one more feature: *indices for hypothesis*. @@ -267,11 +267,11 @@ Rather than tell all your users to change their queries you can use a table cons ## Read Large Remote Files In Chunks -ClickHouse combines fast query engine and efficient data storage. It also allows to integrate external data sources for data import and export or even to process external datasets on the fly without the need for data import or preprocessing. +ClickHouse combines a fast query engine and efficient data storage. It also allows to integrate external data sources for data import and export or even to process external datasets on the fly without the need for data import or preprocessing. When reading large files in `Parquet`, `ORC`, and `Arrow` format using the `s3`, `url`, and `hdfs` table functions, ClickHouse will now automatically choose whether to read the entire file at once or read parts of it incrementally. This is now enabled by default and the setting `remote_read_min_bytes_for_seek` controls when to switch from reading it all to reading in chunks. The default is 1MiB. -`Parquet`, `ORC`, and `Arrow` are column-oriented formats (quite similar to ClickHouse Native format) and now we can read only requested columns even if they are being read from remote HTTP server with the `url` table function (range requests will be performed to skip unneeded data). +`Parquet`, `ORC`, and `Arrow` are column-oriented formats (quite similar to the ClickHouse Native format) and now we can read only requested columns even if they are being read from a remote HTTP server with the `url` table function (range requests will be performed to skip unneeded data). This feature is implemented by **Kseniia Sumarokova**. @@ -282,4 +282,4 @@ In previous versions, when reading files in Arrow-based formats from remote loca ## ... 
And Many More -Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) for 21.12 "Christmas" release for the full list of the gifts from [ClickHouse Team](https://clickhouse.com/careers/). +Read the [full changelog](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) for the 21.12 "Christmas" release for the full list of gifts from the [ClickHouse Team](https://clickhouse.com/company/). diff --git a/website/blog/en/2021/tests-visualization.md b/website/blog/en/2021/tests-visualization.md new file mode 100644 index 00000000000..259cb4d8e34 --- /dev/null +++ b/website/blog/en/2021/tests-visualization.md @@ -0,0 +1,45 @@ +--- +title: 'Decorating a Christmas Tree With the Help Of Flaky Tests' +image: 'https://blog-images.clickhouse.com/en/2021/tests-visualization/tests.png' +date: '2021-12-27' +author: '[Alexey Milovidov](https://github.com/alexey-milovidov)' +tags: ['tests', 'ci', 'flaky', 'christmas', 'visualization'] +--- + +Test suites and testing infrastructure are one of the main assets of ClickHouse. We have tons of functional, integration, unit, performance, stress and fuzz tests. Tests are run on a per-commit basis and results are publicly available. + +We also save the results of all test runs into the database in ClickHouse. We started collecting results in June 2020, and we have 1 777 608 240 records so far. Now we run around 5 to 9 million tests every day. + +Tests are good (in general). A good test suite allows for fast development iterations, stable releases, and accepting more contributions from the community. We love tests. If there's something strange in ClickHouse, what are we gonna do? Write more tests. + +Some tests can be flaky. The reasons for flakiness are uncountable - most of them are simple timing issues in the test script itself, but sometimes a test that fails one time out of a thousand can uncover subtle logic errors in the code. + +The problem is how to deal with flaky tests. Some people suggest automatically muting the "annoying" flaky tests. Or adding automatic retries in case of failure. We believe that this is all wrong. Instead of trying to ignore flaky tests, we do the opposite: we put maximum effort into making the tests even more flaky! + +Our recipes for flaky tests: +— never mute or restart them; if the test failed once, always look and investigate the cause; +— randomize the environment for every test run so the test will have more possible reasons to fail; +— if new tests are added, run them 100 times and if at least one fails, do not merge the pull request; +— if new tests are added, use them as a corpus for fuzzing - it will uncover corner cases even if the author did not write tests for them; +— [randomize thread scheduling](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ThreadFuzzer.h) and add random sleeps and switching between CPU cores at random places and before and after mutex locks/unlocks; +— run everything in parallel on slow machines; + +Key point: to prevent flaky tests, we make our tests as flaky as possible. + +## Nice Way To Visualize Flaky Tests + +There is a test suite named "[functional stateless tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/0_stateless)" that has 3772 tests.
For every day since 2020-06-13 (561 days) and every test (3772 tests), I drew a picture of size 561x3772 where a pixel is green if all test runs finished successfully in the master branch during that day (for all commits and all combinations: release, debug+assertions, ASan, MSan, TSan, UBSan), and a pixel is red if at least one run failed. The pixel will be transparent if the test did not exist that day. + +This visualization is a toy that I've made for fun: + +![Visualization](https://blog-images.clickhouse.com/en/2021/tests-visualization/tree_half.png) + +It looks like a Christmas Tree (you need a bit of imagination). If you have a different kind of imagination, you can see it as a green field with flowers. + +The time is from left to right. The tests are numbered with non-unique numbers (new tests usually get larger numbers), and these numbers are on the vertical axis (newer tests on top). + +If you see red dots in a horizontal line - it is a flaky test. If you see red dots in a vertical line - it means that one day we accidentally broke the master branch. If you see black horizontal lines or cuts in the tree - it means that the tests were added with some old numbers, most likely because some long-living feature branch was merged. If you see black vertical lines - it means that some days tests were not run. + +The velocity of adding new tests is represented by how tall and narrow the Christmas tree is. When we add a large number of tests, the tree grows with an almost vertical slope. + +The image is prepared by an [HTML page](https://github.com/ClickHouse/ClickHouse/pull/33185) with some JavaScript that is querying a ClickHouse database directly and writing to a canvas. It took around ten seconds to build this picture. I also prepared an [interactive version](https://blog-images.clickhouse.com/en/2021/tests-visualization/demo.html) with already-saved data where you can play and find your favorite tests.
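The linked pull request contains the actual page; purely as an illustration of the approach described above, here is a minimal, hypothetical sketch that pulls per-day, per-test results from a ClickHouse server over HTTP and paints one pixel per (day, test) into a PNG instead of a browser canvas. The table and column names (`test_results`, `check_date`, `test_name`, `test_status`) and the server address are assumptions made for this example, not the schema of the ClickHouse CI database.

```
# Hypothetical sketch: render a green/red "test matrix" image from a ClickHouse table.
# Assumed schema: test_results(check_date Date, test_name String, test_status String).
import urllib.parse
import urllib.request

from PIL import Image  # pip install Pillow

CLICKHOUSE_URL = "http://localhost:8123/"  # placeholder address of a ClickHouse server

# One row per (day, test): all_ok is 1 only if every run of that test passed on that day.
QUERY = """
SELECT check_date, test_name, min(test_status = 'OK') AS all_ok
FROM test_results
GROUP BY check_date, test_name
FORMAT TSV
"""

def fetch_rows():
    url = CLICKHOUSE_URL + "?" + urllib.parse.urlencode({"query": QUERY})
    with urllib.request.urlopen(url) as response:
        for line in response.read().decode("utf-8").splitlines():
            day, test, all_ok = line.split("\t")
            yield day, test, all_ok == "1"

def draw(rows, output="tests.png"):
    rows = list(rows)
    if not rows:
        return
    days = sorted({day for day, _, _ in rows})
    tests = sorted({test for _, test, _ in rows})
    day_x = {day: x for x, day in enumerate(days)}
    test_y = {test: y for y, test in enumerate(tests)}

    # Transparent background: a pixel stays transparent if the test did not run that day.
    image = Image.new("RGBA", (len(days), len(tests)), (0, 0, 0, 0))
    for day, test, all_ok in rows:
        color = (0, 160, 0, 255) if all_ok else (220, 0, 0, 255)
        image.putpixel((day_x[day], test_y[test]), color)
    image.save(output)

if __name__ == "__main__":
    draw(fetch_rows())
```

The real page does the drawing in the browser on an HTML canvas, so nothing is written to disk; the PNG here is only a stand-in that keeps the sketch self-contained.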
diff --git a/website/css/main.css b/website/css/main.css index 0e36340c25c..56230529a11 100644 --- a/website/css/main.css +++ b/website/css/main.css @@ -1 +1 @@ -@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and 
(min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-group{position:relative}form .form-group.is-select:before{border-left:6px solid transparent;border-right:6px solid transparent;border-top:8px solid #6c757d;content:"";display:block;position:absolute;right:33px;top:calc(50% - 4px);z-index:10}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form 
.form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none;padding-right:24px;white-space:pre-wrap}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat 
url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.severity-table th{background:#f1f6f9;font-size:.875rem;padding:8px 16px}.severity-table td{border-top:1px solid #d6dbdf;padding:16px}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px 
rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase}
\ No newline at end of file
+@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px
24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 
40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-group{position:relative}form .form-group.is-select:before{border-left:6px solid transparent;border-right:6px solid transparent;border-top:8px solid #6c757d;content:"";display:block;position:absolute;right:33px;top:calc(50% - 4px);z-index:10}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none;padding-right:24px;white-space:pre-wrap}form 
select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar 
.navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}@media screen and (min-width:616px){.navbar.py-1+div .anchor-fixer :target{scroll-margin-top:62px}}@media screen and (min-width:616px){.navbar.py-2+div .anchor-fixer :target{scroll-margin-top:78px}}@media screen and (min-width:616px){.navbar.py-3+div .anchor-fixer :target{scroll-margin-top:94px}}@media screen and (min-width:616px){.navbar.py-4+div .anchor-fixer :target{scroll-margin-top:110px}}@media screen and (min-width:616px){.navbar.py-5+div .anchor-fixer :target{scroll-margin-top:126px}}@media screen and (min-width:616px){.navbar.py-6+div .anchor-fixer :target{scroll-margin-top:142px}}@media screen and (min-width:616px){.navbar.py-7+div .anchor-fixer :target{scroll-margin-top:158px}}@media screen and (min-width:616px){.navbar.py-8+div .anchor-fixer :target{scroll-margin-top:174px}}@media screen and (max-width:615.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:73px}}@media screen and (max-width:399.98px){.navbar+div .anchor-fixer :target{scroll-margin-top:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.severity-table th{background:#f1f6f9;font-size:.875rem;padding:8px 16px}.severity-table td{border-top:1px solid #d6dbdf;padding:16px}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background 
.2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 
93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase}
\ No newline at end of file
diff --git a/website/src/scss/_variables.scss b/website/src/scss/_variables.scss
index d511c757055..55b06ac8409 100644
--- a/website/src/scss/_variables.scss
+++ b/website/src/scss/_variables.scss
@@ -289,6 +289,8 @@ $nav-tabs-link-active-border-color: $gray-700;

 $navbar-padding-y: 24px;
 $navbar-padding-x: 0;
+$navbar-nav-height: 46px;
+$navbar-height-xl: 80px;


 // Cards
diff --git a/website/src/scss/components/_navbar.scss b/website/src/scss/components/_navbar.scss
index 53a834d2ed7..ca6bc52630b 100644
--- a/website/src/scss/components/_navbar.scss
+++ b/website/src/scss/components/_navbar.scss
@@ -52,7 +52,7 @@

   &-nav {
     align-items: center;
-    height: 46px;
+    height: $navbar-nav-height;
   }

   .nav-item:not(:last-child) {
@@ -131,6 +131,35 @@
   }

   @media screen and (max-width: 399.98px) {
-    height: 80px;
+    height: $navbar-height-xl;
+  }
+}
+
+
+.navbar {
+  @for $i from 1 through 8 {
+    &.py-#{$i} {
+      + div {
+        .anchor-fixer {
+          :target {
+            @media screen and (min-width: 616px) {
+              scroll-margin-top: $navbar-nav-height + $spacer * $i * 2;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  + div {
+    .anchor-fixer {
+      :target {
+        @media screen and (max-width: 615.98px) {
+          scroll-margin-top: 73px;
+        }
+        @media screen and (max-width: 399.98px) {
+          scroll-margin-top: $navbar-height-xl;
+        }
+      }
+    }
   }
 }
diff --git a/website/templates/docs/content.html b/website/templates/docs/content.html
index 3f4db728e99..c2835dd1f39 100644
--- a/website/templates/docs/content.html
+++ b/website/templates/docs/content.html
@@ -1,4 +1,4 @@
-
+
 {% if not single_page %}
 {% set ancestors = page.ancestors|reverse|list %}

diff --git a/website/templates/global/banner.html b/website/templates/global/banner.html
index 47763f98082..6a3e38b6e1a 100644
--- a/website/templates/global/banner.html
+++ b/website/templates/global/banner.html
@@ -1,6 +1,6 @@