diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index a9e472ab09c..3b1bcc6bf78 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -9,7 +9,24 @@ on: # yamllint disable-line rule:truthy branches: - 'backport/**' jobs: - DockerHubPush: + DockerHubPushAarch64: + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - name: Clear repository @@ -20,12 +37,40 @@ jobs: - name: Images check run: | cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json CompatibilityCheck: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] @@ -106,6 +151,47 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH $CACHES_PATH + BuilderDebAarch64: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=package_aarch64 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: 
always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH $CACHES_PATH BuilderDebAsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -247,6 +333,7 @@ jobs: BuilderReport: needs: - BuilderDebRelease + - BuilderDebAarch64 - BuilderDebAsan - BuilderDebTsan - BuilderDebDebug diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml index 857cbf2c495..cb06d853219 100644 --- a/.github/workflows/cancel.yml +++ b/.github/workflows/cancel.yml @@ -6,7 +6,7 @@ env: on: # yamllint disable-line rule:truthy workflow_run: - workflows: ["CIGithubActions", "ReleaseCI", "DocsCheck", "BackportPR"] + workflows: ["PullRequestCI", "ReleaseCI", "DocsCheck", "BackportPR"] types: - requested jobs: diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 23c0840d379..9d400329aec 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -28,24 +28,70 @@ jobs: run: | cd $GITHUB_WORKSPACE/tests/ci python3 run_check.py - DockerHubPush: + DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | - sudo rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE - name: Check out repository code uses: actions/checkout@v2 - name: Images check run: | cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: + needs: CheckLabels + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json DocsCheck: needs: DockerHubPush runs-on: [self-hosted, func-tester] diff --git a/.github/workflows/jepsen.yml b/.github/workflows/jepsen.yml index 1b01b4d5074..5c67b045fa7 100644 --- a/.github/workflows/jepsen.yml +++ b/.github/workflows/jepsen.yml @@ -8,7 +8,7 @@ on: # yamllint disable-line rule:truthy schedule: - cron: '0 */6 * * *' workflow_run: 
- workflows: ["CIGithubActions"] + workflows: ["PullRequestCI"] types: - completed workflow_dispatch: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index c2ed39224aa..e7eb4f7d69e 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -9,7 +9,24 @@ on: # yamllint disable-line rule:truthy branches: - 'master' jobs: - DockerHubPush: + DockerHubPushAarch64: + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - name: Clear repository @@ -20,12 +37,40 @@ jobs: - name: Images check run: | cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json StyleCheck: needs: DockerHubPush runs-on: [self-hosted, style-checker] @@ -168,6 +213,47 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH + BuilderDebAarch64: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=package_aarch64 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - 
name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH $CACHES_PATH BuilderPerformance: needs: DockerHubPush runs-on: [self-hosted, builder] @@ -815,6 +901,7 @@ jobs: BuilderReport: needs: - BuilderDebRelease + - BuilderDebAarch64 - BuilderBinRelease - BuilderDebAsan - BuilderDebTsan @@ -963,6 +1050,41 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH + FunctionalStatelessTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH FunctionalStatelessTestAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1478,6 +1600,41 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH + FunctionalStatefulTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateful_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateful tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse + KILL_TIMEOUT=3600 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH FunctionalStatefulTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -2659,6 +2816,7 @@ jobs: - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseOrdinary + - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 - FunctionalStatelessTestTsan0 @@ -2671,6 +2829,7 @@ jobs: - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease - FunctionalStatefulTestReleaseDatabaseOrdinary + - FunctionalStatefulTestAarch64 - FunctionalStatefulTestAsan - FunctionalStatefulTestTsan - FunctionalStatefulTestMsan diff --git a/.github/workflows/main.yml b/.github/workflows/pull_request.yml similarity index 96% rename from .github/workflows/main.yml rename to .github/workflows/pull_request.yml index c42513ff9a8..ffcd9c50281 
100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/pull_request.yml @@ -1,4 +1,4 @@ -name: CIGithubActions +name: PullRequestCI env: # Force the stdout and stderr streams to be unbuffered @@ -31,7 +31,25 @@ jobs: run: | cd $GITHUB_WORKSPACE/tests/ci python3 run_check.py - DockerHubPush: + DockerHubPushAarch64: + needs: CheckLabels + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: needs: CheckLabels runs-on: [self-hosted, style-checker] steps: @@ -43,12 +61,40 @@ jobs: - name: Images check run: | cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json StyleCheck: needs: DockerHubPush runs-on: [self-hosted, style-checker] @@ -928,8 +974,8 @@ jobs: BuilderReport: needs: - BuilderDebRelease - - BuilderBinRelease - BuilderDebAarch64 + - BuilderBinRelease - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan @@ -1153,6 +1199,41 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH + FunctionalStatelessTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm 
-f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH FunctionalStatelessTestAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1668,6 +1749,41 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH + FunctionalStatefulTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateful_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateful tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse + KILL_TIMEOUT=3600 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH FunctionalStatefulTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -2888,6 +3004,7 @@ jobs: - FunctionalStatelessTestReleaseDatabaseReplicated0 - FunctionalStatelessTestReleaseDatabaseReplicated1 - FunctionalStatelessTestReleaseWideParts + - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 - FunctionalStatelessTestTsan0 @@ -2899,6 +3016,7 @@ jobs: - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease + - FunctionalStatefulTestAarch64 - FunctionalStatefulTestAsan - FunctionalStatefulTestTsan - FunctionalStatefulTestMsan diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1212bddb4a5..cb081c6ff41 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,24 @@ on: # yamllint disable-line rule:truthy - '.github/**' workflow_dispatch: jobs: - DockerHubPush: + DockerHubPushAarch64: + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - name: Clear repository @@ -30,12 +47,40 @@ jobs: - name: Images check run: | cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + 
with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json DocsRelease: needs: DockerHubPush runs-on: [self-hosted, func-tester] diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 68a3554741d..12117db2dc7 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -12,7 +12,24 @@ on: # yamllint disable-line rule:truthy - '23.[1-9][1-9]' - '24.[1-9][1-9]' jobs: - DockerHubPush: + DockerHubPushAarch64: + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_images_check.py --suffix aarch64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json + DockerHubPushAmd64: runs-on: [self-hosted, style-checker] steps: - name: Clear repository @@ -23,12 +40,40 @@ jobs: - name: Images check run: | cd $GITHUB_WORKSPACE/tests/ci - python3 docker_images_check.py + python3 docker_images_check.py --suffix amd64 + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }}/docker_images_check/changed_images_amd64.json + DockerHubPush: + needs: [DockerHubPushAmd64, DockerHubPushAarch64] + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Download changed aarch64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_aarch64 + path: ${{ runner.temp }} + - name: Download changed amd64 images + uses: actions/download-artifact@v2 + with: + name: changed_images_amd64 + path: ${{ runner.temp }} + - name: Images check + run: | + cd $GITHUB_WORKSPACE/tests/ci + python3 docker_manifests_merge.py --suffix amd64 --suffix aarch64 - name: Upload images files to artifacts uses: actions/upload-artifact@v2 with: name: changed_images - path: ${{ runner.temp }}/docker_images_check/changed_images.json + path: ${{ runner.temp }}/changed_images.json CompatibilityCheck: needs: [BuilderDebRelease] runs-on: [self-hosted, style-checker] @@ -109,6 +154,47 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH $CACHES_PATH + BuilderDebAarch64: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + CHECK_NAME=ClickHouse build check (actions) + BUILD_NAME=package_aarch64 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 
+ with: + name: changed_images + path: ${{ runner.temp }}/images_path + - name: Check out repository code + uses: actions/checkout@v2 + with: + submodules: 'true' + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci && python3 build_check.py "$CHECK_NAME" $BUILD_NAME + - name: Upload build URLs to artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_NAME }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH $CACHES_PATH BuilderDebAsan: needs: [DockerHubPush] runs-on: [self-hosted, builder] @@ -340,6 +426,7 @@ jobs: BuilderReport: needs: - BuilderDebRelease + - BuilderDebAarch64 - BuilderDebAsan - BuilderDebTsan - BuilderDebUBsan @@ -413,6 +500,41 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH + FunctionalStatelessTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateless_release/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH FunctionalStatelessTestAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -893,6 +1015,41 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH + FunctionalStatefulTestAarch64: + needs: [BuilderDebAarch64] + runs-on: [self-hosted, func-tester-aarch64] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateful_release + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateful tests (aarch64, actions) + REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse + KILL_TIMEOUT=3600 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH FunctionalStatefulTestAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, func-tester] @@ -1580,6 +1737,7 @@ jobs: - FunctionalStatelessTestDebug1 - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease + - 
FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 - FunctionalStatelessTestTsan0 @@ -1591,6 +1749,7 @@ jobs: - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease + - FunctionalStatefulTestAarch64 - FunctionalStatefulTestAsan - FunctionalStatefulTestTsan - FunctionalStatefulTestMsan diff --git a/base/base/StringRef.h b/base/base/StringRef.h index d0184dbc24c..eefc87121fc 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -48,7 +48,9 @@ struct StringRef std::string toString() const { return std::string(data, size); } explicit operator std::string() const { return toString(); } - constexpr explicit operator std::string_view() const { return {data, size}; } + std::string_view toView() const { return std::string_view(data, size); } + + constexpr explicit operator std::string_view() const { return std::string_view(data, size); } }; /// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/ diff --git a/docker/docs/builder/Dockerfile b/docker/docs/builder/Dockerfile index 50e3fadf9ac..53c492c2677 100644 --- a/docker/docs/builder/Dockerfile +++ b/docker/docs/builder/Dockerfile @@ -1,3 +1,4 @@ +# rebuild in #33610 # docker build -t clickhouse/docs-build . FROM ubuntu:20.04 diff --git a/docker/docs/check/Dockerfile b/docker/docs/check/Dockerfile index 55647df5c3e..174be123eed 100644 --- a/docker/docs/check/Dockerfile +++ b/docker/docs/check/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/docs-check . -FROM clickhouse/docs-builder +ARG FROM_TAG=latest +FROM clickhouse/docs-builder:$FROM_TAG COPY run.sh / diff --git a/docker/docs/release/Dockerfile b/docker/docs/release/Dockerfile index 63765180a4c..3c661c1d3fd 100644 --- a/docker/docs/release/Dockerfile +++ b/docker/docs/release/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/docs-release . -FROM clickhouse/docs-builder +ARG FROM_TAG=latest +FROM clickhouse/docs-builder:$FROM_TAG COPY run.sh / diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 8f886ea357d..e3e2e689b17 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -1,4 +1,5 @@ -# docker build -t clickhouse/binary-builder . +# rebuild in #33610 +# docker build -t clickhouse/binary-builder . FROM ubuntu:20.04 # ARG for quick switch to a given ubuntu mirror diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 89c34846efa..76a5f1d91c0 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -1,3 +1,4 @@ +# rebuild in #33610 # docker build -t clickhouse/deb-builder . FROM ubuntu:20.04 @@ -28,12 +29,14 @@ RUN apt-get update \ software-properties-common \ --yes --no-install-recommends +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH + # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. 
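The workflows above split the image build into `DockerHubPushAmd64` and `DockerHubPushAarch64` (native builds on the two runner pools, each invoking `docker_images_check.py --suffix <arch>` and uploading `changed_images_<arch>.json`), followed by a `DockerHubPush` job that downloads both artifacts and runs `docker_manifests_merge.py --suffix amd64 --suffix aarch64`. The merge script itself is not part of this diff; the Python sketch below only illustrates the general idea (combine the per-arch results and publish one multi-arch manifest list per image with `docker manifest`). The artifact layout and tag convention in the sketch are assumptions, not the real script's behavior.

```python
#!/usr/bin/env python3
"""Sketch of a manifest-merge step; NOT the real docker_manifests_merge.py.

Assumes each per-arch artifact is a JSON list of image names and that per-arch
images are tagged "<tag>-amd64" / "<tag>-aarch64", matching the --suffix flags
used in the workflows above. Both assumptions are for illustration only.
"""
import json
import subprocess

SUFFIXES = ["amd64", "aarch64"]


def load_changed(path: str) -> set:
    """Read one changed_images_<arch>.json artifact."""
    with open(path) as f:
        return set(json.load(f))


def merge_manifests(image: str, tag: str) -> None:
    """Combine the per-arch images into a single multi-arch manifest list."""
    arch_tags = [f"{image}:{tag}-{suffix}" for suffix in SUFFIXES]
    subprocess.check_call(["docker", "manifest", "create", f"{image}:{tag}", *arch_tags])
    subprocess.check_call(["docker", "manifest", "push", f"{image}:{tag}"])


if __name__ == "__main__":
    changed = set.union(*(load_changed(f"changed_images_{s}.json") for s in SUFFIXES))
    for image in sorted(changed):
        merge_manifests(image, "latest")
```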
# Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ - && chmod +x dpkg-deb \ - && cp dpkg-deb /usr/bin +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch} RUN apt-get update \ && apt-get install \ diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index a661f8875a2..6beab2e5bb7 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/test-base . -FROM clickhouse/test-util +ARG FROM_TAG=latest +FROM clickhouse/test-util:$FROM_TAG # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" @@ -28,12 +30,14 @@ RUN apt-get update \ software-properties-common \ --yes --no-install-recommends +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH + # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. # Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ - && chmod +x dpkg-deb \ - && cp dpkg-deb /usr/bin +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch} RUN apt-get update \ && apt-get install \ diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index d1059b3dacc..97f3f54ad98 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -1,12 +1,14 @@ +# rebuild in #33610 # docker build --network=host -t clickhouse/codebrowser . # docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output clickhouse/codebrowser -FROM clickhouse/binary-builder +ARG FROM_TAG=latest +FROM clickhouse/binary-builder:$FROM_TAG # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev +RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev libmlir-13-dev # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 6fa5b0aa9db..46b74d89e13 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/fasttest . -FROM clickhouse/test-util +ARG FROM_TAG=latest +FROM clickhouse/test-util:$FROM_TAG # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" @@ -28,12 +30,14 @@ RUN apt-get update \ software-properties-common \ --yes --no-install-recommends +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH + # Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able # to compress files using pigz (https://zlib.net/pigz/) instead of gzip. 
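The `ARG TARGETARCH` lines added to the builder and test Dockerfiles are populated by BuildKit, as the accompanying comment notes, and the `${TARGETARCH:-amd64}` fallback keeps a plain non-BuildKit `docker build` working. In these workflows each architecture is built natively on its own runner pool, but the same Dockerfiles also admit a single cross-platform build; the call below is a hedged sketch of that alternative, not something taken from the CI scripts.

```python
"""Illustration of how TARGETARCH gets a value; not the CI's actual build driver."""
import subprocess


def build_multiarch(image: str, tag: str, context_dir: str) -> None:
    # With buildx, one invocation builds both variants; BuildKit sets TARGETARCH
    # to "amd64" for one platform and "arm64" for the other.
    subprocess.check_call([
        "docker", "buildx", "build",
        "--platform", "linux/amd64,linux/arm64",
        "--tag", f"{image}:{tag}",
        "--push",
        context_dir,
    ])


if __name__ == "__main__":
    build_multiarch("clickhouse/deb-builder", "latest", "docker/packager/deb")
```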
# Significantly increase deb packaging speed and compatible with old systems -RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \ - && chmod +x dpkg-deb \ - && cp dpkg-deb /usr/bin +RUN arch=${TARGETARCH:-amd64} \ + && curl -Lo /usr/bin/dpkg-deb https://github.com/ClickHouse-Extras/dpkg/releases/download/1.21.1-clickhouse/dpkg-deb-${arch} RUN apt-get update \ && apt-get install \ diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index c602cba50aa..eb4b09c173f 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/fuzzer . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 89c2b19236e..91b26735fe5 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -1,44 +1,57 @@ +# rebuild in #33610 # docker build -t clickhouse/integration-test . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG SHELL ["/bin/bash", "-c"] RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get -y install \ - tzdata \ - python3 \ - libicu-dev \ bsdutils \ + curl \ + default-jre \ + g++ \ gdb \ - unixodbc \ - odbcinst \ + iproute2 \ + krb5-user \ + libicu-dev \ libsqlite3-dev \ libsqliteodbc \ - odbc-postgresql \ - sqlite3 \ - curl \ - tar \ - lz4 \ - krb5-user \ - iproute2 \ lsof \ - g++ \ - default-jre + lz4 \ + odbc-postgresql \ + odbcinst \ + python3 \ + rpm2cpio \ + sqlite3 \ + tar \ + tzdata \ + unixodbc \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -RUN rm -rf \ - /var/lib/apt/lists/* \ - /var/cache/debconf \ - /tmp/* \ -RUN apt-get clean +# Architecture of the image when BuildKit/buildx is used +ARG TARGETARCH -# Install MySQL ODBC driver -RUN curl 'https://downloads.mysql.com/archives/get/p/10/file/mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit.tar.gz' --location --output 'mysql-connector.tar.gz' && tar -xzf mysql-connector.tar.gz && cd mysql-connector-odbc-8.0.21-linux-glibc2.12-x86-64bit/lib && mv * /usr/local/lib && ln -s /usr/local/lib/libmyodbc8a.so /usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so +# Install MySQL ODBC driver from RHEL rpm +RUN arch=${TARGETARCH:-amd64} \ + && case $arch in \ + amd64) rarch=x86_64 ;; \ + arm64) rarch=aarch64 ;; \ + esac \ + && cd /tmp \ + && curl -o mysql-odbc.rpm "https://cdn.mysql.com/Downloads/Connector-ODBC/8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \ + && rpm2archive mysql-odbc.rpm \ + && tar xf mysql-odbc.rpm.tgz -C / ./usr/lib64/ \ + && LINK_DIR=$(dpkg -L libodbc1 | grep '^/usr/lib/.*-linux-gnu/odbc$') \ + && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR" \ + && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR"/libmyodbc.so # Unfortunately this is required for a single test for conversion data from zookeeper to clickhouse-keeper. # ZooKeeper is not started by default, but consumes some space in containers. 
# 777 perms used to allow anybody to start/stop ZooKeeper ENV ZOOKEEPER_VERSION='3.6.3' -RUN curl -O "https://mirrors.estointernet.in/apache/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" +RUN curl -O "https://dlcdn.apache.org/zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz" RUN tar -zxvf apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz && mv apache-zookeeper-${ZOOKEEPER_VERSION}-bin /opt/zookeeper && chmod -R 777 /opt/zookeeper && rm apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz RUN echo $'tickTime=2500 \n\ tickTime=2500 \n\ diff --git a/docker/test/keeper-jepsen/Dockerfile b/docker/test/keeper-jepsen/Dockerfile index 5bb7f9433c2..a794e076ec0 100644 --- a/docker/test/keeper-jepsen/Dockerfile +++ b/docker/test/keeper-jepsen/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/keeper-jepsen-test . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG ENV DEBIAN_FRONTEND=noninteractive ENV CLOJURE_VERSION=1.10.3.814 diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index c236b3a51d1..f484feecfd0 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -1,8 +1,14 @@ +# rebuild in #33610 # docker build -t clickhouse/pvs-test . -FROM clickhouse/binary-builder +ARG FROM_TAG=latest +FROM clickhouse/binary-builder:$FROM_TAG -RUN apt-get update --yes \ +# PVS studio doesn't support aarch64/arm64, so there is a check for it everywhere +# We'll produce an empty image for arm64 +ARG TARGETARCH + +RUN test x$TARGETARCH = xarm64 || ( apt-get update --yes \ && apt-get install \ bash \ wget \ @@ -15,7 +21,7 @@ RUN apt-get update --yes \ libprotoc-dev \ libgrpc++-dev \ libc-ares-dev \ - --yes --no-install-recommends + --yes --no-install-recommends ) #RUN wget -nv -O - http://files.viva64.com/etc/pubkey.txt | sudo apt-key add - #RUN sudo wget -nv -O /etc/apt/sources.list.d/viva64.list http://files.viva64.com/etc/viva64.list @@ -27,7 +33,7 @@ RUN apt-get update --yes \ ENV PKG_VERSION="pvs-studio-latest" -RUN set -x \ +RUN test x$TARGETARCH = xarm64 || ( set -x \ && export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \ && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \ && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \ @@ -35,7 +41,7 @@ RUN set -x \ && wget -nv "https://files.viva64.com/${PKG_VERSION}.deb" \ && { debsig-verify ${PKG_VERSION}.deb \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ - && dpkg -i "${PKG_VERSION}.deb" + && dpkg -i "${PKG_VERSION}.deb" ) ENV CCACHE_DIR=/test_output/ccache diff --git a/docker/test/split_build_smoke_test/Dockerfile b/docker/test/split_build_smoke_test/Dockerfile index 3cc2f26a507..5f84eb42216 100644 --- a/docker/test/split_build_smoke_test/Dockerfile +++ b/docker/test/split_build_smoke_test/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/split-build-smoke-test . 
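The recurring `ARG FROM_TAG=latest` followed by `FROM clickhouse/<parent>:$FROM_TAG` (the hunk below adds it to the split-build smoke-test image as well) makes the parent tag injectable at build time: an `ARG` declared before `FROM` is visible only to the `FROM` line, and the `latest` default preserves the old behavior when no argument is passed. A minimal sketch of how a dependent chain could be rebuilt with a pinned tag follows; the ordering, tag value, and build driver are assumptions for illustration, not taken from this diff.

```python
"""Sketch of rebuilding a child-image chain with FROM_TAG pinned; illustration only."""
import subprocess

# Parents must be rebuilt before children so FROM_TAG resolves to the fresh parent.
# The image names appear in the Dockerfiles of this diff; the order is assumed.
CHAIN = [
    ("clickhouse/test-util", "docker/test/util"),
    ("clickhouse/test-base", "docker/test/base"),
    ("clickhouse/stateless-test", "docker/test/stateless"),
    ("clickhouse/stateful-test", "docker/test/stateful"),
]


def rebuild_chain(tag: str) -> None:
    for image, context_dir in CHAIN:
        subprocess.check_call([
            "docker", "build",
            "--build-arg", f"FROM_TAG={tag}",  # consumed by `FROM parent:$FROM_TAG`
            "--tag", f"{image}:{tag}",
            context_dir,
        ])


if __name__ == "__main__":
    rebuild_chain("0-aarch64")  # hypothetical per-architecture tag
```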
-FROM clickhouse/binary-builder +ARG FROM_TAG=latest +FROM clickhouse/binary-builder:$FROM_TAG COPY run.sh /run.sh COPY process_split_build_smoke_test_result.py / diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index b6e9fad600c..7c16e69a99b 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/stateful-test . -FROM clickhouse/stateless-test +ARG FROM_TAG=latest +FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 05d26924b15..9b7fde7d542 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -1,11 +1,10 @@ +# rebuild in #33610 # docker build -t clickhouse/stateless-test . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" -RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5 - RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ @@ -30,7 +29,7 @@ RUN apt-get update -y \ tree \ unixodbc \ wget \ - mysql-client=5.7* \ + mysql-client=8.0* \ postgresql-client \ sqlite3 @@ -49,10 +48,13 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 ENV MAX_RUN_TIME=0 +ARG TARGETARCH + # Download Minio-related binaries -RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \ +RUN arch=${TARGETARCH:-amd64} \ + && wget "https://dl.min.io/server/minio/release/linux-${arch}/minio" \ && chmod +x ./minio \ - && wget 'https://dl.min.io/client/mc/release/linux-amd64/mc' \ + && wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \ && chmod +x ./mc ENV MINIO_ROOT_USER="clickhouse" diff --git a/docker/test/stateless_pytest/Dockerfile b/docker/test/stateless_pytest/Dockerfile index c1e47523f6d..f692f8f39f0 100644 --- a/docker/test/stateless_pytest/Dockerfile +++ b/docker/test/stateless_pytest/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/stateless-pytest . -FROM clickhouse/test-base +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index e28d25c9485..4e0b6741061 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/stress-test . 
-FROM clickhouse/stateful-test +ARG FROM_TAG=latest +FROM clickhouse/stateful-test:$FROM_TAG RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 2efb62689ff..4387d16ea7c 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -146,6 +146,7 @@ handle SIGUSR2 nostop noprint pass handle SIG$RTMIN nostop noprint pass info signals continue +gcore backtrace full info locals info registers @@ -263,3 +264,10 @@ done # Write check result into check_status.tsv clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%') LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv + +# Core dumps (see gcore) +# Default filename is 'core.PROCESS_ID' +for core in core.*; do + pigz $core + mv $core.gz /output/ +done diff --git a/docker/test/unit/Dockerfile b/docker/test/unit/Dockerfile index 20d67773363..b75bfb6661c 100644 --- a/docker/test/unit/Dockerfile +++ b/docker/test/unit/Dockerfile @@ -1,5 +1,7 @@ +# rebuild in #33610 # docker build -t clickhouse/unit-test . -FROM clickhouse/stateless-test +ARG FROM_TAG=latest +FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get install gdb diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index d9894451528..d9827260acb 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -1,3 +1,4 @@ +# rebuild in #33610 # docker build -t clickhouse/test-util . FROM ubuntu:20.04 diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index 44e68d645b7..b696c441374 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -175,7 +175,7 @@ When we are going to read something from a part in `MergeTree`, we look at `prim When you `INSERT` a bunch of data into `MergeTree`, that bunch is sorted by primary key order and forms a new part. There are background threads that periodically select some parts and merge them into a single sorted part to keep the number of parts relatively low. That’s why it is called `MergeTree`. Of course, merging leads to “write amplification”. All parts are immutable: they are only created and deleted, but not modified. When SELECT is executed, it holds a snapshot of the table (a set of parts). After merging, we also keep old parts for some time to make a recovery after failure easier, so if we see that some merged part is probably broken, we can replace it with its source parts. -`MergeTree` is not an LSM tree because it does not contain “memtable” and “log”: inserted data is written directly to the filesystem. This makes it suitable only to INSERT data in batches, not by individual row and not very frequently – about once per second is ok, but a thousand times a second is not. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications. +`MergeTree` is not an LSM tree because it does not contain MEMTABLE and LOG: inserted data is written directly to the filesystem. This behavior makes MergeTree much more suitable to insert data in batches. Therefore frequently inserting small amounts of rows is not ideal for MergeTree. For example, a couple of rows per second is OK, but doing it a thousand times a second is not optimal for MergeTree. 
However, there is an async insert mode for small inserts to overcome this limitation. We did it this way for simplicity’s sake, and because we are already inserting data in batches in our applications There are MergeTree engines that are doing additional work during background merges. Examples are `CollapsingMergeTree` and `AggregatingMergeTree`. This could be treated as special support for updates. Keep in mind that these are not real updates because users usually have no control over the time when background merges are executed, and data in a `MergeTree` table is almost always stored in more than one part, not in completely merged form. diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index 52fa307333c..ccf6da355b9 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -158,6 +158,8 @@ While inside the `build` directory, configure your build by running CMake. Befor export CC=clang CXX=clang++ cmake .. +If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-13 CXX=clang++-13`. The clang version will be in the script output. + The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. For a faster build, you can resort to the `debug` build type - a build with no optimizations. For that supply the following parameter `-D CMAKE_BUILD_TYPE=Debug`: diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index 52876674475..475416ffb94 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -66,4 +66,14 @@ SELECT COUNT() FROM mongo_table; └─────────┘ ``` +You can also adjust connection timeout: + +``` sql +CREATE TABLE mongo_table +( + key UInt64, + data String +) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'connectTimeoutMS=100000'); +``` + [Original article](https://clickhouse.com/docs/en/engines/table-engines/integrations/mongodb/) diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 048834806d1..2efe980a4cf 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -380,6 +380,42 @@ Result: └──────┘ ``` +## h3HexAreaKm2 {#h3hexareakm2} + +Returns average hexagon area in square kilometers at the given resolution. + +**Syntax** + +``` sql +h3HexAreaKm2(resolution) +``` + +**Parameter** + +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Area in square kilometers. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT h3HexAreaKm2(13) AS area; +``` + +Result: + +``` text +┌──────area─┐ +│ 0.0000439 │ +└───────────┘ +``` + ## h3IndexesAreNeighbors {#h3indexesareneighbors} Returns whether or not the provided [H3](#h3index) indexes are neighbors. @@ -704,4 +740,144 @@ Result: └───────┘ ``` +## h3DegsToRads {#h3degstorads} + +Converts degrees to radians. + +**Syntax** + +``` sql +h3DegsToRads(degrees) +``` + +**Parameter** + +- `degrees` — Input in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). + +**Returned values** + +- Radians. 
Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT h3DegsToRads(180.0) AS radians; +``` + +Result: + +``` text +┌───────────radians─┐ +│ 3.141592653589793 │ +└───────────────────┘ +``` + +## h3RadsToDegs {#h3radstodegs} + +Converts radians to degrees. + +**Syntax** + +``` sql +h3RadsToDegs(radians) +``` + +**Parameter** + +- `radians` — Input in radians. Type: [Float64](../../../sql-reference/data-types/float.md). + +**Returned values** + +- Degrees. Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT h3RadsToDegs(3.141592653589793) AS degrees; +``` + +Result: + +``` text +┌─degrees─┐ +│ 180 │ +└─────────┘ +``` + +## h3CellAreaM2 {#h3cellaream2} + +Returns the exact area of a specific cell in square meters corresponding to the given input H3 index. + +**Syntax** + +``` sql +h3CellAreaM2(index) +``` + +**Parameter** + +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Cell area in square meters. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT h3CellAreaM2(579205133326352383) AS area; +``` + +Result: + +``` text +┌───────────────area─┐ +│ 4106166334463.9233 │ +└────────────────────┘ +``` + +## h3CellAreaRads2 {#h3cellarearads2} + +Returns the exact area of a specific cell in square radians corresponding to the given input H3 index. + +**Syntax** + +``` sql +h3CellAreaRads2(index) +``` + +**Parameter** + +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned value** + +- Cell area in square radians. + +Type: [Float64](../../../sql-reference/data-types/float.md). + +**Example** + +Query: + +``` sql +SELECT h3CellAreaRads2(579205133326352383) AS area; +``` + +Result: + +``` text +┌────────────────area─┐ +│ 0.10116268528089567 │ +└─────────────────────┘ +``` + [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 4054f373cc1..30899cc2940 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -16,7 +16,7 @@ This query tries to initialize an unscheduled merge of data parts for tables. OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] ``` -The `OPTMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported. +The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren’t supported. 
When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `2`) or on current replica (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `1`). diff --git a/docs/zh/engines/table-engines/integrations/kafka.md b/docs/zh/engines/table-engines/integrations/kafka.md index 6784b366e85..ee6bbbe67fc 100644 --- a/docs/zh/engines/table-engines/integrations/kafka.md +++ b/docs/zh/engines/table-engines/integrations/kafka.md @@ -132,4 +132,33 @@ Kafka 特性: 有关详细配置选项列表,请参阅 [librdkafka配置参考](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md)。在 ClickHouse 配置中使用下划线 (`_`) ,并不是使用点 (`.`)。例如,`check.crcs=true` 将是 `true`。 +### Kerberos 支持 {#kafka-kerberos-zhi-chi} + +对于使用了kerberos的kafka, 将security_protocol 设置为sasl_plaintext就够了,如果kerberos的ticket是由操作系统获取和缓存的。 +clickhouse也支持自己使用keyfile的方式来维护kerbros的凭证。配置sasl_kerberos_service_name、sasl_kerberos_keytab、sasl_kerberos_principal三个子元素就可以。 + +示例: + +``` xml + + + SASL_PLAINTEXT + /home/kafkauser/kafkauser.keytab + kafkauser/kafkahost@EXAMPLE.COM + +``` + +## 虚拟列 + +- `_topic` – Kafka 主题。 +- `_key` – 信息的键。 +- `_offset` – 消息的偏移量。 +- `_timestamp ` – 消息的时间戳。 +- `_partition ` – Kafka 主题的分区。 + +**另请参阅** + +- [虚拟列](../../../engines/table-engines/index.md#table_engines-virtual_columns) +- [后台消息代理调度池大小](../../../operations/settings/settings.md#background_message_broker_schedule_pool_size) + [原始文章](https://clickhouse.com/docs/zh/operations/table_engines/kafka/) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index b1e1345cf71..a5e4517824d 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1,23 +1,17 @@ #include #include -#include #include #include -#include #include -#include -#include #include #include #include #include -#include #include #include #include "Client.h" #include "Core/Protocol.h" -#include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b51fad2ca25..c9e9f736e0d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -566,6 +566,7 @@ if (ENABLE_TESTS AND USE_GTEST) clickhouse_parsers clickhouse_storages_system dbms + clickhouse_common_config clickhouse_common_zookeeper clickhouse_common_config string_utils) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 0938a9cfee5..747603d0e6d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -463,12 +463,13 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query) /// The query can specify output format or output file. 
if (const auto * query_with_output = dynamic_cast(parsed_query.get())) { + String out_file; if (query_with_output->out_file) { select_into_file = true; const auto & out_file_node = query_with_output->out_file->as(); - const auto & out_file = out_file_node.value.safeGet(); + out_file = out_file_node.value.safeGet(); std::string compression_method; if (query_with_output->compression) @@ -494,6 +495,12 @@ void ClientBase::initBlockOutputStream(const Block & block, ASTPtr parsed_query) const auto & id = query_with_output->format->as(); current_format = id.name(); } + else if (query_with_output->out_file) + { + const auto & format_name = FormatFactory::instance().getFormatFromFileName(out_file); + if (!format_name.empty()) + current_format = format_name; + } } if (has_vertical_output_suffix) @@ -1008,11 +1015,15 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des compression_method = compression_method_node.value.safeGet(); } + String current_format = parsed_insert_query->format; + if (current_format.empty()) + current_format = FormatFactory::instance().getFormatFromFileName(in_file); + /// Create temporary storage file, to support globs and parallel reading StorageFile::CommonArguments args{ WithContext(global_context), parsed_insert_query->table_id, - parsed_insert_query->format, + current_format, getFormatSettings(global_context), compression_method, columns_description_for_query, diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 8ad853950b2..528c38f9b76 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -214,6 +214,12 @@ bool LocalConnection::poll(size_t) if (next_packet_type) return true; + if (state->exception) + { + next_packet_type = Protocol::Server::Exception; + return true; + } + if (!state->is_finished) { if (send_progress && (state->after_send_progress.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) diff --git a/src/Common/CombinedCardinalityEstimator.h b/src/Common/CombinedCardinalityEstimator.h index 55afb028247..8cf35436840 100644 --- a/src/Common/CombinedCardinalityEstimator.h +++ b/src/Common/CombinedCardinalityEstimator.h @@ -323,7 +323,7 @@ private: UInt64 address = 0; }; static const UInt64 mask = 0xFFFFFFFFFFFFFFFC; - static const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; + static const UInt32 medium_set_size_max = 1ULL << medium_set_power2_max; }; } diff --git a/src/Common/HashTable/StringHashTable.h b/src/Common/HashTable/StringHashTable.h index d30271d65db..7d704e4bdc7 100644 --- a/src/Common/HashTable/StringHashTable.h +++ b/src/Common/HashTable/StringHashTable.h @@ -280,7 +280,7 @@ public: if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - n[0] &= -1ul >> s; + n[0] &= -1ULL >> s; } else { diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index 93bbcb2835d..871becc86a4 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -114,7 +114,7 @@ public: if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - n[0] &= -1ul >> s; + n[0] &= -1ULL >> s; } else { diff --git a/src/Common/Throttler.cpp b/src/Common/Throttler.cpp index f02001e338a..95baf40f2c0 100644 --- a/src/Common/Throttler.cpp +++ b/src/Common/Throttler.cpp @@ -23,7 +23,7 @@ static constexpr auto NS = 1000000000UL; /// Tracking window. Actually the size is not really important. 
We just want to avoid /// throttles when there are no actions for a long period time. -static const double window_ns = 1UL * NS; +static const double window_ns = 1ULL * NS; void Throttler::add(size_t amount) { diff --git a/src/Common/tests/gtest_global_context.cpp b/src/Common/tests/gtest_global_context.cpp new file mode 100644 index 00000000000..19ba3cdc269 --- /dev/null +++ b/src/Common/tests/gtest_global_context.cpp @@ -0,0 +1,7 @@ +#include "gtest_global_context.h" + +const ContextHolder & getContext() +{ + static ContextHolder holder; + return holder; +} diff --git a/src/Common/tests/gtest_global_context.h b/src/Common/tests/gtest_global_context.h index 9bd7c2490d6..7756be7ce9b 100644 --- a/src/Common/tests/gtest_global_context.h +++ b/src/Common/tests/gtest_global_context.h @@ -18,8 +18,4 @@ struct ContextHolder ContextHolder(ContextHolder &&) = default; }; -inline const ContextHolder & getContext() -{ - static ContextHolder holder; - return holder; -} +const ContextHolder & getContext(); diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 4f71274291b..bd13a70252e 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -168,7 +168,7 @@ void deserializeKeeperStorageFromSnapshot(KeeperStorage & storage, const std::st auto max_session_id = deserializeSessionAndTimeout(storage, reader); LOG_INFO(log, "Sessions and timeouts deserialized"); - storage.session_id_counter = max_session_id; + storage.session_id_counter = max_session_id + 1; /// session_id_counter pointer to next slot deserializeACLMap(storage, reader); LOG_INFO(log, "ACLs deserialized"); diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 3f6cb49fda7..5cc334eaad4 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -156,13 +156,15 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String StorageMySQLConfiguration configuration; ASTs & arguments = engine->arguments->children; + MySQLSettings mysql_settings; - if (auto named_collection = getExternalDataSourceConfiguration(arguments, context, true)) + if (auto named_collection = getExternalDataSourceConfiguration(arguments, context, true, true, mysql_settings)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); + auto [common_configuration, storage_specific_args, settings_changes] = named_collection.value(); configuration.set(common_configuration); configuration.addresses = {std::make_pair(configuration.host, configuration.port)}; + mysql_settings.applyChanges(settings_changes); if (!storage_specific_args.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -200,7 +202,6 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (engine_name == "MySQL") { auto mysql_database_settings = std::make_unique(); - MySQLSettings mysql_settings; auto mysql_pool = createMySQLPoolWithFailover(configuration, mysql_settings); mysql_database_settings->loadFromQueryContext(context); @@ -299,7 +300,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (auto named_collection = getExternalDataSourceConfiguration(engine_args, context, true)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); + auto [common_configuration, storage_specific_args, _] = named_collection.value(); configuration.set(common_configuration); configuration.addresses = {std::make_pair(configuration.host, 
configuration.port)}; @@ -358,7 +359,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (auto named_collection = getExternalDataSourceConfiguration(engine_args, context, true)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); + auto [common_configuration, storage_specific_args, _] = named_collection.value(); configuration.set(common_configuration); if (!storage_specific_args.empty()) diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index 5d699955ee5..dba8bf64798 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -50,12 +50,17 @@ DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL( , remote_database_name(postgres_database_name) , connection_info(connection_info_) , settings(std::move(settings_)) + , startup_task(getContext()->getSchedulePool().createTask("MaterializedPostgreSQLDatabaseStartup", [this]{ startSynchronization(); })) { } void DatabaseMaterializedPostgreSQL::startSynchronization() { + std::lock_guard lock(handler_mutex); + if (shutdown_called) + return; + replication_handler = std::make_unique( /* replication_identifier */database_name, remote_database_name, @@ -104,24 +109,14 @@ void DatabaseMaterializedPostgreSQL::startSynchronization() } LOG_TRACE(log, "Loaded {} tables. Starting synchronization", materialized_tables.size()); - replication_handler->startup(); + replication_handler->startup(/* delayed */false); } void DatabaseMaterializedPostgreSQL::startupTables(ThreadPool & thread_pool, bool force_restore, bool force_attach) { DatabaseAtomic::startupTables(thread_pool, force_restore, force_attach); - try - { - startSynchronization(); - } - catch (...) - { - tryLogCurrentException(log, "Cannot load nested database objects for PostgreSQL database engine."); - - if (!force_attach) - throw; - } + startup_task->activateAndSchedule(); } @@ -376,6 +371,7 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr context_, cons void DatabaseMaterializedPostgreSQL::shutdown() { + startup_task->deactivate(); stopReplication(); DatabaseAtomic::shutdown(); } @@ -387,6 +383,7 @@ void DatabaseMaterializedPostgreSQL::stopReplication() if (replication_handler) replication_handler->shutdown(); + shutdown_called = true; /// Clear wrappers over nested, all access is not done to nested tables directly. 
materialized_tables.clear(); } diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index 3b7f0f9d29d..40ff0d9262d 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -86,6 +86,9 @@ private: std::map materialized_tables; mutable std::mutex tables_mutex; mutable std::mutex handler_mutex; + + BackgroundSchedulePool::TaskHolder startup_task; + bool shutdown_called = false; }; } diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index a99bee60cfd..bd9a1f7776e 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -247,12 +247,13 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) if (named_collection) { - host = named_collection->host; - user = named_collection->username; - password = named_collection->password; - db = named_collection->database; - table = named_collection->table; - port = named_collection->port; + const auto & configuration = named_collection->configuration; + host = configuration.host; + user = configuration.username; + password = configuration.password; + db = configuration.database; + table = configuration.table; + port = configuration.port; } ClickHouseDictionarySource::Configuration configuration{ diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 7c720691c3c..fb637263cf4 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -28,7 +28,7 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) auto named_collection = getExternalDataSourceConfiguration(config, config_prefix, context, has_config_key); if (named_collection) { - configuration = *named_collection; + configuration = named_collection->configuration; } else { diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 5bfb6273e8d..a291fcea47f 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -60,19 +60,25 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) auto settings_config_prefix = config_prefix + ".mysql"; std::shared_ptr pool; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key) || key.starts_with("replica"); }; + MySQLSettings mysql_settings; + auto has_config_key = [&](const String & key) + { + return dictionary_allowed_keys.contains(key) || key.starts_with("replica") || mysql_settings.has(key); + }; StorageMySQLConfiguration configuration; auto named_collection = created_from_ddl - ? getExternalDataSourceConfiguration(config, settings_config_prefix, global_context, has_config_key) + ? 
getExternalDataSourceConfiguration(config, settings_config_prefix, global_context, has_config_key, mysql_settings) : std::nullopt; if (named_collection) { - configuration.set(*named_collection); + mysql_settings.applyChanges(named_collection->settings_changes); + configuration.set(named_collection->configuration); configuration.addresses = {std::make_pair(configuration.host, configuration.port)}; - MySQLSettings mysql_settings; const auto & settings = global_context->getSettingsRef(); - mysql_settings.connect_timeout = settings.external_storage_connect_timeout_sec; - mysql_settings.read_write_timeout = settings.external_storage_rw_timeout_sec; + if (!mysql_settings.isChanged("connect_timeout")) + mysql_settings.connect_timeout = settings.external_storage_connect_timeout_sec; + if (!mysql_settings.isChanged("read_write_timeout")) + mysql_settings.read_write_timeout = settings.external_storage_rw_timeout_sec; pool = std::make_shared(createMySQLPoolWithFailover(configuration, mysql_settings)); } else diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 2068de0d01c..43c3eb54103 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -14,6 +14,8 @@ #include #include +#include + namespace DB { @@ -191,7 +193,8 @@ InputFormatPtr FormatFactory::getInput( ParallelParsingInputFormat::Params params{ - buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing}; + buf, sample, parser_creator, file_segmentation_engine, name, settings.max_threads, settings.min_chunk_bytes_for_parallel_parsing, + context->getApplicationType() == Context::ApplicationType::SERVER}; return std::make_shared(params); } @@ -391,6 +394,30 @@ void FormatFactory::registerOutputFormat(const String & name, OutputCreator outp target = std::move(output_creator); } +void FormatFactory::registerFileExtension(const String & extension, const String & format_name) +{ + file_extension_formats[extension] = format_name; +} + +String FormatFactory::getFormatFromFileName(String file_name) +{ + CompressionMethod compression_method = chooseCompressionMethod(file_name, ""); + if (CompressionMethod::None != compression_method) + { + auto pos = file_name.find_last_of('.'); + if (pos != String::npos) + file_name = file_name.substr(0, pos); + } + + auto pos = file_name.find_last_of('.'); + if (pos == String::npos) + return ""; + + String file_extension = file_name.substr(pos + 1, String::npos); + boost::algorithm::to_lower(file_extension); + return file_extension_formats[file_extension]; +} + void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine) { auto & target = dict[name].file_segmentation_engine; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index a62b32da0cc..229adbbb263 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -108,6 +109,7 @@ private: }; using FormatsDictionary = std::unordered_map; + using FileExtensionFormats = std::unordered_map; public: static FormatFactory & instance(); @@ -169,6 +171,10 @@ public: void registerInputFormat(const String & name, InputCreator input_creator); void registerOutputFormat(const String & name, OutputCreator output_creator); + /// Register file extension for format + void registerFileExtension(const String & extension, const String & format_name); + String getFormatFromFileName(String file_name); + 
/// Register schema readers for format its name. void registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator); void registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator); @@ -192,6 +198,7 @@ public: private: FormatsDictionary dict; + FileExtensionFormats file_extension_formats; const Creators & getCreators(const String & name) const; diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 1349c9e3323..289b5965455 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -196,6 +196,16 @@ void registerFormats() registerTSKVSchemaReader(factory); registerValuesSchemaReader(factory); registerTemplateSchemaReader(factory); + + factory.registerFileExtension("csv", "CSV"); + factory.registerFileExtension("tsv", "TSV"); + factory.registerFileExtension("parquet", "Parquet"); + factory.registerFileExtension("orc", "ORC"); + factory.registerFileExtension("native", "Native"); + factory.registerFileExtension("json", "JSON"); + factory.registerFileExtension("ndjson", "JSONEachRow"); + factory.registerFileExtension("xml", "XML"); + factory.registerFileExtension("avro", "Avro"); } } diff --git a/src/Functions/h3CellAreaM2.cpp b/src/Functions/h3CellAreaM2.cpp new file mode 100644 index 00000000000..d110d0d92f9 --- /dev/null +++ b/src/Functions/h3CellAreaM2.cpp @@ -0,0 +1,90 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3CellAreaM2 final : public IFunction +{ +public: + static constexpr auto name = "h3CellAreaM2"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * column = checkAndGetColumn(arguments[0].column.get()); + if (!column) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64.", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data = column->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 index = data[row]; + Float64 res = cellAreaM2(index); + dst_data[row] = res; + } + + return dst; + } +}; + +} + +void registerFunctionH3CellAreaM2(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/h3CellAreaRads2.cpp b/src/Functions/h3CellAreaRads2.cpp new file mode 100644 index 00000000000..1a257b0d9d3 --- /dev/null +++ b/src/Functions/h3CellAreaRads2.cpp @@ -0,0 +1,90 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3CellAreaRads2 final : public IFunction +{ +public: + static constexpr auto name = "h3CellAreaRads2"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), 1, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * column = checkAndGetColumn(arguments[0].column.get()); + if (!column) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data = column->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 index = data[row]; + Float64 res = cellAreaRads2(index); + dst_data[row] = res; + } + + return dst; + } +}; + +} + +void registerFunctionH3CellAreaRads2(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/h3DegsToRads.cpp b/src/Functions/h3DegsToRads.cpp new file mode 100644 index 00000000000..b3afc28f5a2 --- /dev/null +++ b/src/Functions/h3DegsToRads.cpp @@ -0,0 +1,90 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3DegsToRads final : public IFunction +{ +public: + static constexpr auto name = "h3DegsToRads"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 1, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * column = checkAndGetColumn(arguments[0].column.get()); + + if (!column) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be Float64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data = column->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const Float64 degrees = data[row]; + auto res = degsToRads(degrees); + dst_data[row] = res; + } + + return dst; + } +}; + +} + +void registerFunctionH3DegsToRads(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/h3HexAreaKm2.cpp b/src/Functions/h3HexAreaKm2.cpp new file mode 100644 index 00000000000..933fcf21424 --- /dev/null +++ b/src/Functions/h3HexAreaKm2.cpp @@ -0,0 +1,99 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ARGUMENT_OUT_OF_BOUND; +extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3HexAreaKm2 final : public IFunction +{ +public: + static constexpr auto name = "h3HexAreaKm2"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isUInt8()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be UInt8", + arg->getName(), 1, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * column = checkAndGetColumn(arguments[0].column.get()); + if (!column) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt8", + arguments[0].column->getName(), + 1, + getName()); + + const auto & data = column->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const UInt64 resolution = data[row]; + if (resolution > MAX_H3_RES) + throw Exception( + ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + resolution, + getName(), + MAX_H3_RES); + + Float64 res = getHexagonAreaAvgKm2(resolution); + dst_data[row] = res; + } + + return dst; + } +}; + +} + +void registerFunctionH3HexAreaKm2(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/h3RadsToDegs.cpp b/src/Functions/h3RadsToDegs.cpp new file mode 100644 index 00000000000..99b8969e13f --- /dev/null +++ b/src/Functions/h3RadsToDegs.cpp @@ -0,0 +1,88 @@ +#include "config_functions.h" + +#if USE_H3 + +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int ILLEGAL_COLUMN; +} + +namespace +{ + +class FunctionH3RadsToDegs final : public IFunction +{ +public: + static constexpr auto name = "h3RadsToDegs"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + std::string getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const auto * arg = arguments[0].get(); + if (!WhichDataType(arg).isFloat64()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument {} of function {}. Must be Float64", + arg->getName(), 1, getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * column = checkAndGetColumn(arguments[0].column.get()); + if (!column) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be Float64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & col_rads = column->getData(); + + auto dst = ColumnVector::create(); + auto & dst_data = dst->getData(); + dst_data.resize(input_rows_count); + + for (size_t row = 0; row < input_rows_count; ++row) + { + const Float64 rads = col_rads[row]; + auto res = radsToDegs(rads); + dst_data[row] = res; + } + return dst; + } +}; + +} + +void registerFunctionH3RadsToDegs(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index b24dc4cb9c0..d0bb47ea3d7 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -43,6 +43,12 @@ void registerFunctionH3HexAreaM2(FunctionFactory &); void registerFunctionH3IsResClassIII(FunctionFactory &); void registerFunctionH3IsPentagon(FunctionFactory &); void registerFunctionH3GetFaces(FunctionFactory &); +void registerFunctionH3DegsToRads(FunctionFactory &); +void registerFunctionH3RadsToDegs(FunctionFactory &); +void registerFunctionH3HexAreaKm2(FunctionFactory &); +void registerFunctionH3CellAreaM2(FunctionFactory &); +void registerFunctionH3CellAreaRads2(FunctionFactory &); + #endif #if USE_S2_GEOMETRY @@ -99,6 +105,11 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionH3IsResClassIII(factory); registerFunctionH3IsPentagon(factory); registerFunctionH3GetFaces(factory); + registerFunctionH3DegsToRads(factory); + registerFunctionH3RadsToDegs(factory); + registerFunctionH3HexAreaKm2(factory); + registerFunctionH3CellAreaM2(factory); + registerFunctionH3CellAreaRads2(factory); #endif #if USE_S2_GEOMETRY diff --git a/src/Functions/replicate.h b/src/Functions/replicate.h index 2455fda39c9..6012207980e 100644 --- a/src/Functions/replicate.h +++ b/src/Functions/replicate.h @@ -34,6 +34,8 @@ public: bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override; diff --git a/src/IO/examples/write_buffer.cpp b/src/IO/examples/write_buffer.cpp index 5587b8aa1a2..bca0be24b1a 100644 --- a/src/IO/examples/write_buffer.cpp +++ b/src/IO/examples/write_buffer.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 7049dfc03f6..8e0f73f0b31 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1977,6 +1977,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc if (!options.ignore_quota && (options.to_stage == QueryProcessingStage::Complete)) quota = context->getQuota(); + query_info.settings_limit_offset_done = options.settings_limit_offset_done; storage->read(query_plan, required_columns, metadata_snapshot, query_info, context, processing_stage, max_block_size, max_streams); if (context->hasQueryContext() && !options.is_internal) diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 6779093a53d..e4b3e62c358 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -83,7 +83,7 @@ 
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( } } - if (num_children == 1 && settings_limit_offset_needed) + if (num_children == 1 && settings_limit_offset_needed && !options.settings_limit_offset_done) { const ASTPtr first_select_ast = ast->list_of_selects->children.at(0); ASTSelectQuery * select_query = dynamic_cast(first_select_ast.get()); @@ -127,7 +127,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(new_limit_length_ast)); } - settings_limit_offset_done = true; + options.settings_limit_offset_done = true; } } @@ -305,7 +305,7 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan) } } - if (settings_limit_offset_needed && !settings_limit_offset_done) + if (settings_limit_offset_needed && !options.settings_limit_offset_done) { if (settings.limit > 0) { diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 02af07bc00c..efa0e921527 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -13,7 +13,7 @@ #include #include - +#if USE_MYSQL using namespace DB; static inline ASTPtr tryRewrittenCreateQuery(const String & query, ContextPtr context) @@ -255,3 +255,4 @@ TEST(MySQLCreateRewritten, QueryWithEnum) std::string(MATERIALIZEDMYSQL_TABLE_COLUMNS) + ") ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)"); } +#endif diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index e7e52142fc8..37b2992d657 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -76,6 +77,7 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as { std::unique_lock lock(mutex); + IAST::QueryKind query_kind = ast->getQueryKind(); const auto queue_max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); if (!is_unlimited_query && max_size && processes.size() >= max_size) @@ -86,15 +88,14 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as throw Exception("Too many simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); } - String query_kind{ast->getQueryKindString()}; if (!is_unlimited_query) { - auto amount = getQueryKindAmount(query_kind); - if (max_insert_queries_amount && query_kind == "Insert" && amount >= max_insert_queries_amount) + QueryAmount amount = getQueryKindAmount(query_kind); + if (max_insert_queries_amount && query_kind == IAST::QueryKind::Insert && amount >= max_insert_queries_amount) throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous insert queries. Maximum: {}, current: {}", max_insert_queries_amount, amount); - if (max_select_queries_amount && query_kind == "Select" && amount >= max_select_queries_amount) + if (max_select_queries_amount && query_kind == IAST::QueryKind::Select && amount >= max_select_queries_amount) throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous select queries. 
Maximum: {}, current: {}", max_select_queries_amount, amount); @@ -258,7 +259,7 @@ ProcessListEntry::~ProcessListEntry() String user = it->getClientInfo().current_user; String query_id = it->getClientInfo().current_query_id; - String query_kind = it->query_kind; + IAST::QueryKind query_kind = it->query_kind; const QueryStatus * process_list_element_ptr = &*it; @@ -306,7 +307,7 @@ ProcessListEntry::~ProcessListEntry() QueryStatus::QueryStatus( - ContextPtr context_, const String & query_, const ClientInfo & client_info_, QueryPriorities::Handle && priority_handle_, const String & query_kind_) + ContextPtr context_, const String & query_, const ClientInfo & client_info_, QueryPriorities::Handle && priority_handle_, IAST::QueryKind query_kind_) : WithContext(context_) , query(query_) , client_info(client_info_) @@ -505,7 +506,7 @@ ProcessList::UserInfo ProcessList::getUserInfo(bool get_profile_events) const return per_user_infos; } -void ProcessList::increaseQueryKindAmount(const String & query_kind) +void ProcessList::increaseQueryKindAmount(const IAST::QueryKind & query_kind) { auto found = query_kind_amounts.find(query_kind); if (found == query_kind_amounts.end()) @@ -514,7 +515,7 @@ void ProcessList::increaseQueryKindAmount(const String & query_kind) found->second += 1; } -void ProcessList::decreaseQueryKindAmount(const String & query_kind) +void ProcessList::decreaseQueryKindAmount(const IAST::QueryKind & query_kind) { auto found = query_kind_amounts.find(query_kind); /// TODO: we could just rebuild the map, as we have saved all query_kind. @@ -524,9 +525,9 @@ void ProcessList::decreaseQueryKindAmount(const String & query_kind) throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong query kind amount: decrease to negative on '{}'", query_kind, found->second); else found->second -= 1; - } -ProcessList::QueryAmount ProcessList::getQueryKindAmount(const String & query_kind) + +ProcessList::QueryAmount ProcessList::getQueryKindAmount(const IAST::QueryKind & query_kind) const { auto found = query_kind_amounts.find(query_kind); if (found == query_kind_amounts.end()) diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index ada24c03275..545e5b07345 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -118,7 +119,7 @@ protected: ProcessListForUser * user_process_list = nullptr; - String query_kind; + IAST::QueryKind query_kind; public: @@ -127,7 +128,7 @@ public: const String & query_, const ClientInfo & client_info_, QueryPriorities::Handle && priority_handle_, - const String & query_kind_ + IAST::QueryKind query_kind_ ); ~QueryStatus(); @@ -270,7 +271,7 @@ public: /// User -> queries using UserToQueries = std::unordered_map; - using QueryKindToAmount = std::unordered_map; + using QueryKindAmounts = std::unordered_map; protected: friend class ProcessListEntry; @@ -301,11 +302,11 @@ protected: size_t max_select_queries_amount = 0; /// amount of queries by query kind. 
- QueryKindToAmount query_kind_amounts; + QueryKindAmounts query_kind_amounts; - void increaseQueryKindAmount(const String & query_kind); - void decreaseQueryKindAmount(const String & query_kind); - QueryAmount getQueryKindAmount(const String & query_kind); + void increaseQueryKindAmount(const IAST::QueryKind & query_kind); + void decreaseQueryKindAmount(const IAST::QueryKind & query_kind); + QueryAmount getQueryKindAmount(const IAST::QueryKind & query_kind) const; public: using EntryPtr = std::shared_ptr; diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index bc95a940c18..ee708b064bd 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -48,6 +48,7 @@ struct SelectQueryOptions bool is_internal = false; bool is_subquery = false; // non-subquery can also have subquery_depth > 0, e.g. insert select bool with_all_cols = false; /// asterisk include materialized and aliased columns + bool settings_limit_offset_done = false; /// These two fields are used to evaluate shardNum() and shardCount() function when /// prefer_localhost_replica == 1 and local instance is selected. They are needed because local @@ -58,8 +59,10 @@ struct SelectQueryOptions SelectQueryOptions( QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0, - bool is_subquery_ = false) - : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_) + bool is_subquery_ = false, + bool settings_limit_offset_done_ = false) + : to_stage(stage), subquery_depth(depth), is_subquery(is_subquery_), + settings_limit_offset_done(settings_limit_offset_done_) {} SelectQueryOptions copy() const { return *this; } diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index d6342e3973e..46254d0c3a2 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -168,6 +168,8 @@ public: void shutdown() override { stopFlushThread(); + + auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); if (table) table->flushAndShutdown(); } @@ -186,7 +188,6 @@ private: /* Saving thread data */ const StorageID table_id; const String storage_def; - StoragePtr table; String create_query; String old_create_query; bool is_prepared = false; @@ -525,7 +526,7 @@ void SystemLog::prepareTable() { String description = table_id.getNameForLogs(); - table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); + auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); if (table) { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 7dcfc4b95b3..9770d1a988f 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -57,6 +57,8 @@ #include #include +#include + #include @@ -271,7 +273,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr // Try log query_kind if ast is valid if (ast) { - elem.query_kind = ast->getQueryKindString(); + elem.query_kind = magic_enum::enum_name(ast->getQueryKind()); if (settings.log_formatted_queries) elem.formatted_query = queryToString(ast); } diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 2ae494854ec..f53c39b192f 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -11,6 +11,11 @@ namespace ErrorCodes extern const int UNEXPECTED_AST_STRUCTURE; } +String ASTAlterCommand::getID(char delim) const +{ + return String("AlterCommand") + delim + typeToString(type); +} + ASTPtr 
ASTAlterCommand::clone() const { auto res = std::make_shared(*this); @@ -75,6 +80,53 @@ ASTPtr ASTAlterCommand::clone() const return res; } +const char * ASTAlterCommand::typeToString(ASTAlterCommand::Type type) +{ + switch (type) + { + case ADD_COLUMN: return "ADD_COLUMN"; + case DROP_COLUMN: return "DROP_COLUMN"; + case MODIFY_COLUMN: return "MODIFY_COLUMN"; + case COMMENT_COLUMN: return "COMMENT_COLUMN"; + case RENAME_COLUMN: return "RENAME_COLUMN"; + case MATERIALIZE_COLUMN: return "MATERIALIZE_COLUMN"; + case MODIFY_ORDER_BY: return "MODIFY_ORDER_BY"; + case MODIFY_SAMPLE_BY: return "MODIFY_SAMPLE_BY"; + case MODIFY_TTL: return "MODIFY_TTL"; + case MATERIALIZE_TTL: return "MATERIALIZE_TTL"; + case MODIFY_SETTING: return "MODIFY_SETTING"; + case RESET_SETTING: return "RESET_SETTING"; + case MODIFY_QUERY: return "MODIFY_QUERY"; + case REMOVE_TTL: return "REMOVE_TTL"; + case REMOVE_SAMPLE_BY: return "REMOVE_SAMPLE_BY"; + case ADD_INDEX: return "ADD_INDEX"; + case DROP_INDEX: return "DROP_INDEX"; + case MATERIALIZE_INDEX: return "MATERIALIZE_INDEX"; + case ADD_CONSTRAINT: return "ADD_CONSTRAINT"; + case DROP_CONSTRAINT: return "DROP_CONSTRAINT"; + case ADD_PROJECTION: return "ADD_PROJECTION"; + case DROP_PROJECTION: return "DROP_PROJECTION"; + case MATERIALIZE_PROJECTION: return "MATERIALIZE_PROJECTION"; + case DROP_PARTITION: return "DROP_PARTITION"; + case DROP_DETACHED_PARTITION: return "DROP_DETACHED_PARTITION"; + case ATTACH_PARTITION: return "ATTACH_PARTITION"; + case MOVE_PARTITION: return "MOVE_PARTITION"; + case REPLACE_PARTITION: return "REPLACE_PARTITION"; + case FETCH_PARTITION: return "FETCH_PARTITION"; + case FREEZE_PARTITION: return "FREEZE_PARTITION"; + case FREEZE_ALL: return "FREEZE_ALL"; + case UNFREEZE_PARTITION: return "UNFREEZE_PARTITION"; + case UNFREEZE_ALL: return "UNFREEZE_ALL"; + case DELETE: return "DELETE"; + case UPDATE: return "UPDATE"; + case NO_TYPE: return "NO_TYPE"; + case LIVE_VIEW_REFRESH: return "LIVE_VIEW_REFRESH"; + case MODIFY_DATABASE_SETTING: return "MODIFY_DATABASE_SETTING"; + case MODIFY_COMMENT: return "MODIFY_COMMENT"; + } + __builtin_unreachable(); +} + void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { if (type == ASTAlterCommand::ADD_COLUMN) diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index b0980c20f5e..976ccd1e2bf 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -204,10 +204,12 @@ public: /// Which property user want to remove String remove_property; - String getID(char delim) const override { return "AlterCommand" + (delim + std::to_string(static_cast(type))); } + String getID(char delim) const override; ASTPtr clone() const override; + static const char * typeToString(Type type); + protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; @@ -246,7 +248,7 @@ public: return removeOnCluster(clone(), new_database); } - const char * getQueryKindString() const override { return "Alter"; } + virtual QueryKind getQueryKind() const override { return QueryKind::Alter; } protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 2e35731acad..fcc4107bb5f 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -119,7 +119,7 @@ public: bool isView() const { return is_ordinary_view || 
is_materialized_view || is_live_view || is_window_view; } - const char * getQueryKindString() const override { return "Create"; } + virtual QueryKind getQueryKind() const override { return QueryKind::Create; } protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTDropQuery.h b/src/Parsers/ASTDropQuery.h index 6e5fd5854d8..2e67eaf3692 100644 --- a/src/Parsers/ASTDropQuery.h +++ b/src/Parsers/ASTDropQuery.h @@ -45,7 +45,7 @@ public: return removeOnCluster(clone(), new_database); } - const char * getQueryKindString() const override { return "Drop"; } + virtual QueryKind getQueryKind() const override { return QueryKind::Drop; } protected: void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index c733398a32b..7e1d48d7f55 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -79,6 +79,13 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << ")"; } + if (infile) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM INFILE " << (settings.hilite ? hilite_none : "") << infile->as().value.safeGet(); + if (compression) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " COMPRESSION " << (settings.hilite ? hilite_none : "") << compression->as().value.safeGet(); + } + if (select) { settings.ostr << " "; @@ -91,12 +98,6 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s } else { - if (infile) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM INFILE " << (settings.hilite ? hilite_none : "") << infile->as().value.safeGet(); - if (compression) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " COMPRESSION " << (settings.hilite ? hilite_none : "") << compression->as().value.safeGet(); - } if (!format.empty()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORMAT " << (settings.hilite ? 
hilite_none : "") << format; diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h index d539ad5fdb3..db9262ea794 100644 --- a/src/Parsers/ASTInsertQuery.h +++ b/src/Parsers/ASTInsertQuery.h @@ -66,7 +66,7 @@ public: return res; } - const char * getQueryKindString() const override { return "Insert"; } + virtual QueryKind getQueryKind() const override { return QueryKind::Insert; } protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTRenameQuery.h b/src/Parsers/ASTRenameQuery.h index dafc166f672..01ab0df9774 100644 --- a/src/Parsers/ASTRenameQuery.h +++ b/src/Parsers/ASTRenameQuery.h @@ -65,7 +65,7 @@ public: return query_ptr; } - const char * getQueryKindString() const override { return "Rename"; } + virtual QueryKind getQueryKind() const override { return QueryKind::Rename; } protected: void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override diff --git a/src/Parsers/ASTSelectIntersectExceptQuery.h b/src/Parsers/ASTSelectIntersectExceptQuery.h index 86475fcba5c..fa574b46c8d 100644 --- a/src/Parsers/ASTSelectIntersectExceptQuery.h +++ b/src/Parsers/ASTSelectIntersectExceptQuery.h @@ -22,7 +22,7 @@ public: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - const char * getQueryKindString() const override { return "SelectIntersectExcept"; } + virtual QueryKind getQueryKind() const override { return QueryKind::SelectIntersectExcept; } ASTs getListOfSelects() const; diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 2b004e9e040..1c631783fdb 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -135,7 +135,7 @@ public: void setFinal(); - const char * getQueryKindString() const override { return "Select"; } + virtual QueryKind getQueryKind() const override { return QueryKind::Select; } protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 3fc8f9171c0..bd45dd7fc05 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -17,7 +17,7 @@ public: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - const char * getQueryKindString() const override { return "Select"; } + virtual QueryKind getQueryKind() const override { return QueryKind::Select; } SelectUnionMode union_mode; diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 16f8a3c118a..22488e35e12 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,7 +107,7 @@ public: return removeOnCluster(clone(), new_database); } - const char * getQueryKindString() const override { return "System"; } + virtual QueryKind getQueryKind() const override { return QueryKind::System; } protected: diff --git a/src/Parsers/Access/ASTGrantQuery.h b/src/Parsers/Access/ASTGrantQuery.h index f8ea9b478fe..934d619fc36 100644 --- a/src/Parsers/Access/ASTGrantQuery.h +++ b/src/Parsers/Access/ASTGrantQuery.h @@ -34,6 +34,6 @@ public: void replaceEmptyDatabase(const String & current_database); void replaceCurrentUserTag(const String & current_user_name) const; ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster(clone()); } - const 
char * getQueryKindString() const override { return is_revoke ? "Revoke" : "Grant"; } + virtual QueryKind getQueryKind() const override { return is_revoke ? QueryKind::Revoke : QueryKind::Grant; } }; } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 2f30a1f7bee..fdf821c4a0b 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -245,10 +245,23 @@ public: void cloneChildren(); - // Return query_kind string representation of this AST query. - virtual const char * getQueryKindString() const { return ""; } + enum class QueryKind : uint8_t + { + None = 0, + Alter, + Create, + Drop, + Grant, + Insert, + Rename, + Revoke, + SelectIntersectExcept, + Select, + System, + }; + /// Return QueryKind of this AST query. + virtual QueryKind getQueryKind() const { return QueryKind::None; } -public: /// For syntax highlighting. static const char * hilite_keyword; static const char * hilite_identifier; diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index f2f8226c818..568f486a5cf 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -116,7 +116,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// Check if file is a source of data. if (s_from_infile.ignore(pos, expected)) { - /// Read its name to process it later + /// Read file name to process it later if (!infile_name_p.parse(pos, infile, expected)) return false; @@ -133,7 +133,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Pos before_values = pos; String format_str; - /// VALUES or FROM INFILE or FORMAT or SELECT + /// VALUES or FORMAT or SELECT or WITH or WATCH. + /// After FROM INFILE we expect FORMAT, SELECT, WITH or nothing. if (!infile && s_values.ignore(pos, expected)) { /// If VALUES is defined in query, everything except setting will be parsed as data @@ -162,21 +163,17 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) tryGetIdentifierNameInto(format, format_str); } - else if (s_watch.ignore(pos, expected)) + else if (!infile && s_watch.ignore(pos, expected)) { /// If WATCH is defined, return to position before WATCH and parse /// rest of query as WATCH query. 
pos = before_values; ParserWatchQuery watch_p; watch_p.parse(pos, watch, expected); - - /// FORMAT section is expected if we have input() in SELECT part - if (s_format.ignore(pos, expected) && !name_p.parse(pos, format, expected)) - return false; } - else + else if (!infile) { - /// If all previous conditions were false, query is incorrect + /// If all previous conditions were false and it's not FROM INFILE, query is incorrect return false; } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index aa181ea0b8b..1edf5432c98 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -80,6 +80,9 @@ static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { std::shared_ptr chunk = arrow_column->chunk(chunk_i); + if (chunk->length() == 0) + continue; + /// buffers[0] is a null bitmap and buffers[1] are actual values std::shared_ptr buffer = chunk->data()->buffers[1]; @@ -146,6 +149,9 @@ static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) { arrow::BooleanArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); + if (chunk.length() == 0) + continue; + /// buffers[0] is a null bitmap and buffers[1] are actual values std::shared_ptr buffer = chunk.data()->buffers[1]; diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 4be91299bdb..213226c9d68 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -137,7 +137,8 @@ void ParallelParsingInputFormat::onBackgroundException(size_t offset) if (e->getLineNumber() != -1) e->setLineNumber(e->getLineNumber() + offset); } - tryLogCurrentException(__PRETTY_FUNCTION__); + if (is_server) + tryLogCurrentException(__PRETTY_FUNCTION__); parsing_finished = true; first_parser_finished.set(); reader_condvar.notify_all(); diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index 264beba8589..5efdaf1b0b7 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -82,6 +82,7 @@ public: String format_name; size_t max_threads; size_t min_chunk_bytes; + bool is_server; }; explicit ParallelParsingInputFormat(Params params) @@ -90,6 +91,7 @@ public: , file_segmentation_engine(params.file_segmentation_engine) , format_name(params.format_name) , min_chunk_bytes(params.min_chunk_bytes) + , is_server(params.is_server) , pool(params.max_threads) { // One unit for each thread, including segmentator and reader, plus a @@ -203,6 +205,8 @@ private: std::atomic parsing_started{false}; std::atomic parsing_finished{false}; + const bool is_server; + /// There are multiple "parsers", that's why we use thread pool. 
ThreadPool pool; /// Reading and segmentating the file diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp new file mode 100644 index 00000000000..38d3fa30b42 --- /dev/null +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -0,0 +1,493 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +using namespace std::literals; + +namespace DB::ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int NO_ELEMENTS_IN_CONFIG; + } + +namespace DB::Graphite +{ +static std::unordered_map ruleTypeMap = +{ + { RuleTypeAll, "all" }, + { RuleTypePlain, "plain" }, + { RuleTypeTagged, "tagged"}, + { RuleTypeTagList, "tag_list"} +}; + +const String & ruleTypeStr(RuleType rule_type) +{ + try + { + return ruleTypeMap.at(rule_type); + } + catch (...) + { + throw Exception("invalid rule type: " + std::to_string(rule_type), DB::ErrorCodes::BAD_ARGUMENTS); + } +} + +RuleType ruleType(const String & s) +{ + if (s == "all") + return RuleTypeAll; + else if (s == "plain") + return RuleTypePlain; + else if (s == "tagged") + return RuleTypeTagged; + else if (s == "tag_list") + return RuleTypeTagList; + else + throw Exception("invalid rule type: " + s, DB::ErrorCodes::BAD_ARGUMENTS); +} + +static const Graphite::Pattern undef_pattern = +{ /// empty pattern for selectPatternForPath + .rule_type = RuleTypeAll, + .regexp = nullptr, + .regexp_str = "", + .function = nullptr, + .retentions = Graphite::Retentions(), + .type = undef_pattern.TypeUndef, +}; + +inline static const Patterns & selectPatternsForMetricType(const Graphite::Params & params, const StringRef path) +{ + if (params.patterns_typed) + { + std::string_view path_view = path.toView(); + if (path_view.find("?"sv) == path_view.npos) + return params.patterns_plain; + else + return params.patterns_tagged; + } + else + { + return params.patterns; + } +} + +Graphite::RollupRule selectPatternForPath( + const Graphite::Params & params, + const StringRef path) +{ + const Graphite::Pattern * first_match = &undef_pattern; + + const Patterns & patterns_check = selectPatternsForMetricType(params, path); + + for (const auto & pattern : patterns_check) + { + if (!pattern.regexp) + { + /// Default pattern + if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) + { + /// There is only default pattern for both retention and aggregation + return std::pair(&pattern, &pattern); + } + if (pattern.type != first_match->type) + { + if (first_match->type == first_match->TypeRetention) + { + return std::pair(first_match, &pattern); + } + if (first_match->type == first_match->TypeAggregation) + { + return std::pair(&pattern, first_match); + } + } + } + else + { + if (pattern.regexp->match(path.data, path.size)) + { + /// General pattern with matched path + if (pattern.type == pattern.TypeAll) + { + /// Only for not default patterns with both function and retention parameters + return std::pair(&pattern, &pattern); + } + if (first_match->type == first_match->TypeUndef) + { + first_match = &pattern; + continue; + } + if (pattern.type != first_match->type) + { + if (first_match->type == first_match->TypeRetention) + { + return std::pair(first_match, &pattern); + } + if (first_match->type == first_match->TypeAggregation) + { + return std::pair(&pattern, first_match); + } + } + } + } + } + + return {nullptr, nullptr}; +} + +/** Is used to order 
Graphite::Retentions by age and precision descending. + * Throws an exception unless both age and precision are less, or both are greater, than in the other. + */ +static bool compareRetentions(const Retention & a, const Retention & b) +{ + if (a.age > b.age && a.precision > b.precision) + { + return true; + } + else if (a.age < b.age && a.precision < b.precision) + { + return false; + } + String error_msg = "age and precision should only grow up: " + + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " + + std::to_string(b.age) + ":" + std::to_string(b.precision); + throw Exception( + error_msg, + DB::ErrorCodes::BAD_ARGUMENTS); +} + +bool operator==(const Retention & a, const Retention & b) +{ + return a.age == b.age && a.precision == b.precision; +} + +std::ostream & operator<<(std::ostream & stream, const Retentions & a) +{ + stream << "{ "; + for (size_t i = 0; i < a.size(); i++) + { + if (i > 0) + stream << ","; + stream << " { age = " << a[i].age << ", precision = " << a[i].precision << " }"; + } + stream << " }"; + + return stream; +} + +bool operator==(const Pattern & a, const Pattern & b) +{ + // equal + // Retentions retentions; /// Must be ordered by 'age' descending. + if (a.type != b.type || a.regexp_str != b.regexp_str || a.rule_type != b.rule_type) + return false; + + if (a.function == nullptr) + { + if (b.function != nullptr) + return false; + } + else if (b.function == nullptr) + { + return false; + } + else if (a.function->getName() != b.function->getName()) + { + return false; + } + + return a.retentions == b.retentions; +} + +std::ostream & operator<<(std::ostream & stream, const Pattern & a) +{ + stream << "{ rule_type = " << ruleTypeStr(a.rule_type); + if (!a.regexp_str.empty()) + stream << ", regexp = '" << a.regexp_str << "'"; + if (a.function != nullptr) + stream << ", function = " << a.function->getName(); + if (!a.retentions.empty()) + { + stream << ",\n retentions = {\n"; + for (size_t i = 0; i < a.retentions.size(); i++) + { + stream << " { " << a.retentions[i].age << ", " << a.retentions[i].precision << " }"; + if (i < a.retentions.size() - 1) + stream << ","; + stream << "\n"; + } + stream << " }\n"; + } + else + stream << " "; + + stream << "}"; + return stream; +} + +std::string buildTaggedRegex(std::string regexp_str) +{ + /* + * tag list format (the name or any value may be a regexp; alphabetical sorting is not required) + * spaces are not stripped and become part of the tag and value + * name must be first (if used) + * + * tag1=value1; tag2=VALUE2_REGEX;tag3=value3 + * or + * name;tag1=value1;tag2=VALUE2_REGEX;tag3=value3 + * or for one tag + * tag1=value1 + * + * The resulting regex is matched against metrics like + * name?tag1=value1&tag2=value2 + * + * So, + * + * name + * produce + * name\?
+ * + * tag2=val2 + * produce + * [\?&]tag2=val2(&.*)?$ + * + * nam.* ; tag1=val1 ; tag2=val2 + * produce + * nam.*\?(.*&)?tag1=val1&(.*&)?tag2=val2(&.*)?$ + */ + + std::vector tags; + + splitInto<';'>(tags, regexp_str); + /* remove empthy elements */ + using namespace std::string_literals; + tags.erase(std::remove(tags.begin(), tags.end(), ""s), tags.end()); + if (tags[0].find('=') == tags[0].npos) + { + if (tags.size() == 1) /* only name */ + return "^" + tags[0] + "\\?"; + /* start with name value */ + regexp_str = "^" + tags[0] + "\\?(.*&)?"; + tags.erase(std::begin(tags)); + } + else + regexp_str = "[\\?&]"; + + std::sort(std::begin(tags), std::end(tags)); /* sorted tag keys */ + regexp_str += fmt::format( + "{}{}", + fmt::join(tags, "&(.*&)?"), + "(&.*)?$" /* close regex */ + ); + + return regexp_str; +} + +/** Read the settings for Graphite rollup from config. + * Example + * + * + * Path + * + * click_cost + * any + * + * 0 + * 3600 + * + * + * 86400 + * 60 + * + * + * + * max + * + * 0 + * 60 + * + * + * 3600 + * 300 + * + * + * 86400 + * 3600 + * + * + * + */ +static const Pattern & +appendGraphitePattern( + const Poco::Util::AbstractConfiguration & config, + const String & config_element, Patterns & patterns, + bool default_rule, + ContextPtr context) +{ + Pattern pattern; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_element, keys); + + for (const auto & key : keys) + { + if (key == "regexp") + { + pattern.regexp_str = config.getString(config_element + ".regexp"); + } + else if (key == "function") + { + String aggregate_function_name_with_params = config.getString(config_element + ".function"); + String aggregate_function_name; + Array params_row; + getAggregateFunctionNameAndParametersArray( + aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization", context); + + /// TODO Not only Float64 + AggregateFunctionProperties properties; + pattern.function = AggregateFunctionFactory::instance().get( + aggregate_function_name, {std::make_shared()}, params_row, properties); + } + else if (key == "rule_type") + { + String rule_type = config.getString(config_element + ".rule_type"); + pattern.rule_type = ruleType(rule_type); + } + else if (startsWith(key, "retention")) + { + pattern.retentions.emplace_back(Graphite::Retention{ + .age = config.getUInt(config_element + "." + key + ".age"), + .precision = config.getUInt(config_element + "." 
+ key + ".precision")}); + } + else + throw Exception("Unknown element in config: " + key, DB::ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + + if (!pattern.regexp_str.empty()) + { + if (pattern.rule_type == RuleTypeTagList) + { + // construct tagged regexp + pattern.regexp_str = buildTaggedRegex(pattern.regexp_str); + pattern.rule_type = RuleTypeTagged; + } + pattern.regexp = std::make_shared(pattern.regexp_str); + } + + if (!pattern.function && pattern.retentions.empty()) + throw Exception( + "At least one of an aggregate function or retention rules is mandatory for rollup patterns in GraphiteMergeTree", + DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + if (default_rule && pattern.rule_type != RuleTypeAll) + { + throw Exception( + "Default must have rule_type all for rollup patterns in GraphiteMergeTree", + DB::ErrorCodes::BAD_ARGUMENTS); + } + + if (!pattern.function) + { + pattern.type = pattern.TypeRetention; + } + else if (pattern.retentions.empty()) + { + pattern.type = pattern.TypeAggregation; + } + else + { + pattern.type = pattern.TypeAll; + } + + if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll + if (pattern.function->allocatesMemoryInArena()) + throw Exception( + "Aggregate function " + pattern.function->getName() + " isn't supported in GraphiteMergeTree", DB::ErrorCodes::NOT_IMPLEMENTED); + + /// retention should be in descending order of age. + if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll + std::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); + + patterns.emplace_back(pattern); + return patterns.back(); +} + +void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params) +{ + const auto & config = context->getConfigRef(); + + if (!config.has(config_element)) + throw Exception("No '" + config_element + "' element in configuration file", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + params.config_name = config_element; + params.path_column_name = config.getString(config_element + ".path_column_name", "Path"); + params.time_column_name = config.getString(config_element + ".time_column_name", "Time"); + params.value_column_name = config.getString(config_element + ".value_column_name", "Value"); + params.version_column_name = config.getString(config_element + ".version_column_name", "Timestamp"); + + params.patterns_typed = false; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_element, keys); + + for (const auto & key : keys) + { + if (startsWith(key, "pattern")) + { + if (appendGraphitePattern(config, config_element + "." + key, params.patterns, false, context).rule_type != RuleTypeAll) + params.patterns_typed = true; + } + else if (key == "default") + { + /// See below. + } + else if (key == "path_column_name" || key == "time_column_name" || key == "value_column_name" || key == "version_column_name") + { + /// See above. + } + else + throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + + if (config.has(config_element + ".default")) + appendGraphitePattern(config, config_element + "." 
+ ".default", params.patterns, true, context); + + for (const auto & pattern : params.patterns) + { + if (pattern.rule_type == RuleTypeAll) + { + if (params.patterns_typed) + { + params.patterns_plain.push_back(pattern); + params.patterns_tagged.push_back(pattern); + } + } + else if (pattern.rule_type == RuleTypePlain) + { + params.patterns_plain.push_back(pattern); + } + else if (pattern.rule_type == RuleTypeTagged) + { + params.patterns_tagged.push_back(pattern); + } + else + { + throw Exception("Unhandled rule_type in config: " + ruleTypeStr(pattern.rule_type), ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + } +} + +} diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index ecb1aeb9804..dc39cb46386 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -1,13 +1,8 @@ #pragma once + +#include #include - -namespace DB -{ - -class IAggregateFunction; -using AggregateFunctionPtr = std::shared_ptr; - -} +#include /** Intended for implementation of "rollup" - aggregation (rounding) of older data * for a table with Graphite data (Graphite is the system for time series monitoring). @@ -97,16 +92,32 @@ using AggregateFunctionPtr = std::shared_ptr; namespace DB::Graphite { +// sync with rule_types_str +enum RuleType +{ + RuleTypeAll = 0, // default, with regex, compatible with old scheme + RuleTypePlain = 1, // plain metrics, with regex, compatible with old scheme + RuleTypeTagged = 2, // tagged metrics, with regex, compatible with old scheme + RuleTypeTagList = 3 // tagged metrics, with regex (converted to RuleTypeTagged from string like 'retention=10min ; env=(staging|prod)') +}; + +const String & ruleTypeStr(RuleType rule_type); + struct Retention { UInt32 age; UInt32 precision; }; +bool operator==(const Retention & a, const Retention & b); + using Retentions = std::vector; +std::ostream &operator<<(std::ostream & stream, const Retentions & a); + struct Pattern { + RuleType rule_type = RuleTypeAll; std::shared_ptr regexp; std::string regexp_str; AggregateFunctionPtr function; @@ -114,6 +125,9 @@ struct Pattern enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically }; +bool operator==(const Pattern & a, const Pattern & b); +std::ostream &operator<<(std::ostream & stream, const Pattern & a); + using Patterns = std::vector; using RetentionPattern = Pattern; using AggregationPattern = Pattern; @@ -125,9 +139,16 @@ struct Params String time_column_name; String value_column_name; String version_column_name; + bool patterns_typed; Graphite::Patterns patterns; + Graphite::Patterns patterns_plain; + Graphite::Patterns patterns_tagged; }; using RollupRule = std::pair; +Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, const StringRef path); + +void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params); + } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index d5a35fef7bd..6464f10ca58 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -52,62 +53,6 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( columns_definition = defineColumns(header, params); } -Graphite::RollupRule 
GraphiteRollupSortedAlgorithm::selectPatternForPath(StringRef path) const -{ - const Graphite::Pattern * first_match = &undef_pattern; - - for (const auto & pattern : params.patterns) - { - if (!pattern.regexp) - { - /// Default pattern - if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) - { - /// There is only default pattern for both retention and aggregation - return std::pair(&pattern, &pattern); - } - if (pattern.type != first_match->type) - { - if (first_match->type == first_match->TypeRetention) - { - return std::pair(first_match, &pattern); - } - if (first_match->type == first_match->TypeAggregation) - { - return std::pair(&pattern, first_match); - } - } - } - else if (pattern.regexp->match(path.data, path.size)) - { - /// General pattern with matched path - if (pattern.type == pattern.TypeAll) - { - /// Only for not default patterns with both function and retention parameters - return std::pair(&pattern, &pattern); - } - if (first_match->type == first_match->TypeUndef) - { - first_match = &pattern; - continue; - } - if (pattern.type != first_match->type) - { - if (first_match->type == first_match->TypeRetention) - { - return std::pair(first_match, &pattern); - } - if (first_match->type == first_match->TypeAggregation) - { - return std::pair(&pattern, first_match); - } - } - } - } - - return {nullptr, nullptr}; -} - UInt32 GraphiteRollupSortedAlgorithm::selectPrecision(const Graphite::Retentions & retentions, time_t time) const { static_assert(is_signed_v, "time_t must be signed type"); @@ -188,7 +133,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() Graphite::RollupRule next_rule = merged_data.currentRule(); if (new_path) - next_rule = selectPatternForPath(next_path); + next_rule = selectPatternForPath(this->params, next_path); const Graphite::RetentionPattern * retention_pattern = std::get<0>(next_rule); time_t next_time_rounded; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index 0155b73b238..4968cbfc470 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -102,16 +102,6 @@ private: time_t current_time = 0; time_t current_time_rounded = 0; - const Graphite::Pattern undef_pattern = - { /// temporary empty pattern for selectPatternForPath - .regexp = nullptr, - .regexp_str = "", - .function = nullptr, - .retentions = DB::Graphite::Retentions(), - .type = undef_pattern.TypeUndef, - }; - - Graphite::RollupRule selectPatternForPath(StringRef path) const; UInt32 selectPrecision(const Graphite::Retentions & retentions, time_t time) const; /// Insert the values into the resulting columns, which will not be changed in the future. 
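The tag_list rule type introduced above is converted into a regular expression before matching. Below is a minimal standalone sketch of that conversion, using only the standard library rather than the splitInto/fmt helpers in Graphite.cpp; the function name buildTaggedRegexSketch and the sample rule are invented for illustration, and the gtest cases in the next file exercise the real buildTaggedRegex against the same kind of expected patterns.

    #include <algorithm>
    #include <iostream>
    #include <regex>
    #include <sstream>
    #include <string>
    #include <vector>

    // Sketch of the tag_list -> regex conversion documented in buildTaggedRegex:
    // split the rule on ';', keep an optional leading metric-name part, sort the
    // remaining tag=value constraints (tags are assumed to appear in alphabetical
    // order in the metric path) and join them with "&(.*&)?", closing with "(&.*)?$".
    static std::string buildTaggedRegexSketch(const std::string & rule)
    {
        std::vector<std::string> parts;
        std::stringstream ss(rule);
        std::string item;
        while (std::getline(ss, item, ';'))
            if (!item.empty())
                parts.push_back(item);

        std::string result;
        if (!parts.empty() && parts.front().find('=') == std::string::npos)
        {
            if (parts.size() == 1)                    // only the metric name
                return "^" + parts.front() + "\\?";
            result = "^" + parts.front() + "\\?(.*&)?";
            parts.erase(parts.begin());
        }
        else
            result = "[\\?&]";

        std::sort(parts.begin(), parts.end());
        for (size_t i = 0; i < parts.size(); ++i)
            result += (i ? "&(.*&)?" : "") + parts[i];
        return result + "(&.*)?$";
    }

    int main()
    {
        const std::string rule = "cpu\\.loadavg;env=staging";
        const std::string pattern = buildTaggedRegexSketch(rule);
        std::cout << pattern << '\n';                 // ^cpu\.loadavg\?(.*&)?env=staging(&.*)?$

        // A tagged metric path contains '?', which is also what the new
        // selectPatternsForMetricType uses to pick the tagged rule set.
        const std::string metric = "cpu.loadavg?env=staging&project=DB";
        std::cout << std::regex_search(metric, std::regex(pattern)) << '\n';   // 1
        return 0;
    }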
diff --git a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp new file mode 100644 index 00000000000..1d739bf566a --- /dev/null +++ b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp @@ -0,0 +1,597 @@ +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +using namespace DB; + +static int regAggregateFunctions = 0; + +void tryRegisterAggregateFunctions() +{ + if (!regAggregateFunctions) + { + registerAggregateFunctions(); + regAggregateFunctions = 1; + } +} + +static ConfigProcessor::LoadedConfig loadConfiguration(const std::string & config_path) +{ + ConfigProcessor config_processor(config_path, true, true); + ConfigProcessor::LoadedConfig config = config_processor.loadConfig(false); + return config; +} + +static ConfigProcessor::LoadedConfig loadConfigurationFromString(std::string & s) +{ + char tmp_file[19]; + strcpy(tmp_file, "/tmp/rollup-XXXXXX"); + int fd = mkstemp(tmp_file); + if (fd == -1) + { + throw std::runtime_error(strerror(errno)); + } + try { + if (write(fd, s.c_str(), s.size()) < s.size()) + { + throw std::runtime_error("unable write to temp file"); + } + if (write(fd, "\n", 1) != 1) + { + throw std::runtime_error("unable write to temp file"); + } + close(fd); + auto config_path = std::string(tmp_file) + ".xml"; + if (std::rename(tmp_file, config_path.c_str())) + { + int err = errno; + remove(tmp_file); + throw std::runtime_error(strerror(err)); + } + ConfigProcessor::LoadedConfig config = loadConfiguration(config_path); + remove(tmp_file); + return config; + } + catch (...) + { + remove(tmp_file); + throw; + } +} + +static Graphite::Params setGraphitePatterns(ContextMutablePtr context, ConfigProcessor::LoadedConfig & config) +{ + context->setConfig(config.configuration); + + Graphite::Params params; + setGraphitePatternsFromConfig(context, "graphite_rollup", params); + + return params; +} + +struct PatternForCheck +{ + Graphite::RuleType rule_type; + std::string regexp_str; + String function; + Graphite::Retentions retentions; +}; + + +bool checkRule(const Graphite::Pattern & pattern, const struct PatternForCheck & pattern_check, + const std::string & typ, const std::string & path, std::string & message) +{ + bool rule_type_eq = (pattern.rule_type == pattern_check.rule_type); + bool regexp_eq = (pattern.regexp_str == pattern_check.regexp_str); + bool function_eq = (pattern.function == nullptr && pattern_check.function.empty()) + || (pattern.function != nullptr && pattern.function->getName() == pattern_check.function); + bool retentions_eq = (pattern.retentions == pattern_check.retentions); + + if (rule_type_eq && regexp_eq && function_eq && retentions_eq) + return true; + + message = typ + " rollup rule mismatch for '" + path + "'," + + (rule_type_eq ? "" : "rule_type ") + + (regexp_eq ? "" : "regexp ") + + (function_eq ? "" : "function ") + + (retentions_eq ? 
"" : "retentions "); + return false; +} + +std::ostream & operator<<(std::ostream & stream, const PatternForCheck & a) +{ + stream << "{ rule_type = " << ruleTypeStr(a.rule_type); + if (!a.regexp_str.empty()) + stream << ", regexp = '" << a.regexp_str << "'"; + if (!a.function.empty()) + stream << ", function = " << a.function; + if (!a.retentions.empty()) + { + stream << ",\n retentions = {\n"; + for (size_t i = 0; i < a.retentions.size(); i++) + { + stream << " { " << a.retentions[i].age << ", " << a.retentions[i].precision << " }"; + if (i < a.retentions.size() - 1) + stream << ","; + stream << "\n"; + } + stream << " }\n"; + } + else + stream << " "; + + stream << "}"; + return stream; +} + +struct PatternsForPath +{ + std::string path; + PatternForCheck retention_want; + PatternForCheck aggregation_want; +}; + +TEST(GraphiteTest, testSelectPattern) +{ + tryRegisterAggregateFunctions(); + + using namespace std::literals; + + std::string + xml(R"END( + + + \.sum$ + sum + + + ^((.*)|.)sum\? + sum + + + \.max$ + max + + + ^((.*)|.)max\? + max + + + \.min$ + min + + + ^((.*)|.)min\? + min + + + \.(count|sum|sum_sq)$ + sum + + + ^((.*)|.)(count|sum|sum_sq)\? + sum + + + ^retention\. + + 0 + 60 + + + 86400 + 3600 + + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + +)END"); + + // Retentions must be ordered by 'age' descending. + std::vector tests + { + { + "test.sum", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(\.sum$)END", "sum", { } } + }, + { + "val.sum?env=test&tag=Fake3", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)sum\?)END", "sum", { } } + }, + { + "test.max", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(\.max$)END", "max", { } }, + }, + { + "val.max?env=test&tag=Fake4", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)max\?)END", "max", { } }, + }, + { + "test.min", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(\.min$)END", "min", { } }, + }, + { + "val.min?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)min\?)END", "min", { } }, + }, + { + "retention.count", + { Graphite::RuleTypeAll, R"END(^retention\.)END", "", { { 86400, 3600 }, { 0, 60 } } }, // ^retention + { Graphite::RuleTypeAll, R"END(\.(count|sum|sum_sq)$)END", "sum", { } }, + }, + { + "val.retention.count?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "test.p95", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "val.p95?env=test&tag=FakeNo", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, 
//default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "default", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "val.default?env=test&tag=FakeNo", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + } + }; + + auto config = loadConfigurationFromString(xml); + ContextMutablePtr context = getContext().context; + Graphite::Params params = setGraphitePatterns(context, config); + + for (const auto & t : tests) + { + auto rule = DB::Graphite::selectPatternForPath(params, t.path); + std:: string message; + if (!checkRule(*rule.first, t.retention_want, "retention", t.path, message)) + ADD_FAILURE() << message << ", got\n" << *rule.first << "\n, want\n" << t.retention_want << "\n"; + if (!checkRule(*rule.second, t.aggregation_want, "aggregation", t.path, message)) + ADD_FAILURE() << message << ", got\n" << *rule.second << "\n, want\n" << t.aggregation_want << "\n"; + } +} + + +namespace DB::Graphite +{ + std::string buildTaggedRegex(std::string regexp_str); +} + +struct RegexCheck +{ + std::string regex; + std::string regex_want; + std::string match; + std::string nomatch; +}; + +TEST(GraphiteTest, testBuildTaggedRegex) +{ + std::vector tests + { + { + "cpu\\.loadavg;project=DB.*;env=st.*", + R"END(^cpu\.loadavg\?(.*&)?env=st.*&(.*&)?project=DB.*(&.*)?$)END", + R"END(cpu.loadavg?env=staging&project=DBAAS)END", + R"END(cpu.loadavg?env=staging&project=D)END" + }, + { + R"END(project=DB.*;env=staging;)END", + R"END([\?&]env=staging&(.*&)?project=DB.*(&.*)?$)END", + R"END(cpu.loadavg?env=staging&project=DBPG)END", + R"END(cpu.loadavg?env=stagingN&project=DBAAS)END" + }, + { + "env=staging;", + R"END([\?&]env=staging(&.*)?$)END", + R"END(cpu.loadavg?env=staging&project=DPG)END", + R"END(cpu.loadavg?env=stagingN)END" + }, + { + " env = staging ;", // spaces are allowed, + R"END([\?&] env = staging (&.*)?$)END", + R"END(cpu.loadavg? env = staging &project=DPG)END", + R"END(cpu.loadavg?env=stagingN)END" + }, + { + "name;", + R"END(^name\?)END", + R"END(name?env=staging&project=DPG)END", + R"END(nameN?env=stagingN)END", + }, + { + "name", + R"END(^name\?)END", + R"END(name?env=staging&project=DPG)END", + R"END(nameN?env=stagingN)END", + } + }; + for (const auto & t : tests) + { + auto s = DB::Graphite::buildTaggedRegex(t.regex); + EXPECT_EQ(t.regex_want, s) << "result for '" << t.regex_want << "' mismatch"; + auto regexp = OptimizedRegularExpression(s); + EXPECT_TRUE(regexp.match(t.match.data(), t.match.size())) << t.match << " match for '" << s << "' failed"; + EXPECT_FALSE(regexp.match(t.nomatch.data(), t.nomatch.size())) << t.nomatch << " ! match for '" << s << "' failed"; + } +} + +TEST(GraphiteTest, testSelectPatternTyped) +{ + tryRegisterAggregateFunctions(); + + using namespace std::literals; + + std::string + xml(R"END( + + + plain + \.sum$ + sum + + + tagged + ^((.*)|.)sum\? + sum + + + plain + \.max$ + max + + + tagged + ^((.*)|.)max\? + max + + + plain + \.min$ + min + + + tagged + ^((.*)|.)min\? + min + + + plain + \.(count|sum|sum_sq)$ + sum + + + tagged + ^((.*)|.)(count|sum|sum_sq)\? + sum + + + plain + ^retention\. 
+ + 0 + 60 + + + 86400 + 3600 + + + + tagged + + + 0 + 60 + + + 86400 + 3600 + + + + tag_list + retention=10min;env=staging + + 0 + 600 + + + 86400 + 3600 + + + + tag_list + retention=10min;env=[A-Za-z-]+rod[A-Za-z-]+ + + 0 + 600 + + + 86400 + 3600 + + + + tag_list + cpu\.loadavg + + 0 + 600 + + + 86400 + 3600 + + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + +)END"); + + // Retentions must be ordered by 'age' descending. + std::vector tests + { + { + "test.sum", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypePlain, R"END(\.sum$)END", "sum", { } } + }, + { + "val.sum?env=test&tag=Fake3", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeTagged, R"END(^((.*)|.)sum\?)END", "sum", { } } + }, + { + "test.max", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypePlain, R"END(\.max$)END", "max", { } }, + }, + { + "val.max?env=test&tag=Fake4", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeTagged, R"END(^((.*)|.)max\?)END", "max", { } }, + }, + { + "test.min", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypePlain, R"END(\.min$)END", "min", { } }, + }, + { + "val.min?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeTagged, R"END(^((.*)|.)min\?)END", "min", { } }, + }, + { + "retention.count", + { Graphite::RuleTypePlain, R"END(^retention\.)END", "", { { 86400, 3600 }, { 0, 60 } } }, // ^retention + { Graphite::RuleTypePlain, R"END(\.(count|sum|sum_sq)$)END", "sum", { } }, + }, + { + "val.count?env=test&retention=hour&tag=Fake5", + { Graphite::RuleTypeTagged, R"END([\?&]retention=hour(&.*)?$)END", "", { { 86400, 3600 }, { 0, 60 } } }, // tagged retention=hour + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=test&retention=hour", + { Graphite::RuleTypeTagged, R"END([\?&]retention=hour(&.*)?$)END", "", { { 86400, 3600 }, { 0, 60 } } }, // tagged retention=hour + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=staging&retention=10min", + { Graphite::RuleTypeTagged, R"END([\?&]env=staging&(.*&)?retention=10min(&.*)?$)END", "", { { 86400, 3600 }, { 0, 600 } } }, // retention=10min ; env=staging + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=production&retention=10min", + { Graphite::RuleTypeTagged, R"END([\?&]env=[A-Za-z-]+rod[A-Za-z-]+&(.*&)?retention=10min(&.*)?$)END", "", { { 86400, 3600 }, { 0, 600 } } }, // retention=10min ; env=[A-Za-z-]+rod[A-Za-z-]+ + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "cpu.loadavg?env=test&tag=FakeNo", + { Graphite::RuleTypeTagged, R"END(^cpu\.loadavg\?)END", "", { { 86400, 3600 }, { 0, 600 } } }, // name=cpu\.loadavg + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, + }, + { + "test.p95", + { Graphite::RuleTypeAll, "", 
"avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "val.p95?env=test&tag=FakeNo", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "default", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "val.default?env=test&tag=FakeNo", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + } + }; + + auto config = loadConfigurationFromString(xml); + ContextMutablePtr context = getContext().context; + Graphite::Params params = setGraphitePatterns(context, config); + + for (const auto & t : tests) + { + auto rule = DB::Graphite::selectPatternForPath(params, t.path); + std:: string message; + if (!checkRule(*rule.first, t.retention_want, "retention", t.path, message)) + ADD_FAILURE() << message << ", got\n" << *rule.first << "\n, want\n" << t.retention_want << "\n"; + if (!checkRule(*rule.second, t.aggregation_want, "aggregation", t.path, message)) + ADD_FAILURE() << message << ", got\n" << *rule.second << "\n, want\n" << t.aggregation_want << "\n"; + } +} diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index 7d0da3dca91..e79dcb34c41 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -139,8 +139,10 @@ void TTLTransform::finalize() if (delete_algorithm) { - size_t rows_removed = all_data_dropped ? data_part->rows_count : delete_algorithm->getNumberOfRemovedRows(); - LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", rows_removed, data_part->name); + if (all_data_dropped) + LOG_DEBUG(log, "Removed all rows from part {} due to expired TTL", data_part->name); + else + LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", delete_algorithm->getNumberOfRemovedRows(), data_part->name); } } diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index 265587d2b1a..2d4b05c51b5 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -15,6 +15,9 @@ #if USE_RDKAFKA #include #endif +#if USE_MYSQL +#include +#endif #include @@ -26,12 +29,31 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +IMPLEMENT_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) + static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "replica", "update_field", "update_tag", "invalidate_query", "query", "where", "name", "secure", "uri", "collection"}; + +template +SettingsChanges getSettingsChangesFromConfig( + const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + SettingsChanges config_settings; + for (const auto & setting : settings.all()) + { + const auto & setting_name = setting.getName(); + auto setting_value = config.getString(config_prefix + '.' 
+ setting_name, ""); + if (!setting_value.empty()) + config_settings.emplace_back(setting_name, setting_value); + } + return config_settings; +} + + String ExternalDataSourceConfiguration::toString() const { WriteBufferFromOwnString configuration_info; @@ -67,7 +89,9 @@ void ExternalDataSourceConfiguration::set(const ExternalDataSourceConfiguration } -std::optional getExternalDataSourceConfiguration(const ASTs & args, ContextPtr context, bool is_database_engine, bool throw_on_no_collection) +template +std::optional getExternalDataSourceConfiguration( + const ASTs & args, ContextPtr context, bool is_database_engine, bool throw_on_no_collection, const BaseSettings & storage_settings) { if (args.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); @@ -90,6 +114,8 @@ std::optional getExternalDataSourceConfiguration(const throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection->name()); } + SettingsChanges config_settings = getSettingsChangesFromConfig(storage_settings, config, collection_prefix); + configuration.host = config.getString(collection_prefix + ".host", ""); configuration.port = config.getInt(collection_prefix + ".port", 0); configuration.username = config.getString(collection_prefix + ".user", ""); @@ -131,6 +157,7 @@ std::optional getExternalDataSourceConfiguration(const if (arg_value_literal) { auto arg_value = arg_value_literal->value; + if (arg_name == "host") configuration.host = arg_value.safeGet(); else if (arg_name == "port") @@ -147,6 +174,8 @@ std::optional getExternalDataSourceConfiguration(const configuration.schema = arg_value.safeGet(); else if (arg_name == "addresses_expr") configuration.addresses_expr = arg_value.safeGet(); + else if (storage_settings.has(arg_name)) + config_settings.emplace_back(arg_name, arg_value); else non_common_args.emplace_back(std::make_pair(arg_name, arg_value_ast)); } @@ -161,8 +190,7 @@ std::optional getExternalDataSourceConfiguration(const } } - ExternalDataSourceConfig source_config{ .configuration = configuration, .specific_args = non_common_args }; - return source_config; + return ExternalDataSourceInfo{ .configuration = configuration, .specific_args = non_common_args, .settings_changes = config_settings }; } return std::nullopt; } @@ -179,9 +207,10 @@ static void validateConfigKeys( } } -std::optional getExternalDataSourceConfiguration( +template +std::optional getExternalDataSourceConfiguration( const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key) + ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings) { validateConfigKeys(dict_config, dict_config_prefix, has_config_key); ExternalDataSourceConfiguration configuration; @@ -192,6 +221,10 @@ std::optional getExternalDataSourceConfiguratio const auto & config = context->getConfigRef(); const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); validateConfigKeys(dict_config, collection_prefix, has_config_key); + auto config_settings = getSettingsChangesFromConfig(settings, config, collection_prefix); + auto dict_settings = getSettingsChangesFromConfig(settings, dict_config, dict_config_prefix); + /// dictionary config settings override collection settings. 
+ config_settings.insert(config_settings.end(), dict_settings.begin(), dict_settings.end()); if (!config.has(collection_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); @@ -210,7 +243,7 @@ std::optional getExternalDataSourceConfiguratio throw Exception(ErrorCodes::BAD_ARGUMENTS, "Named collection of connection parameters is missing some of the parameters and dictionary parameters are not added"); } - return configuration; + return ExternalDataSourceInfo{ .configuration = configuration, .specific_args = {}, .settings_changes = config_settings }; } return std::nullopt; } @@ -225,7 +258,7 @@ ExternalDataSourcesByPriority getExternalDataSourceConfigurationByPriority( auto named_collection = getExternalDataSourceConfiguration(dict_config, dict_config_prefix, context, has_config_key); if (named_collection) { - common_configuration = *named_collection; + common_configuration = named_collection->configuration; } else { @@ -391,6 +424,7 @@ std::optional getURLBasedDataSourceConfiguration(const return std::nullopt; } + template bool getExternalDataSourceConfiguration(const ASTs & args, BaseSettings & settings, ContextPtr context) { @@ -405,14 +439,7 @@ bool getExternalDataSourceConfiguration(const ASTs & args, BaseSettings & set if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection->name()); - SettingsChanges config_settings; - for (const auto & setting : settings.all()) - { - const auto & setting_name = setting.getName(); - auto setting_value = config.getString(config_prefix + '.' + setting_name, ""); - if (!setting_value.empty()) - config_settings.emplace_back(setting_name, setting_value); - } + auto config_settings = getSettingsChangesFromConfig(settings, config, config_prefix); /// Check key-value arguments. 
for (size_t i = 1; i < args.size(); ++i) @@ -450,4 +477,32 @@ bool getExternalDataSourceConfiguration(const ASTs & args, BaseSettings & settings, ContextPtr context); #endif + +template +std::optional getExternalDataSourceConfiguration( + const ASTs & args, ContextPtr context, bool is_database_engine, bool throw_on_no_collection, const BaseSettings & storage_settings); + +template +std::optional getExternalDataSourceConfiguration( + const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, + ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings); + +template +SettingsChanges getSettingsChangesFromConfig( + const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + +#if USE_MYSQL +template +std::optional getExternalDataSourceConfiguration( + const ASTs & args, ContextPtr context, bool is_database_engine, bool throw_on_no_collection, const BaseSettings & storage_settings); + +template +std::optional getExternalDataSourceConfiguration( + const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, + ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings); + +template +SettingsChanges getSettingsChangesFromConfig( + const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); +#endif } diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index 930b83ffc71..926ad64b515 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -7,6 +7,11 @@ namespace DB { +#define EMPTY_SETTINGS(M) +DECLARE_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) + +struct EmptySettings : public BaseSettings {}; + struct ExternalDataSourceConfiguration { String host; @@ -46,10 +51,11 @@ struct StorageMongoDBConfiguration : ExternalDataSourceConfiguration using StorageSpecificArgs = std::vector>; -struct ExternalDataSourceConfig +struct ExternalDataSourceInfo { ExternalDataSourceConfiguration configuration; StorageSpecificArgs specific_args; + SettingsChanges settings_changes; }; /* If there is a storage engine's configuration specified in the named_collections, @@ -62,13 +68,16 @@ struct ExternalDataSourceConfig * Any key-value engine argument except common (`host`, `port`, `username`, `password`, `database`) * is returned in EngineArgs struct. */ -std::optional getExternalDataSourceConfiguration(const ASTs & args, ContextPtr context, bool is_database_engine = false, bool throw_on_no_collection = true); +template +std::optional getExternalDataSourceConfiguration( + const ASTs & args, ContextPtr context, bool is_database_engine = false, bool throw_on_no_collection = true, const BaseSettings & storage_settings = {}); using HasConfigKeyFunc = std::function; -std::optional getExternalDataSourceConfiguration( +template +std::optional getExternalDataSourceConfiguration( const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key); + ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings = {}); /// Highest priority is 0, the bigger the number in map, the less the priority. 
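The settings plumbing added here follows a simple pattern: collect string values for every known setting under a config prefix, and let later changes override earlier ones when they are applied in order. A rough self-contained sketch of that idea follows; a std::map stands in for Poco's AbstractConfiguration, a vector of pairs for SettingsChanges, and the prefixes such as named_collections.mymysql are invented examples, not real ClickHouse APIs.

    #include <iostream>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // FakeConfig plays the role of the configuration tree; Changes plays the role
    // of SettingsChanges. The real code iterates a BaseSettings object and keeps
    // only values that are explicitly present and non-empty in the config.
    using FakeConfig = std::map<std::string, std::string>;
    using Changes = std::vector<std::pair<std::string, std::string>>;

    static Changes collectChanges(const std::vector<std::string> & known_settings,
                                  const FakeConfig & config, const std::string & prefix)
    {
        Changes changes;
        for (const auto & name : known_settings)
        {
            auto it = config.find(prefix + "." + name);
            if (it != config.end() && !it->second.empty())
                changes.emplace_back(name, it->second);
        }
        return changes;
    }

    int main()
    {
        const std::vector<std::string> known = {"connection_pool_size", "connect_timeout", "read_write_timeout"};

        const FakeConfig config = {
            {"named_collections.mymysql.connection_pool_size", "16"},   // collection-level value
            {"dictionary.source.connection_pool_size", "4"},            // dictionary-level override
            {"dictionary.source.connect_timeout", "10"},
        };

        // Appending the dictionary-level changes after the collection-level ones means
        // they win when the changes are applied in order, which is the override rule
        // described by the comment in getExternalDataSourceConfiguration above.
        Changes merged = collectChanges(known, config, "named_collections.mymysql");
        const Changes dict = collectChanges(known, config, "dictionary.source");
        merged.insert(merged.end(), dict.begin(), dict.end());

        for (const auto & [name, value] : merged)
            std::cout << name << " = " << value << '\n';
        return 0;
    }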
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index fc3eff7459b..1cf701492a9 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -22,17 +23,13 @@ #include #include -#include namespace DB { namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int NO_ELEMENTS_IN_CONFIG; extern const int UNKNOWN_STORAGE; extern const int NO_REPLICA_NAME_GIVEN; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; @@ -63,171 +60,6 @@ static Names extractColumnNames(const ASTPtr & node) } } -/** Is used to order Graphite::Retentions by age and precision descending. - * Throws exception if not both age and precision are less or greater then another. - */ -static bool compareRetentions(const Graphite::Retention & a, const Graphite::Retention & b) -{ - if (a.age > b.age && a.precision > b.precision) - { - return true; - } - else if (a.age < b.age && a.precision < b.precision) - { - return false; - } - String error_msg = "age and precision should only grow up: " - + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " - + std::to_string(b.age) + ":" + std::to_string(b.precision); - throw Exception( - error_msg, - ErrorCodes::BAD_ARGUMENTS); -} - -/** Read the settings for Graphite rollup from config. - * Example - * - * - * Path - * - * click_cost - * any - * - * 0 - * 3600 - * - * - * 86400 - * 60 - * - * - * - * max - * - * 0 - * 60 - * - * - * 3600 - * 300 - * - * - * 86400 - * 3600 - * - * - * - */ -static void appendGraphitePattern( - const Poco::Util::AbstractConfiguration & config, - const String & config_element, - Graphite::Patterns & out_patterns, - ContextPtr context) -{ - Graphite::Pattern pattern; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_element, keys); - - for (const auto & key : keys) - { - if (key == "regexp") - { - pattern.regexp_str = config.getString(config_element + ".regexp"); - pattern.regexp = std::make_shared(pattern.regexp_str); - } - else if (key == "function") - { - String aggregate_function_name_with_params = config.getString(config_element + ".function"); - String aggregate_function_name; - Array params_row; - getAggregateFunctionNameAndParametersArray( - aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization", context); - - /// TODO Not only Float64 - AggregateFunctionProperties properties; - pattern.function = AggregateFunctionFactory::instance().get( - aggregate_function_name, {std::make_shared()}, params_row, properties); - } - else if (startsWith(key, "retention")) - { - pattern.retentions.emplace_back(Graphite::Retention{ - .age = config.getUInt(config_element + "." + key + ".age"), - .precision = config.getUInt(config_element + "." 
+ key + ".precision")}); - } - else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - - if (!pattern.function && pattern.retentions.empty()) - throw Exception( - "At least one of an aggregate function or retention rules is mandatory for rollup patterns in GraphiteMergeTree", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); - - if (!pattern.function) - { - pattern.type = pattern.TypeRetention; - } - else if (pattern.retentions.empty()) - { - pattern.type = pattern.TypeAggregation; - } - else - { - pattern.type = pattern.TypeAll; - } - - if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll - if (pattern.function->allocatesMemoryInArena()) - throw Exception( - "Aggregate function " + pattern.function->getName() + " isn't supported in GraphiteMergeTree", ErrorCodes::NOT_IMPLEMENTED); - - /// retention should be in descending order of age. - if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll - std::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); - - out_patterns.emplace_back(pattern); -} - -static void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params) -{ - const auto & config = context->getConfigRef(); - - if (!config.has(config_element)) - throw Exception("No '" + config_element + "' element in configuration file", ErrorCodes::NO_ELEMENTS_IN_CONFIG); - - params.config_name = config_element; - params.path_column_name = config.getString(config_element + ".path_column_name", "Path"); - params.time_column_name = config.getString(config_element + ".time_column_name", "Time"); - params.value_column_name = config.getString(config_element + ".value_column_name", "Value"); - params.version_column_name = config.getString(config_element + ".version_column_name", "Timestamp"); - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_element, keys); - - for (const auto & key : keys) - { - if (startsWith(key, "pattern")) - { - appendGraphitePattern(config, config_element + "." + key, params.patterns, context); - } - else if (key == "default") - { - /// See below. - } - else if (key == "path_column_name" || key == "time_column_name" || key == "value_column_name" || key == "version_column_name") - { - /// See above. - } - else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - - if (config.has(config_element + ".default")) - appendGraphitePattern(config, config_element + "." 
+ ".default", params.patterns, context); -} - - static String getMergeTreeVerboseHelp(bool) { using namespace std::string_literals; diff --git a/src/Storages/MySQL/MySQLHelpers.cpp b/src/Storages/MySQL/MySQLHelpers.cpp index e7745e6c0bb..edeb4ffca8a 100644 --- a/src/Storages/MySQL/MySQLHelpers.cpp +++ b/src/Storages/MySQL/MySQLHelpers.cpp @@ -8,9 +8,17 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + mysqlxx::PoolWithFailover createMySQLPoolWithFailover(const StorageMySQLConfiguration & configuration, const MySQLSettings & mysql_settings) { + if (!mysql_settings.connection_pool_size) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Connection pool cannot have zero size"); + return mysqlxx::PoolWithFailover( configuration.database, configuration.addresses, configuration.username, configuration.password, MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, diff --git a/src/Storages/MySQL/MySQLSettings.h b/src/Storages/MySQL/MySQLSettings.h index aa2c2703d6b..be1e09c12e6 100644 --- a/src/Storages/MySQL/MySQLSettings.h +++ b/src/Storages/MySQL/MySQLSettings.h @@ -25,11 +25,14 @@ class ASTStorage; DECLARE_SETTINGS_TRAITS(MySQLSettingsTraits, LIST_OF_MYSQL_SETTINGS) +using MySQLBaseSettings = BaseSettings; + /** Settings for the MySQL family of engines. */ -struct MySQLSettings : public BaseSettings +struct MySQLSettings : public MySQLBaseSettings { void loadFromQuery(ASTStorage & storage_def); }; + } diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index a669504b2d7..e7d72de2056 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -104,11 +104,16 @@ void PostgreSQLReplicationHandler::addStorage(const std::string & table_name, St } -void PostgreSQLReplicationHandler::startup() +void PostgreSQLReplicationHandler::startup(bool delayed) { - /// We load tables in a separate thread, because this database is not created yet. - /// (will get "database is currently dropped or renamed") - startup_task->activateAndSchedule(); + if (delayed) + { + startup_task->activateAndSchedule(); + } + else + { + startSynchronization(/* throw_on_error */ true); + } } diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h index 4403fb57aca..263095ec9c2 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.h @@ -29,7 +29,7 @@ public: bool is_materialized_postgresql_database_); /// Activate task to be run from a separate thread: wait until connection is available and call startReplication(). - void startup(); + void startup(bool delayed); /// Stop replication without cleanup. void shutdown(); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index b312f7284c3..fe81b322bdb 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -87,14 +87,8 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL( *replication_settings, /* is_materialized_postgresql_database */false); - if (!is_attach) - { - replication_handler->addStorage(remote_table_name, this); - /// Start synchronization preliminary setup immediately and throw in case of failure. 
- /// It should be guaranteed that if MaterializedPostgreSQL table was created successfully, then - /// its nested table was also created. - replication_handler->startSynchronization(/* throw_on_error */ true); - } + replication_handler->addStorage(remote_table_name, this); + replication_handler->startup(/* delayed */is_attach); } @@ -234,19 +228,6 @@ void StorageMaterializedPostgreSQL::set(StoragePtr nested_storage) } -void StorageMaterializedPostgreSQL::startup() -{ - /// replication_handler != nullptr only in case of single table engine MaterializedPostgreSQL. - if (replication_handler && is_attach) - { - replication_handler->addStorage(remote_table_name, this); - /// In case of attach table use background startup in a separate thread. First wait until connection is reachable, - /// then check for nested table -- it should already be created. - replication_handler->startup(); - } -} - - void StorageMaterializedPostgreSQL::shutdown() { if (replication_handler) diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index 9e11f314738..ff9b95cad7c 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -74,8 +74,6 @@ public: String getName() const override { return "MaterializedPostgreSQL"; } - void startup() override; - void shutdown() override; /// Used only for single MaterializedPostgreSQL storage. diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index e53f5adec52..5df50ab9a7c 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -169,6 +169,7 @@ struct SelectQueryInfo bool ignore_projections = false; bool is_projection_query = false; bool merge_tree_empty_result = false; + bool settings_limit_offset_done = false; Block minmax_count_projection_block; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; }; diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index 927c070826b..40a2ad0b85e 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -272,7 +272,7 @@ void registerStorageExternalDistributed(StorageFactory & factory) ExternalDataSourceConfiguration configuration; if (auto named_collection = getExternalDataSourceConfiguration(inner_engine_args, args.getLocalContext())) { - auto [common_configuration, storage_specific_args] = named_collection.value(); + auto [common_configuration, storage_specific_args, _] = named_collection.value(); configuration.set(common_configuration); for (const auto & [name, value] : storage_specific_args) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 56844192ee9..49111e02b11 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -412,6 +412,11 @@ StoragePtr StorageMaterializedView::tryGetTargetTable() const return DatabaseCatalog::instance().tryGetTable(target_table_id, getContext()); } +NamesAndTypesList StorageMaterializedView::getVirtuals() const +{ + return getTargetTable()->getVirtuals(); +} + Strings StorageMaterializedView::getDataPaths() const { if (auto table = tryGetTargetTable()) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index c110d0b211c..395560c1ca7 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -71,6 +71,9 @@ public: 
StoragePtr getTargetTable() const; StoragePtr tryGetTargetTable() const; + /// Get the virtual column of the target table; + NamesAndTypesList getVirtuals() const override; + ActionLock getActionLock(StorageActionBlockType type) override; Pipe read( diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 2c1b44d8685..9b25b44c0e7 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -117,7 +117,7 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C StorageMongoDBConfiguration configuration; if (auto named_collection = getExternalDataSourceConfiguration(engine_args, context)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); + auto [common_configuration, storage_specific_args, _] = named_collection.value(); configuration.set(common_configuration); for (const auto & [arg_name, arg_value] : storage_specific_args) diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 66adf3ae272..83cf2b07b21 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -238,15 +238,17 @@ SinkToStoragePtr StorageMySQL::write(const ASTPtr & /*query*/, const StorageMeta } -StorageMySQLConfiguration StorageMySQL::getConfiguration(ASTs engine_args, ContextPtr context_) +StorageMySQLConfiguration StorageMySQL::getConfiguration(ASTs engine_args, ContextPtr context_, MySQLBaseSettings & storage_settings) { StorageMySQLConfiguration configuration; - if (auto named_collection = getExternalDataSourceConfiguration(engine_args, context_)) + if (auto named_collection = getExternalDataSourceConfiguration( + engine_args, context_, /* is_database_engine */false, /* throw_on_no_collection */true, storage_settings)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); + auto [common_configuration, storage_specific_args, settings_changes] = named_collection.value(); configuration.set(common_configuration); configuration.addresses = {std::make_pair(configuration.host, configuration.port)}; + storage_settings.applyChanges(settings_changes); for (const auto & [arg_name, arg_value] : storage_specific_args) { @@ -298,9 +300,9 @@ void registerStorageMySQL(StorageFactory & factory) { factory.registerStorage("MySQL", [](const StorageFactory::Arguments & args) { - auto configuration = StorageMySQL::getConfiguration(args.engine_args, args.getLocalContext()); - MySQLSettings mysql_settings; /// TODO: move some arguments from the arguments to the SETTINGS. 
+ auto configuration = StorageMySQL::getConfiguration(args.engine_args, args.getLocalContext(), mysql_settings); + if (args.storage_def->settings) mysql_settings.loadFromQuery(*args.storage_def); diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index cc3673e50ca..fe2ee8439bc 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -53,7 +53,7 @@ public: SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; - static StorageMySQLConfiguration getConfiguration(ASTs engine_args, ContextPtr context_); + static StorageMySQLConfiguration getConfiguration(ASTs engine_args, ContextPtr context_, MySQLBaseSettings & storage_settings); private: friend class StorageMySQLSink; diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 8327bb92a38..5042f911149 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -390,7 +390,7 @@ StoragePostgreSQLConfiguration StoragePostgreSQL::getConfiguration(ASTs engine_a StoragePostgreSQLConfiguration configuration; if (auto named_collection = getExternalDataSourceConfiguration(engine_args, context)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); + auto [common_configuration, storage_specific_args, _] = named_collection.value(); configuration.set(common_configuration); configuration.addresses = {std::make_pair(configuration.host, configuration.port)}; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index dcf664db6fe..bcf7d7856cf 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -140,7 +140,8 @@ void StorageView::read( current_inner_query = query_info.view_query->clone(); } - InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, {}, column_names); + auto options = SelectQueryOptions(QueryProcessingStage::Complete, 0, false, query_info.settings_limit_offset_done); + InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, options, column_names); interpreter.buildQueryPlan(query_plan); /// It's expected that the columns read from storage are not constant. 
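The settings_limit_offset_done flag threaded through SelectQueryInfo and SelectQueryOptions above appears to mark that the limit/offset settings were already applied by an outer query, so a view's inner query should not apply them a second time. A conceptual, self-contained sketch of that guard is shown below; QueryInfo and applySettingsLimitOffset are invented stand-ins, not ClickHouse types, and the real handling lives in the interpreter.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    // QueryInfo mirrors only the flag added to SelectQueryInfo; applySettingsLimitOffset
    // plays the role of whatever applies the limit/offset settings to a result.
    // The point is the guard, not the surrounding machinery.
    struct QueryInfo
    {
        size_t settings_limit = 0;          // the "limit" setting, 0 means unset
        size_t settings_offset = 0;         // the "offset" setting
        bool settings_limit_offset_done = false;
    };

    static std::vector<int> applySettingsLimitOffset(const std::vector<int> & rows, QueryInfo & info)
    {
        if (info.settings_limit_offset_done)
            return rows;                    // already applied by an outer query: do nothing

        const size_t begin = std::min(info.settings_offset, rows.size());
        const size_t end = info.settings_limit ? std::min(begin + info.settings_limit, rows.size()) : rows.size();
        std::vector<int> result(rows.begin() + begin, rows.begin() + end);

        info.settings_limit_offset_done = true;   // nested reads (e.g. through a view) skip it
        return result;
    }

    int main()
    {
        QueryInfo info;
        info.settings_limit = 2;
        info.settings_offset = 1;

        const std::vector<int> data{10, 20, 30, 40, 50};
        const auto outer = applySettingsLimitOffset(data, info);   // {20, 30}
        const auto inner = applySettingsLimitOffset(outer, info);  // unchanged: the flag is already set

        for (int x : inner)
            std::cout << x << ' ';
        std::cout << '\n';                                         // prints: 20 30
        return 0;
    }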
diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index dd592600d18..8711162385f 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -10,6 +10,7 @@ NamesAndTypesList StorageSystemGraphite::getNamesAndTypes() { return { {"config_name", std::make_shared()}, + {"rule_type", std::make_shared()}, {"regexp", std::make_shared()}, {"function", std::make_shared()}, {"age", std::make_shared()}, @@ -85,6 +86,7 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co bool is_default = pattern.regexp == nullptr; String regexp; String function; + const String & rule_type = ruleTypeStr(pattern.rule_type); if (is_default) { @@ -107,6 +109,7 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co { size_t i = 0; res_columns[i++]->insert(config.first); + res_columns[i++]->insert(rule_type); res_columns[i++]->insert(regexp); res_columns[i++]->insert(function); res_columns[i++]->insert(retention.age); @@ -121,6 +124,7 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co { size_t i = 0; res_columns[i++]->insert(config.first); + res_columns[i++]->insert(rule_type); res_columns[i++]->insert(regexp); res_columns[i++]->insert(function); res_columns[i++]->insertDefault(); diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index e959fa754c9..cfed24caef6 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -37,8 +37,8 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr if (!args_func.arguments) throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR); - configuration = StorageMySQL::getConfiguration(args_func.arguments->children, context); MySQLSettings mysql_settings; + configuration = StorageMySQL::getConfiguration(args_func.arguments->children, context, mysql_settings); const auto & settings = context->getSettingsRef(); mysql_settings.connect_timeout = settings.external_storage_connect_timeout_sec; mysql_settings.read_write_timeout = settings.external_storage_rw_timeout_sec; diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index f7af6bee7d9..85857011616 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -60,7 +60,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr * Specific args (remote): sharding_key, or database (in case it is not ASTLiteral). * None of the common arguments is empty at this point, it is checked in getExternalDataSourceConfiguration. 
*/ - auto [common_configuration, storage_specific_args] = named_collection.value(); + auto [common_configuration, storage_specific_args, _] = named_collection.value(); configuration.set(common_configuration); for (const auto & [arg_name, arg_value] : storage_specific_args) diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index f37ea49e387..e21e7d0138d 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -6,80 +6,98 @@ import json import os import sys import time -from github import Github +from typing import List, Optional, Tuple from env_helper import REPO_COPY, TEMP_PATH, CACHES_PATH, IMAGES_PATH from s3_helper import S3Helper from pr_info import PRInfo -from get_robot_token import get_best_robot_token -from version_helper import get_version_from_repo, update_version_local +from version_helper import ( + ClickHouseVersion, + get_version_from_repo, + update_version_local, +) from ccache_utils import get_ccache_if_not_exists, upload_ccache -from ci_config import CI_CONFIG +from ci_config import CI_CONFIG, BuildConfig from docker_pull_helper import get_image_with_version from tee_popen import TeePopen -def get_build_config(build_check_name, build_name): - if build_check_name == 'ClickHouse build check (actions)': - build_config_name = 'build_config' +def get_build_config(build_check_name: str, build_name: str) -> BuildConfig: + if build_check_name == "ClickHouse build check (actions)": + build_config_name = "build_config" else: raise Exception(f"Unknown build check name {build_check_name}") return CI_CONFIG[build_config_name][build_name] -def _can_export_binaries(build_config): - if build_config['package_type'] != 'deb': +def _can_export_binaries(build_config: BuildConfig) -> bool: + if build_config["package_type"] != "deb": return False - if build_config['bundled'] != "bundled": + if build_config["bundled"] != "bundled": return False - if build_config['splitted'] == 'splitted': + if build_config["splitted"] == "splitted": return False - if build_config['sanitizer'] != '': + if build_config["sanitizer"] != "": return True - if build_config['build_type'] != '': + if build_config["build_type"] != "": return True return False -def get_packager_cmd(build_config, packager_path, output_path, build_version, image_version, ccache_path, pr_info): - package_type = build_config['package_type'] - comp = build_config['compiler'] - cmd = f"cd {packager_path} && ./packager --output-dir={output_path} --package-type={package_type} --compiler={comp}" +def get_packager_cmd( + build_config: BuildConfig, + packager_path: str, + output_path: str, + build_version: str, + image_version: str, + ccache_path: str, + pr_info: PRInfo, +) -> str: + package_type = build_config["package_type"] + comp = build_config["compiler"] + cmd = ( + f"cd {packager_path} && ./packager --output-dir={output_path} " + f"--package-type={package_type} --compiler={comp}" + ) - if build_config['build_type']: - cmd += ' --build-type={}'.format(build_config['build_type']) - if build_config['sanitizer']: - cmd += ' --sanitizer={}'.format(build_config['sanitizer']) - if build_config['splitted'] == 'splitted': - cmd += ' --split-binary' - if build_config['tidy'] == 'enable': - cmd += ' --clang-tidy' + if build_config["build_type"]: + cmd += " --build-type={}".format(build_config["build_type"]) + if build_config["sanitizer"]: + cmd += " --sanitizer={}".format(build_config["sanitizer"]) + if build_config["splitted"] == "splitted": + cmd += " --split-binary" + if build_config["tidy"] == "enable": + cmd += " 
--clang-tidy" - cmd += ' --cache=ccache' - cmd += ' --ccache_dir={}'.format(ccache_path) + cmd += " --cache=ccache" + cmd += " --ccache_dir={}".format(ccache_path) - if 'alien_pkgs' in build_config and build_config['alien_pkgs']: - if pr_info.number == 0 or 'release' in pr_info.labels: - cmd += ' --alien-pkgs rpm tgz' + if "alien_pkgs" in build_config and build_config["alien_pkgs"]: + if pr_info.number == 0 or "release" in pr_info.labels: + cmd += " --alien-pkgs rpm tgz" - cmd += ' --docker-image-version={}'.format(image_version) - cmd += ' --version={}'.format(build_version) + cmd += " --docker-image-version={}".format(image_version) + cmd += " --version={}".format(build_version) if _can_export_binaries(build_config): - cmd += ' --with-binaries=tests' + cmd += " --with-binaries=tests" return cmd -def get_image_name(build_config): - if build_config['package_type'] != 'deb': - return 'clickhouse/binary-builder' + +def get_image_name(build_config: BuildConfig) -> str: + if build_config["package_type"] != "deb": + return "clickhouse/binary-builder" else: - return 'clickhouse/deb-builder' + return "clickhouse/deb-builder" -def build_clickhouse(packager_cmd, logs_path, build_output_path): - build_log_path = os.path.join(logs_path, 'build_log.log') +def build_clickhouse( + packager_cmd: str, logs_path: str, build_output_path: str +) -> Tuple[str, bool]: + build_log_path = os.path.join(logs_path, "build_log.log") + success = False with TeePopen(packager_cmd, build_log_path) as process: retcode = process.wait() if os.path.exists(build_output_path): @@ -88,16 +106,21 @@ def build_clickhouse(packager_cmd, logs_path, build_output_path): build_results = [] if retcode == 0: - if len(build_results) != 0: + if len(build_results) > 0: + success = True logging.info("Built successfully") else: - logging.info("Success exit code, but no build artifacts => build failed") + logging.info( + "Success exit code, but no build artifacts => build failed" + ) else: logging.info("Build failed") - return build_log_path, retcode == 0 and len(build_results) > 0 + return build_log_path, success -def get_build_results_if_exists(s3_helper, s3_prefix): +def get_build_results_if_exists( + s3_helper: S3Helper, s3_prefix: str +) -> Optional[List[str]]: try: content = s3_helper.list_prefix(s3_prefix) return content @@ -105,8 +128,19 @@ def get_build_results_if_exists(s3_helper, s3_prefix): logging.info("Got exception %s listing %s", ex, s3_prefix) return None -def create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, elapsed, success): - subprocess.check_call(f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True) + +def create_json_artifact( + temp_path: str, + build_name: str, + log_url: str, + build_urls: List[str], + build_config: BuildConfig, + elapsed: int, + success: bool, +): + subprocess.check_call( + f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True + ) result = { "log_url": log_url, @@ -116,48 +150,76 @@ def create_json_artifact(temp_path, build_name, log_url, build_urls, build_confi "status": success, } - json_name = "build_urls_" + build_name + '.json' + json_name = "build_urls_" + build_name + ".json" - print ("Dump json report", result, "to", json_name, "with env", "build_urls_{build_name}") + print( + "Dump json report", + result, + "to", + json_name, + "with env", + "build_urls_{build_name}", + ) - with open(os.path.join(temp_path, json_name), 'w') as build_links: + with open(os.path.join(temp_path, json_name), "w") as build_links: json.dump(result, 
build_links) -if __name__ == "__main__": +def get_release_or_pr( + pr_info: PRInfo, build_config: BuildConfig, version: ClickHouseVersion +) -> str: + if "release" in pr_info.labels or "release-lts" in pr_info.labels: + # for release pull requests we use branch names prefixes, not pr numbers + return pr_info.head_ref + elif pr_info.number == 0 and build_config["package_type"] != "performance": + # for pushes to master - major version, but not for performance builds + # they havily relies on a fixed path for build package and nobody going + # to deploy them somewhere, so it's ok. + return ".".join(version.as_tuple()[:2]) + # PR number for anything else + return str(pr_info.number) + + +def upload_master_static_binaries( + pr_info: PRInfo, + build_config: BuildConfig, + s3_helper: S3Helper, + build_output_path: str, +): + """Upload binary artifacts to a static S3 links""" + static_binary_name = build_config.get("static_binary_name", False) + if pr_info.number != 0: + return + elif not static_binary_name: + return + elif pr_info.base_ref != "master": + return + + s3_path = "/".join((pr_info.base_ref, static_binary_name, "clickhouse")) + binary = os.path.join(build_output_path, "clickhouse") + url = s3_helper.upload_build_file_to_s3(binary, s3_path) + print(f"::notice ::Binary static URL: {url}") + + +def main(): logging.basicConfig(level=logging.INFO) - repo_path = REPO_COPY - temp_path = TEMP_PATH - caches_path = CACHES_PATH build_check_name = sys.argv[1] build_name = sys.argv[2] build_config = get_build_config(build_check_name, build_name) - if not os.path.exists(temp_path): - os.makedirs(temp_path) + if not os.path.exists(TEMP_PATH): + os.makedirs(TEMP_PATH) pr_info = PRInfo() - logging.info("Repo copy path %s", repo_path) + logging.info("Repo copy path %s", REPO_COPY) - gh = Github(get_best_robot_token()) - s3_helper = S3Helper('https://s3.amazonaws.com') + s3_helper = S3Helper("https://s3.amazonaws.com") - version = get_version_from_repo(repo_path) - release_or_pr = None - if 'release' in pr_info.labels or 'release-lts' in pr_info.labels: - # for release pull requests we use branch names prefixes, not pr numbers - release_or_pr = pr_info.head_ref - elif pr_info.number == 0 and build_config['package_type'] != "performance": - # for pushes to master - major version, but not for performance builds - # they havily relies on a fixed path for build package and nobody going - # to deploy them somewhere, so it's ok. 
- release_or_pr = ".".join(version.as_tuple()[:2]) - else: - # PR number for anything else - release_or_pr = str(pr_info.number) + version = get_version_from_repo(REPO_COPY) + release_or_pr = get_release_or_pr(pr_info, build_config, version) s3_path_prefix = "/".join((release_or_pr, pr_info.sha, build_name)) @@ -167,14 +229,27 @@ if __name__ == "__main__": if build_results is not None and len(build_results) > 0: logging.info("Some build results found %s", build_results) build_urls = [] - log_url = '' + log_url = "" for url in build_results: - if 'build_log.log' in url: - log_url = 'https://s3.amazonaws.com/clickhouse-builds/' + url.replace('+', '%2B').replace(' ', '%20') + if "build_log.log" in url: + log_url = "https://s3.amazonaws.com/clickhouse-builds/" + url.replace( + "+", "%2B" + ).replace(" ", "%20") else: - build_urls.append('https://s3.amazonaws.com/clickhouse-builds/' + url.replace('+', '%2B').replace(' ', '%20')) - create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, 0, len(build_urls) > 0) - sys.exit(0) + build_urls.append( + "https://s3.amazonaws.com/clickhouse-builds/" + + url.replace("+", "%2B").replace(" ", "%20") + ) + create_json_artifact( + TEMP_PATH, + build_name, + log_url, + build_urls, + build_config, + 0, + len(build_urls) > 0, + ) + return image_name = get_image_name(build_config) docker_image = get_image_with_version(IMAGES_PATH, image_name) @@ -182,65 +257,93 @@ if __name__ == "__main__": logging.info("Got version from repo %s", version.get_version_string()) - version_type = 'testing' - if 'release' in pr_info.labels or 'release-lts' in pr_info.labels: - version_type = 'stable' + version_type = "testing" + if "release" in pr_info.labels or "release-lts" in pr_info.labels: + version_type = "stable" - update_version_local(repo_path, pr_info.sha, version, version_type) + update_version_local(REPO_COPY, pr_info.sha, version, version_type) logging.info("Updated local files with version") logging.info("Build short name %s", build_name) - build_output_path = os.path.join(temp_path, build_name) + build_output_path = os.path.join(TEMP_PATH, build_name) if not os.path.exists(build_output_path): os.makedirs(build_output_path) - ccache_path = os.path.join(caches_path, build_name + '_ccache') + ccache_path = os.path.join(CACHES_PATH, build_name + "_ccache") logging.info("Will try to fetch cache for our build") - get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, temp_path) + get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, TEMP_PATH) if not os.path.exists(ccache_path): logging.info("cache was not fetched, will create empty dir") os.makedirs(ccache_path) - if build_config['package_type'] == "performance" and pr_info.number != 0: + if build_config["package_type"] == "performance" and pr_info.number != 0: # because perf tests store some information about git commits - subprocess.check_call(f"cd {repo_path} && git fetch origin master:master", shell=True) + subprocess.check_call( + f"cd {REPO_COPY} && git fetch origin master:master", shell=True + ) - packager_cmd = get_packager_cmd(build_config, os.path.join(repo_path, "docker/packager"), build_output_path, version.get_version_string(), image_version, ccache_path, pr_info) + packager_cmd = get_packager_cmd( + build_config, + os.path.join(REPO_COPY, "docker/packager"), + build_output_path, + version.get_version_string(), + image_version, + ccache_path, + pr_info, + ) logging.info("Going to run packager with %s", packager_cmd) - build_clickhouse_log = os.path.join(temp_path, 
"build_log") + build_clickhouse_log = os.path.join(TEMP_PATH, "build_log") if not os.path.exists(build_clickhouse_log): os.makedirs(build_clickhouse_log) start = time.time() - log_path, success = build_clickhouse(packager_cmd, build_clickhouse_log, build_output_path) + log_path, success = build_clickhouse( + packager_cmd, build_clickhouse_log, build_output_path + ) elapsed = int(time.time() - start) - subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True) + subprocess.check_call( + f"sudo chown -R ubuntu:ubuntu {build_output_path}", shell=True + ) subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {ccache_path}", shell=True) logging.info("Build finished with %s, log path %s", success, log_path) - logging.info("Will upload cache") - upload_ccache(ccache_path, s3_helper, pr_info.number, temp_path) + upload_ccache(ccache_path, s3_helper, pr_info.number, TEMP_PATH) if os.path.exists(log_path): - log_url = s3_helper.upload_build_file_to_s3(log_path, s3_path_prefix + "/" + os.path.basename(log_path)) + log_url = s3_helper.upload_build_file_to_s3( + log_path, s3_path_prefix + "/" + os.path.basename(log_path) + ) logging.info("Log url %s", log_url) else: logging.info("Build log doesn't exist") - build_urls = s3_helper.upload_build_folder_to_s3(build_output_path, s3_path_prefix, keep_dirs_in_s3_path=False, upload_symlinks=False) + build_urls = s3_helper.upload_build_folder_to_s3( + build_output_path, + s3_path_prefix, + keep_dirs_in_s3_path=False, + upload_symlinks=False, + ) logging.info("Got build URLs %s", build_urls) - print("::notice ::Build URLs: {}".format('\n'.join(build_urls))) + print("::notice ::Build URLs: {}".format("\n".join(build_urls))) print("::notice ::Log URL: {}".format(log_url)) - create_json_artifact(temp_path, build_name, log_url, build_urls, build_config, elapsed, success) + create_json_artifact( + TEMP_PATH, build_name, log_url, build_urls, build_config, elapsed, success + ) + + upload_master_static_binaries(pr_info, build_config, s3_helper, build_output_path) # Fail build job if not successeded if not success: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index d5f8757ffdf..70fdf06d40b 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1,5 +1,10 @@ #!/usr/bin/env python3 +from typing import Dict, TypeVar + +ConfValue = TypeVar("ConfValue", str, bool) +BuildConfig = Dict[str, ConfValue] + CI_CONFIG = { "build_config": { "package_release": { @@ -99,6 +104,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "amd64", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -109,6 +115,7 @@ CI_CONFIG = { "build_type": "debug", "sanitizer": "", "package_type": "binary", + "static_binary_name": "debug-amd64", "bundled": "bundled", "splitted": "unsplitted", "tidy": "enable", @@ -129,6 +136,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "macos", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -139,6 +147,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "aarch64", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -149,6 +158,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "freebsd", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -159,6 +169,7 @@ CI_CONFIG = { "build_type": "", 
"sanitizer": "", "package_type": "binary", + "static_binary_name": "macos-aarch64", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", @@ -198,6 +209,8 @@ CI_CONFIG = { ], }, "tests_config": { + # required_build - build name for artifacts + # force_tests - force success status for tests "Stateful tests (address, actions)": { "required_build": "package_asan", }, @@ -216,6 +229,10 @@ CI_CONFIG = { "Stateful tests (release, actions)": { "required_build": "package_release", }, + "Stateful tests (aarch64, actions)": { + "required_build": "package_aarch64", + "force_tests": True, + }, "Stateful tests (release, DatabaseOrdinary, actions)": { "required_build": "package_release", }, @@ -240,6 +257,10 @@ CI_CONFIG = { "Stateless tests (release, actions)": { "required_build": "package_release", }, + "Stateless tests (aarch64, actions)": { + "required_build": "package_aarch64", + "force_tests": True, + }, "Stateless tests (release, wide parts enabled, actions)": { "required_build": "package_release", }, @@ -334,4 +355,4 @@ CI_CONFIG = { "required_build": "performance", }, }, -} +} # type: dict diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 58fd8c4aece..0d8aee552f5 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -3,7 +3,7 @@ import time import logging import json -import requests +import requests # type: ignore from get_robot_token import get_parameter_from_ssm class ClickHouseHelper: diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 8396303c5a3..dd57f742ff1 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -2,10 +2,17 @@ import time from env_helper import GITHUB_REPOSITORY +from ci_config import CI_CONFIG RETRY = 5 +def override_status(status, check_name): + if CI_CONFIG["tests_config"][check_name].get("force_tests", False): + return "success" + return status + + def get_commit(gh, commit_sha, retry_count=RETRY): for i in range(retry_count): try: @@ -25,7 +32,12 @@ def post_commit_status(gh, sha, check_name, description, state, report_url): for i in range(RETRY): try: commit = get_commit(gh, sha, 1) - commit.create_status(context=check_name, description=description, state=state, target_url=report_url) + commit.create_status( + context=check_name, + description=description, + state=state, + target_url=report_url, + ) break except Exception as ex: if i == RETRY - 1: diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index e389d612f44..d698d18a58b 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -1,10 +1,13 @@ #!/usr/bin/env python3 -import subprocess -import logging +import argparse import json +import logging import os -import time import shutil +import subprocess +import time +from typing import List, Tuple + from github import Github from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP @@ -12,40 +15,52 @@ from s3_helper import S3Helper from pr_info import PRInfo from get_robot_token import get_best_robot_token, get_parameter_from_ssm from upload_result_helper import upload_results -from commit_status_helper import get_commit +from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch NAME = "Push to Dockerhub (actions)" -def get_changed_docker_images(pr_info, repo_path, image_file_path): +TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") + + +def 
get_changed_docker_images( + pr_info: PRInfo, repo_path: str, image_file_path: str +) -> List[Tuple[str, str]]: images_dict = {} path_to_images_file = os.path.join(repo_path, image_file_path) if os.path.exists(path_to_images_file): - with open(path_to_images_file, 'r') as dict_file: + with open(path_to_images_file, "r") as dict_file: images_dict = json.load(dict_file) else: - logging.info("Image file %s doesnt exists in repo %s", image_file_path, repo_path) + logging.info( + "Image file %s doesnt exists in repo %s", image_file_path, repo_path + ) - dockerhub_repo_name = 'yandex' if not images_dict: - return [], dockerhub_repo_name + return [] files_changed = pr_info.changed_files - logging.info("Changed files for PR %s @ %s: %s", pr_info.number, pr_info.sha, str(files_changed)) + logging.info( + "Changed files for PR %s @ %s: %s", + pr_info.number, + pr_info.sha, + str(files_changed), + ) changed_images = [] for dockerfile_dir, image_description in images_dict.items(): - if image_description['name'].startswith('clickhouse/'): - dockerhub_repo_name = 'clickhouse' - for f in files_changed: if f.startswith(dockerfile_dir): logging.info( - "Found changed file '%s' which affects docker image '%s' with path '%s'", - f, image_description['name'], dockerfile_dir) + "Found changed file '%s' which affects " + "docker image '%s' with path '%s'", + f, + image_description["name"], + dockerfile_dir, + ) changed_images.append(dockerfile_dir) break @@ -54,15 +69,20 @@ def get_changed_docker_images(pr_info, repo_path, image_file_path): index = 0 while index < len(changed_images): image = changed_images[index] - for dependent in images_dict[image]['dependent']: + for dependent in images_dict[image]["dependent"]: logging.info( - "Marking docker image '%s' as changed because it depends on changed docker image '%s'", - dependent, image) + "Marking docker image '%s' as changed because it " + "depends on changed docker image '%s'", + dependent, + image, + ) changed_images.append(dependent) index += 1 - if index > 100: + if index > 5 * len(images_dict): # Sanity check to prevent infinite loop. - raise RuntimeError("Too many changed docker images, this is a bug." + str(changed_images)) + raise RuntimeError( + f"Too many changed docker images, this is a bug. 
{changed_images}" + ) # If a dependent image was already in the list because its own files # changed, but then it was added as a dependent of a changed base, we @@ -76,140 +96,248 @@ def get_changed_docker_images(pr_info, repo_path, image_file_path): seen.add(x) no_dups_reversed.append(x) - result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] - logging.info("Changed docker images for PR %s @ %s: '%s'", pr_info.number, pr_info.sha, result) - return result, dockerhub_repo_name + result = [(x, images_dict[x]["name"]) for x in reversed(no_dups_reversed)] + logging.info( + "Changed docker images for PR %s @ %s: '%s'", + pr_info.number, + pr_info.sha, + result, + ) + return result -def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): - logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) - build_log = None - push_log = None - with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: - cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) - retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - build_log = str(pl.name) + +def build_and_push_one_image( + path_to_dockerfile_folder: str, image_name: str, version_string: str, push: bool +) -> Tuple[bool, str]: + path = path_to_dockerfile_folder + logging.info( + "Building docker image %s with version %s from path %s", + image_name, + version_string, + path, + ) + build_log = os.path.join( + TEMP_PATH, + "build_and_push_log_{}_{}".format( + str(image_name).replace("/", "_"), version_string + ), + ) + push_arg = "" + if push: + push_arg = "--push " + + with open(build_log, "w") as bl: + cmd = ( + "docker buildx build --builder default " + f"--build-arg FROM_TAG={version_string} " + f"--build-arg BUILDKIT_INLINE_CACHE=1 " + f"--tag {image_name}:{version_string} " + f"--cache-from type=registry,ref={image_name}:{version_string} " + f"{push_arg}" + f"--progress plain {path}" + ) + logging.info("Docker command to run: %s", cmd) + retcode = subprocess.Popen(cmd, shell=True, stderr=bl, stdout=bl).wait() if retcode != 0: - return False, build_log, None - - with open('tag_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: - cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) - retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - build_log = str(pl.name) - if retcode != 0: - return False, build_log, None - - logging.info("Pushing image %s to dockerhub", image_name) - - with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: - cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) - retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() - push_log = str(pl.name) - if retcode != 0: - return False, build_log, push_log + return False, build_log logging.info("Processing of %s successfully finished", image_name) - return True, build_log, push_log + return True, build_log -def process_single_image(versions, path_to_dockerfile_folder, image_name): - logging.info("Image will be pushed with versions %s", ', '.join(versions)) + +def process_single_image( + versions: List[str], path_to_dockerfile_folder: str, image_name: str, push: bool +) -> List[Tuple[str, str, str]]: + logging.info("Image will be pushed with versions %s", ", ".join(versions)) 
result = [] for ver in versions: for i in range(5): - success, build_log, push_log = build_and_push_one_image(path_to_dockerfile_folder, image_name, ver) + success, build_log = build_and_push_one_image( + path_to_dockerfile_folder, image_name, ver, push + ) if success: - result.append((image_name + ":" + ver, build_log, push_log, 'OK')) + result.append((image_name + ":" + ver, build_log, "OK")) break - logging.info("Got error will retry %s time and sleep for %s seconds", i, i * 5) + logging.info( + "Got error will retry %s time and sleep for %s seconds", i, i * 5 + ) time.sleep(i * 5) else: - result.append((image_name + ":" + ver, build_log, push_log, 'FAIL')) + result.append((image_name + ":" + ver, build_log, "FAIL")) logging.info("Processing finished") return result -def process_test_results(s3_client, test_results, s3_path_prefix): - overall_status = 'success' +def process_test_results( + s3_client: S3Helper, test_results: List[Tuple[str, str, str]], s3_path_prefix: str +) -> Tuple[str, List[Tuple[str, str]]]: + overall_status = "success" processed_test_results = [] - for image, build_log, push_log, status in test_results: - if status != 'OK': - overall_status = 'failure' - url_part = '' + for image, build_log, status in test_results: + if status != "OK": + overall_status = "failure" + url_part = "" if build_log is not None and os.path.exists(build_log): build_url = s3_client.upload_test_report_to_s3( - build_log, - s3_path_prefix + "/" + os.path.basename(build_log)) + build_log, s3_path_prefix + "/" + os.path.basename(build_log) + ) url_part += 'build_log'.format(build_url) - if push_log is not None and os.path.exists(push_log): - push_url = s3_client.upload_test_report_to_s3( - push_log, - s3_path_prefix + "/" + os.path.basename(push_log)) - if url_part: - url_part += ', ' - url_part += 'push_log'.format(push_url) if url_part: - test_name = image + ' (' + url_part + ')' + test_name = image + " (" + url_part + ")" else: test_name = image processed_test_results.append((test_name, status)) return overall_status, processed_test_results -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Program to build changed or given docker images with all " + "dependant images. Example for local running: " + "python docker_images_check.py --no-push-images --no-reports " + "--image-path docker/packager/binary", + ) + + parser.add_argument( + "--suffix", + type=str, + help="suffix for all built images tags and resulting json file; the parameter " + "significantly changes the script behavior, e.g. changed_images.json is called " + "changed_images_{suffix}.json and contains list of all tags", + ) + parser.add_argument( + "--repo", + type=str, + default="clickhouse", + help="docker hub repository prefix", + ) + parser.add_argument( + "--image-path", + type=str, + action="append", + help="list of image paths to build instead of using pr_info + diff URL, " + "e.g. 
'docker/packager/binary'", + ) + parser.add_argument( + "--no-reports", + action="store_true", + help="don't push reports to S3 and github", + ) + parser.add_argument( + "--no-push-images", + action="store_true", + help="don't push images to docker hub", + ) + + return parser.parse_args() + + +def main(): + logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() + args = parse_args() + if args.suffix: + global NAME + NAME += f" {args.suffix}" + changed_json = os.path.join(TEMP_PATH, f"changed_images_{args.suffix}.json") + else: + changed_json = os.path.join(TEMP_PATH, "changed_images.json") + + push = not args.no_push_images + if push: + subprocess.check_output( # pylint: disable=unexpected-keyword-arg + "docker login --username 'robotclickhouse' --password-stdin", + input=get_parameter_from_ssm("dockerhub_robot_password"), + encoding="utf-8", + shell=True, + ) + repo_path = GITHUB_WORKSPACE - temp_path = os.path.join(RUNNER_TEMP, 'docker_images_check') - dockerhub_password = get_parameter_from_ssm('dockerhub_robot_password') - if os.path.exists(temp_path): - shutil.rmtree(temp_path) + if os.path.exists(TEMP_PATH): + shutil.rmtree(TEMP_PATH) + os.makedirs(TEMP_PATH) - if not os.path.exists(temp_path): - os.makedirs(temp_path) + if args.image_path: + pr_info = PRInfo() + pr_info.changed_files = set(i for i in args.image_path) + else: + pr_info = PRInfo(need_changed_files=True) - pr_info = PRInfo(need_changed_files=True) - changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") - logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) - pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + changed_images = get_changed_docker_images(pr_info, repo_path, "docker/images.json") + logging.info( + "Has changed images %s", ", ".join([str(image[0]) for image in changed_images]) + ) + pr_commit_version = str(pr_info.number) + "-" + pr_info.sha + # The order is important, PR number is used as cache during the build versions = [str(pr_info.number), pr_commit_version] + result_version = pr_commit_version if pr_info.number == 0: - versions.append("latest") + # First get the latest for cache + versions.insert(0, "latest") - subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + if args.suffix: + # We should build architecture specific images separately and merge a + # manifest lately in a different script + versions = [f"{v}-{args.suffix}" for v in versions] + # changed_images_{suffix}.json should contain all changed images + result_version = versions result_images = {} images_processing_result = [] for rel_path, image_name in changed_images: full_path = os.path.join(repo_path, rel_path) - images_processing_result += process_single_image(versions, full_path, image_name) - result_images[image_name] = pr_commit_version + images_processing_result += process_single_image( + versions, full_path, image_name, push + ) + result_images[image_name] = result_version if changed_images: - description = "Updated " + ','.join([im[1] for im in changed_images]) + description = "Updated " + ",".join([im[1] for im in changed_images]) else: description = "Nothing to update" if len(description) >= 140: description = description[:136] + "..." 
- s3_helper = S3Helper('https://s3.amazonaws.com') - - s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') - status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix) - - ch_helper = ClickHouseHelper() - url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) - - with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: + with open(changed_json, "w") as images_file: json.dump(result_images, images_file) - print("::notice ::Report url: {}".format(url)) - print("::set-output name=url_output::\"{}\"".format(url)) - gh = Github(get_best_robot_token()) - commit = get_commit(gh, pr_info.sha) - commit.create_status(context=NAME, description=description, state=status, target_url=url) + s3_helper = S3Helper("https://s3.amazonaws.com") - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, status, stopwatch.duration_seconds, stopwatch.start_time_str, url, NAME) + s3_path_prefix = ( + str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(" ", "_") + ) + status, test_results = process_test_results( + s3_helper, images_processing_result, s3_path_prefix + ) + + url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) + + print("::notice ::Report url: {}".format(url)) + print('::set-output name=url_output::"{}"'.format(url)) + + if args.no_reports: + return + + gh = Github(get_best_robot_token()) + post_commit_status(gh, pr_info.sha, NAME, description, status, url) + + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + test_results, + status, + stopwatch.duration_seconds, + stopwatch.start_time_str, + url, + NAME, + ) + ch_helper = ClickHouseHelper() ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py new file mode 100644 index 00000000000..c6814b911ff --- /dev/null +++ b/tests/ci/docker_manifests_merge.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 + +import argparse +import json +import logging +import os +import subprocess + +from typing import List, Dict, Tuple +from github import Github + +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from env_helper import RUNNER_TEMP +from get_robot_token import get_best_robot_token, get_parameter_from_ssm +from pr_info import PRInfo +from s3_helper import S3Helper +from stopwatch import Stopwatch +from upload_result_helper import upload_results + +NAME = "Push multi-arch images to Dockerhub (actions)" +CHANGED_IMAGES = "changed_images_{}.json" +Images = Dict[str, List[str]] + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="The program gets images from changed_images_*.json, merges imeges " + "with different architectures into one manifest and pushes back to docker hub", + ) + + parser.add_argument( + "--suffix", + dest="suffixes", + type=str, + required=True, + action="append", + help="suffixes for existing images' tags. 
More than two should be given", + ) + parser.add_argument( + "--path", + type=str, + default=RUNNER_TEMP, + help="path to changed_images_*.json files", + ) + parser.add_argument( + "--no-reports", + action="store_true", + help="don't push reports to S3 and github", + ) + parser.add_argument( + "--no-push-images", + action="store_true", + help="don't push images to docker hub", + ) + + args = parser.parse_args() + if len(args.suffixes) < 2: + raise parser.error("more than two --suffix should be given") + + return args + + +def load_images(path: str, suffix: str) -> Images: + with open(os.path.join(path, CHANGED_IMAGES.format(suffix)), "r") as images: + return json.load(images) + + +def strip_suffix(suffix: str, images: Images) -> Images: + result = {} + for image, versions in images.items(): + for v in versions: + if not v.endswith(f"-{suffix}"): + raise ValueError( + f"version {image}:{v} does not contain suffix {suffix}" + ) + result[image] = [v[: -len(suffix) - 1] for v in versions] + + return result + + +def check_sources(to_merge: Dict[str, Images]) -> Images: + result = {} # type: Images + first_suffix = "" + for suffix, images in to_merge.items(): + if not result: + first_suffix = suffix + result = strip_suffix(suffix, images) + continue + if not result == strip_suffix(suffix, images): + raise ValueError( + f"images in {images} are not equal to {to_merge[first_suffix]}" + ) + + return result + + +def get_changed_images(images: Images) -> Dict[str, str]: + """The original json format is {"image": "tag"}, so the output artifact is + produced here. The latest version is {PR_NUMBER}-{SHA1} + """ + return {k: v[-1] for k, v in images.items()} + + +def merge_images(to_merge: Dict[str, Images]) -> Dict[str, List[List[str]]]: + """The function merges image-name:version-suffix1 and image-name:version-suffix2 + into image-name:version""" + suffixes = to_merge.keys() + result_images = check_sources(to_merge) + merge = {} # type: Dict[str, List[List[str]]] + + for image, versions in result_images.items(): + merge[image] = [] + for i, v in enumerate(versions): + merged_v = [v] # type: List[str] + for suf in suffixes: + merged_v.append(to_merge[suf][image][i]) + merge[image].append(merged_v) + + return merge + + +def create_manifest(image: str, tags: List[str], push: bool) -> Tuple[str, str]: + tag = tags[0] + manifest = f"{image}:{tag}" + cmd = "docker manifest create --amend {}".format( + " ".join((f"{image}:{t}" for t in tags)) + ) + logging.info("running: %s", cmd) + popen = subprocess.Popen( + cmd, + shell=True, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + universal_newlines=True, + ) + retcode = popen.wait() + if retcode != 0: + output = popen.stdout.read() # type: ignore + logging.error("failed to create manifest for %s:\n %s\n", manifest, output) + return manifest, "FAIL" + if not push: + return manifest, "OK" + + cmd = f"docker manifest push {manifest}" + logging.info("running: %s", cmd) + popen = subprocess.Popen( + cmd, + shell=True, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + universal_newlines=True, + ) + retcode = popen.wait() + if retcode != 0: + output = popen.stdout.read() # type: ignore + logging.error("failed to push %s:\n %s\n", manifest, output) + return manifest, "FAIL" + + return manifest, "OK" + + +def main(): + logging.basicConfig(level=logging.INFO) + stopwatch = Stopwatch() + + args = parse_args() + push = not args.no_push_images + if push: + subprocess.check_output( # pylint: disable=unexpected-keyword-arg + "docker login --username 
'robotclickhouse' --password-stdin", + input=get_parameter_from_ssm("dockerhub_robot_password"), + encoding="utf-8", + shell=True, + ) + + to_merge = {} + for suf in args.suffixes: + to_merge[suf] = load_images(args.path, suf) + + changed_images = get_changed_images(check_sources(to_merge)) + + os.environ["DOCKER_CLI_EXPERIMENTAL"] = "enabled" + merged = merge_images(to_merge) + + status = "success" + test_results = [] # type: List[Tuple[str, str]] + for image, versions in merged.items(): + for tags in versions: + manifest, test_result = create_manifest(image, tags, push) + test_results.append((manifest, test_result)) + if test_result != "OK": + status = "failure" + + with open(os.path.join(args.path, "changed_images.json"), "w") as ci: + json.dump(changed_images, ci) + + pr_info = PRInfo() + s3_helper = S3Helper("https://s3.amazonaws.com") + + url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) + + print("::notice ::Report url: {}".format(url)) + print('::set-output name=url_output::"{}"'.format(url)) + + if args.no_reports: + return + + if changed_images: + description = "Updated " + ", ".join(changed_images.keys()) + else: + description = "Nothing to update" + + if len(description) >= 140: + description = description[:136] + "..." + + gh = Github(get_best_robot_token()) + post_commit_status(gh, pr_info.sha, NAME, description, status, url) + + prepared_events = prepare_tests_results_for_clickhouse( + pr_info, + test_results, + status, + stopwatch.duration_seconds, + stopwatch.start_time_str, + url, + NAME, + ) + ch_helper = ClickHouseHelper() + ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 4419ba1c920..7220b86a482 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -15,7 +15,7 @@ from pr_info import PRInfo from build_download_helper import download_all_deb_packages from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, get_commit +from commit_status_helper import post_commit_status, get_commit, override_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch from rerun_helper import RerunHelper @@ -197,7 +197,9 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) s3_helper = S3Helper('https://s3.amazonaws.com') + state, description, test_results, additional_logs = process_results(result_path, server_log_path) + state = override_status(state, check_name) ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index db37ee311c5..fae277fe319 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import boto3 -from github import Github +import boto3 # type: ignore +from github import Github # type: ignore def get_parameter_from_ssm(name, decrypt=True, client=None): if not client: diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py index dff0a7d715e..8e06771433d 100644 --- a/tests/ci/metrics_lambda/app.py +++ b/tests/ci/metrics_lambda/app.py @@ -9,20 +9,23 @@ import time from collections import namedtuple import boto3 + def 
get_dead_runners_in_ec2(runners): - ids = {runner.name: runner for runner in runners if runner.offline == True and runner.busy == False} + ids = { + runner.name: runner for runner in runners if runner.offline and not runner.busy + } if not ids: return [] - client = boto3.client('ec2') + client = boto3.client("ec2") print("Checking ids", list(ids.keys())) instances_statuses = client.describe_instance_status(InstanceIds=list(ids.keys())) found_instances = set([]) print("Response", instances_statuses) - for instance_status in instances_statuses['InstanceStatuses']: - if instance_status['InstanceState']['Name'] in ('pending', 'running'): - found_instances.add(instance_status['InstanceId']) + for instance_status in instances_statuses["InstanceStatuses"]: + if instance_status["InstanceState"]["Name"] in ("pending", "running"): + found_instances.add(instance_status["InstanceId"]) print("Found instances", found_instances) result_to_delete = [] @@ -32,23 +35,25 @@ def get_dead_runners_in_ec2(runners): result_to_delete.append(runner) return result_to_delete + def get_key_and_app_from_aws(): import boto3 + secret_name = "clickhouse_github_secret_key" session = boto3.session.Session() client = session.client( - service_name='secretsmanager', + service_name="secretsmanager", ) - get_secret_value_response = client.get_secret_value( - SecretId=secret_name - ) - data = json.loads(get_secret_value_response['SecretString']) - return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + get_secret_value_response = client.get_secret_value(SecretId=secret_name) + data = json.loads(get_secret_value_response["SecretString"]) + return data["clickhouse-app-key"], int(data["clickhouse-app-id"]) + def handler(event, context): private_key, app_id = get_key_and_app_from_aws() main(private_key, app_id, True, True) + def get_installation_id(jwt_token): headers = { "Authorization": f"Bearer {jwt_token}", @@ -57,54 +62,81 @@ def get_installation_id(jwt_token): response = requests.get("https://api.github.com/app/installations", headers=headers) response.raise_for_status() data = response.json() - return data[0]['id'] + return data[0]["id"] + def get_access_token(jwt_token, installation_id): headers = { "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github.v3+json", } - response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response = requests.post( + f"https://api.github.com/app/installations/{installation_id}/access_tokens", + headers=headers, + ) response.raise_for_status() data = response.json() - return data['token'] + return data["token"] -RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) +RunnerDescription = namedtuple( + "RunnerDescription", ["id", "name", "tags", "offline", "busy"] +) + def list_runners(access_token): headers = { "Authorization": f"token {access_token}", "Accept": "application/vnd.github.v3+json", } - response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", headers=headers) + response = requests.get( + "https://api.github.com/orgs/ClickHouse/actions/runners?per_page=100", + headers=headers, + ) response.raise_for_status() data = response.json() - total_runners = data['total_count'] - runners = data['runners'] + total_runners = data["total_count"] + runners = data["runners"] total_pages = int(total_runners / 100 + 1) print("Total pages", total_pages) for i in range(2, total_pages + 1): - response = 
requests.get(f"https://api.github.com/orgs/ClickHouse/actions/runners?page={i}&per_page=100", headers=headers) + response = requests.get( + "https://api.github.com/orgs/ClickHouse/actions/runners" + f"?page={i}&per_page=100", + headers=headers, + ) response.raise_for_status() data = response.json() - runners += data['runners'] + runners += data["runners"] print("Total runners", len(runners)) result = [] for runner in runners: - tags = [tag['name'] for tag in runner['labels']] - desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, - offline=runner['status']=='offline', busy=runner['busy']) + tags = [tag["name"] for tag in runner["labels"]] + desc = RunnerDescription( + id=runner["id"], + name=runner["name"], + tags=tags, + offline=runner["status"] == "offline", + busy=runner["busy"], + ) result.append(desc) return result + def group_runners_by_tag(listed_runners): result = {} - RUNNER_TYPE_LABELS = ['style-checker', 'builder', 'func-tester', 'stress-tester', 'fuzzer-unit-tester'] + RUNNER_TYPE_LABELS = [ + "builder", + "func-tester", + "func-tester-aarch64", + "fuzzer-unit-tester", + "stress-tester", + "style-checker", + ] for runner in listed_runners: for tag in runner.tags: if tag in RUNNER_TYPE_LABELS: @@ -113,57 +145,72 @@ def group_runners_by_tag(listed_runners): result[tag].append(runner) break else: - if 'unlabeled' not in result: - result['unlabeled'] = [] - result['unlabeled'].append(runner) + if "unlabeled" not in result: + result["unlabeled"] = [] + result["unlabeled"].append(runner) return result def push_metrics_to_cloudwatch(listed_runners, namespace): - client = boto3.client('cloudwatch') + client = boto3.client("cloudwatch") metrics_data = [] - busy_runners = sum(1 for runner in listed_runners if runner.busy and not runner.offline) - metrics_data.append({ - 'MetricName': 'BusyRunners', - 'Value': busy_runners, - 'Unit': 'Count', - }) + busy_runners = sum( + 1 for runner in listed_runners if runner.busy and not runner.offline + ) + metrics_data.append( + { + "MetricName": "BusyRunners", + "Value": busy_runners, + "Unit": "Count", + } + ) total_active_runners = sum(1 for runner in listed_runners if not runner.offline) - metrics_data.append({ - 'MetricName': 'ActiveRunners', - 'Value': total_active_runners, - 'Unit': 'Count', - }) + metrics_data.append( + { + "MetricName": "ActiveRunners", + "Value": total_active_runners, + "Unit": "Count", + } + ) total_runners = len(listed_runners) - metrics_data.append({ - 'MetricName': 'TotalRunners', - 'Value': total_runners, - 'Unit': 'Count', - }) + metrics_data.append( + { + "MetricName": "TotalRunners", + "Value": total_runners, + "Unit": "Count", + } + ) if total_active_runners == 0: busy_ratio = 100 else: busy_ratio = busy_runners / total_active_runners * 100 - metrics_data.append({ - 'MetricName': 'BusyRunnersRatio', - 'Value': busy_ratio, - 'Unit': 'Percent', - }) + metrics_data.append( + { + "MetricName": "BusyRunnersRatio", + "Value": busy_ratio, + "Unit": "Percent", + } + ) client.put_metric_data(Namespace=namespace, MetricData=metrics_data) + def delete_runner(access_token, runner): headers = { "Authorization": f"token {access_token}", "Accept": "application/vnd.github.v3+json", } - response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) + response = requests.delete( + f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", + headers=headers, + ) response.raise_for_status() print(f"Response code deleting {runner.name} is 
{response.status_code}") return response.status_code == 204 + def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_runners): payload = { "iat": int(time.time()) - 60, @@ -179,11 +226,11 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_ru for group, group_runners in grouped_runners.items(): if push_to_cloudwatch: print(group) - push_metrics_to_cloudwatch(group_runners, 'RunnersMetrics/' + group) + push_metrics_to_cloudwatch(group_runners, "RunnersMetrics/" + group) else: print(group, f"({len(group_runners)})") for runner in group_runners: - print('\t', runner) + print("\t", runner) if delete_offline_runners: print("Going to delete offline runners") @@ -192,26 +239,43 @@ def main(github_secret_key, github_app_id, push_to_cloudwatch, delete_offline_ru print("Deleting runner", runner) delete_runner(access_token, runner) + if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Get list of runners and their states') - parser.add_argument('-p', '--private-key-path', help='Path to file with private key') - parser.add_argument('-k', '--private-key', help='Private key') - parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) - parser.add_argument('--push-to-cloudwatch', action='store_true', help='Store received token in parameter store') - parser.add_argument('--delete-offline', action='store_true', help='Remove offline runners') + parser = argparse.ArgumentParser(description="Get list of runners and their states") + parser.add_argument( + "-p", "--private-key-path", help="Path to file with private key" + ) + parser.add_argument("-k", "--private-key", help="Private key") + parser.add_argument( + "-a", "--app-id", type=int, help="GitHub application ID", required=True + ) + parser.add_argument( + "--push-to-cloudwatch", + action="store_true", + help="Store received token in parameter store", + ) + parser.add_argument( + "--delete-offline", action="store_true", help="Remove offline runners" + ) args = parser.parse_args() if not args.private_key_path and not args.private_key: - print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + print( + "Either --private-key-path or --private-key must be specified", + file=sys.stderr, + ) if args.private_key_path and args.private_key: - print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + print( + "Either --private-key-path or --private-key must be specified", + file=sys.stderr, + ) if args.private_key: private_key = args.private_key else: - with open(args.private_key_path, 'r') as key_file: + with open(args.private_key_path, "r") as key_file: private_key = key_file.read() main(private_key, args.app_id, args.push_to_cloudwatch, args.delete_offline) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 48464439dbc..a155786d815 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -2,8 +2,8 @@ import json import os -import requests -from unidiff import PatchSet +import requests # type: ignore +from unidiff import PatchSet # type: ignore from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL, GITHUB_RUN_ID, GITHUB_EVENT_PATH @@ -38,9 +38,18 @@ class PRInfo: with open(GITHUB_EVENT_PATH, 'r', encoding='utf-8') as event_file: github_event = json.load(event_file) else: - github_event = {'commits': 1, 'after': 'HEAD', 'ref': None} + github_event = { + 'commits': 1, + 'before': 'HEAD~', + 'after': 'HEAD', + 'ref': None, + } self.event = github_event self.changed_files = 
set([]) + self.body = "" + ref = github_event.get("ref", "refs/head/master") + if ref.startswith('refs/heads/'): + ref = ref[11:] # workflow completed event, used for PRs only if 'action' in github_event and github_event['action'] == 'completed': @@ -67,6 +76,7 @@ class PRInfo: self.base_name = github_event['pull_request']['base']['repo']['full_name'] self.head_ref = github_event['pull_request']['head']['ref'] self.head_name = github_event['pull_request']['head']['repo']['full_name'] + self.body = github_event['pull_request']['body'] if labels_from_api: response = requests.get(f"https://api.github.com/repos/{GITHUB_REPOSITORY}/issues/{self.number}/labels") @@ -90,13 +100,14 @@ class PRInfo: self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY - if pull_request is None or pull_request['state'] == 'closed': # it's merged PR to master + if pull_request is None or pull_request['state'] == 'closed': + # it's merged PR to master self.number = 0 self.labels = {} - self.pr_html_url = f"{repo_prefix}/commits/master" - self.base_ref = "master" + self.pr_html_url = f"{repo_prefix}/commits/{ref}" + self.base_ref = ref self.base_name = self.repo_full_name - self.head_ref = "master" + self.head_ref = ref self.head_name = self.repo_full_name self.diff_url = \ f"https://api.github.com/repos/{GITHUB_REPOSITORY}/compare/{github_event['before']}...{self.sha}" @@ -126,10 +137,10 @@ class PRInfo: self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY - self.pr_html_url = f"{repo_prefix}/commits/master" - self.base_ref = "master" + self.pr_html_url = f"{repo_prefix}/commits/{ref}" + self.base_ref = ref self.base_name = self.repo_full_name - self.head_ref = "master" + self.head_ref = ref self.head_name = self.repo_full_name if need_changed_files: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index c7156dbef26..78c1f35031a 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import sys import logging +import re from github import Github from env_helper import GITHUB_RUN_ID, GITHUB_REPOSITORY, GITHUB_SERVER_URL @@ -8,10 +9,10 @@ from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit -NAME = 'Run Check (actions)' +NAME = "Run Check (actions)" TRUSTED_ORG_IDS = { - 7409213, # yandex + 7409213, # yandex 28471076, # altinity 54801242, # clickhouse } @@ -22,55 +23,58 @@ DO_NOT_TEST_LABEL = "do not test" # Individual trusted contirbutors who are not in any trusted organization. # Can be changed in runtime: we will append users that we learned to be in # a trusted org, to save GitHub API calls. -TRUSTED_CONTRIBUTORS = {e.lower() for e in [ - "achimbab", - "adevyatova ", # DOCSUP - "Algunenano", # Raúl Marín, Tinybird - "AnaUvarova", # DOCSUP - "anauvarova", # technical writer, Yandex - "annvsh", # technical writer, Yandex - "atereh", # DOCSUP - "azat", - "bharatnc", # Newbie, but already with many contributions. 
- "bobrik", # Seasoned contributor, CloundFlare - "BohuTANG", - "codyrobert", # Flickerbox engineer - "cwurm", # Employee - "damozhaeva", # DOCSUP - "den-crane", - "flickerbox-tom", # Flickerbox - "gyuton", # technical writer, Yandex - "hagen1778", # Roman Khavronenko, seasoned contributor - "hczhcz", - "hexiaoting", # Seasoned contributor - "ildus", # adjust, ex-pgpro - "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto - "ka1bi4", # DOCSUP - "kirillikoff", # DOCSUP - "kitaisreal", # Seasoned contributor - "kreuzerkrieg", - "lehasm", # DOCSUP - "michon470", # DOCSUP - "MyroTk", # Tester in Altinity - "myrrc", # Michael Kot, Altinity - "nikvas0", - "nvartolomei", - "olgarev", # DOCSUP - "otrazhenia", # Yandex docs contractor - "pdv-ru", # DOCSUP - "podshumok", # cmake expert from QRator Labs - "s-mx", # Maxim Sabyanin, former employee, present contributor - "sevirov", # technical writer, Yandex - "spongedu", # Seasoned contributor - "ucasFL", # Amos Bird's friend - "vdimir", # Employee - "vzakaznikov", - "YiuRULE", - "zlobober", # Developer of YT - "ilejn", # Arenadata, responsible for Kerberized Kafka - "thomoco", # ClickHouse - "BoloniniD", # Seasoned contributor, HSE -]} +TRUSTED_CONTRIBUTORS = { + e.lower() + for e in [ + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloundFlare + "BohuTANG", + "codyrobert", # Flickerbox engineer + "cwurm", # Employee + "damozhaeva", # DOCSUP + "den-crane", + "flickerbox-tom", # Flickerbox + "gyuton", # technical writer, Yandex + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kitaisreal", # Seasoned contributor + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober", # Developer of YT + "ilejn", # Arenadata, responsible for Kerberized Kafka + "thomoco", # ClickHouse + "BoloniniD", # Seasoned contributor, HSE + ] +} def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): @@ -82,33 +86,120 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): for org_id in pr_user_orgs: if org_id in TRUSTED_ORG_IDS: - logging.info("Org '%s' is trusted; will mark user %s as trusted", org_id, pr_user_login) + logging.info( + "Org '%s' is trusted; will mark user %s as trusted", + org_id, + pr_user_login, + ) return True logging.info("Org '%s' is not trusted", org_id) return False + # Returns whether we should look into individual checks for this PR. If not, it # can be skipped entirely. def should_run_checks_for_pr(pr_info): # Consider the labels and whether the user is trusted. 
print("Got labels", pr_info.labels) - force_labels = set(['force tests']).intersection(pr_info.labels) + force_labels = set(["force tests"]).intersection(pr_info.labels) if force_labels: - return True, "Labeled '{}'".format(', '.join(force_labels)) + return True, "Labeled '{}'".format(", ".join(force_labels)) - if 'do not test' in pr_info.labels: + if "do not test" in pr_info.labels: return False, "Labeled 'do not test'" - if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): + if "can be tested" not in pr_info.labels and not pr_is_by_trusted_user( + pr_info.user_login, pr_info.user_orgs + ): return False, "Needs 'can be tested' label" - if 'release' in pr_info.labels or 'pr-backport' in pr_info.labels or 'pr-cherrypick' in pr_info.labels: + if ( + "release" in pr_info.labels + or "pr-backport" in pr_info.labels + or "pr-cherrypick" in pr_info.labels + ): return False, "Don't try new checks for release/backports/cherry-picks" return True, "No special conditions apply" +def check_pr_description(pr_info): + description = pr_info.body + + lines = list( + map(lambda x: x.strip(), description.split("\n") if description else []) + ) + lines = [re.sub(r"\s+", " ", line) for line in lines] + + category = "" + entry = "" + + i = 0 + while i < len(lines): + if re.match(r"(?i)^[>*_ ]*change\s*log\s*category", lines[i]): + i += 1 + if i >= len(lines): + break + # Can have one empty line between header and the category + # itself. Filter it out. + if not lines[i]: + i += 1 + if i >= len(lines): + break + category = re.sub(r"^[-*\s]*", "", lines[i]) + i += 1 + + # Should not have more than one category. Require empty line + # after the first found category. + if i >= len(lines): + break + if lines[i]: + second_category = re.sub(r"^[-*\s]*", "", lines[i]) + result_status = ( + "More than one changelog category specified: '" + + category + + "', '" + + second_category + + "'" + ) + return result_status[:140] + + elif re.match( + r"(?i)^[>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] + ): + i += 1 + # Can have one empty line between header and the entry itself. + # Filter it out. + if i < len(lines) and not lines[i]: + i += 1 + # All following lines until empty one are the changelog entry. + entry_lines = [] + while i < len(lines) and lines[i]: + entry_lines.append(lines[i]) + i += 1 + entry = " ".join(entry_lines) + # Don't accept changelog entries like '...'. + entry = re.sub(r"[#>*_.\- ]", "", entry) + else: + i += 1 + + if not category: + return "Changelog category is empty" + + # Filter out the PR categories that are not for changelog. 
+ if re.match( + r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", + category, + ): + return "" + + if not entry: + return f"Changelog entry required for category '{category}'" + + return "" + + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) @@ -116,15 +207,40 @@ if __name__ == "__main__": can_run, description = should_run_checks_for_pr(pr_info) gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) + + description_report = check_pr_description(pr_info)[:139] + if description_report: + print("::notice ::Cannot run, description does not match the template") + url = ( + f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" + "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1" + ) + commit.create_status( + context=NAME, + description=description_report, + state="failure", + target_url=url, + ) + sys.exit(1) + url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" if not can_run: print("::notice ::Cannot run") - commit.create_status(context=NAME, description=description, state="failure", target_url=url) + commit.create_status( + context=NAME, description=description, state="failure", target_url=url + ) sys.exit(1) else: - if 'pr-documentation' in pr_info.labels or 'pr-doc-fix' in pr_info.labels: - commit.create_status(context=NAME, description="Skipping checks for documentation", state="success", target_url=url) + if "pr-documentation" in pr_info.labels or "pr-doc-fix" in pr_info.labels: + commit.create_status( + context=NAME, + description="Skipping checks for documentation", + state="success", + target_url=url, + ) print("::notice ::Can run, but it's documentation PR, skipping") else: print("::notice ::Can run") - commit.create_status(context=NAME, description=description, state="pending", target_url=url) + commit.create_status( + context=NAME, description=description, state="pending", target_url=url + ) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index 753f036a8d7..902b97fdb95 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -7,7 +7,7 @@ import shutil import time from multiprocessing.dummy import Pool -import boto3 +import boto3 # type: ignore from env_helper import S3_TEST_REPORTS_BUCKET, S3_BUILDS_BUCKET, RUNNER_TEMP, CI from compress_files import compress_file_fast diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index cbb915e6de7..20302dacb97 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -15,11 +15,19 @@ class TeePopen: self.command = command self.log_file = log_file self.env = env + self.process = None def __enter__(self): - # pylint: disable=W0201 - self.process = Popen(self.command, shell=True, universal_newlines=True, env=self.env, stderr=STDOUT, stdout=PIPE, bufsize=1) - self.log_file = open(self.log_file, 'w', encoding='utf-8') + self.process = Popen( + self.command, + shell=True, + universal_newlines=True, + env=self.env, + stderr=STDOUT, + stdout=PIPE, + bufsize=1, + ) + self.log_file = open(self.log_file, "w", encoding="utf-8") return self def __exit__(self, t, value, traceback): diff --git a/tests/ci/worker/ubuntu_ami_for_ci.sh b/tests/ci/worker/ubuntu_ami_for_ci.sh index 3fabbb1f8a4..23d3b18c810 100644 --- a/tests/ci/worker/ubuntu_ami_for_ci.sh +++ b/tests/ci/worker/ubuntu_ami_for_ci.sh @@ -28,6 +28,7 @@ apt-get update apt-get install --yes --no-install-recommends \ apt-transport-https \ + binfmt-support \ build-essential \ ca-certificates \ curl \ @@ -37,6 +38,7 @@ apt-get install --yes --no-install-recommends \ pigz \ python3-dev \ 
python3-pip \ + qemu-user-static \ unzip curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg @@ -61,6 +63,10 @@ EOT systemctl restart docker +# buildx builder is user-specific +sudo -u ubuntu docker buildx version +sudo -u ubuntu docker buildx create --use --name default-builder + pip install boto3 pygithub requests urllib3 unidiff dohq-artifactory mkdir -p $RUNNER_HOME && cd $RUNNER_HOME diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py new file mode 100644 index 00000000000..16461ea3310 --- /dev/null +++ b/tests/integration/helpers/postgres_utility.py @@ -0,0 +1,273 @@ +import psycopg2 +import time +from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT + +postgres_table_template = """ + CREATE TABLE IF NOT EXISTS "{}" ( + key Integer NOT NULL, value Integer, PRIMARY KEY(key)) + """ +postgres_table_template_2 = """ + CREATE TABLE IF NOT EXISTS "{}" ( + key Integer NOT NULL, value1 Integer, value2 Integer, value3 Integer, PRIMARY KEY(key)) + """ +postgres_table_template_3 = """ + CREATE TABLE IF NOT EXISTS "{}" ( + key1 Integer NOT NULL, value1 Integer, key2 Integer NOT NULL, value2 Integer NOT NULL) + """ +postgres_table_template_4 = """ + CREATE TABLE IF NOT EXISTS "{}"."{}" ( + key Integer NOT NULL, value Integer, PRIMARY KEY(key)) + """ +postgres_table_template_5 = """ + CREATE TABLE IF NOT EXISTS "{}" ( + key Integer NOT NULL, value UUID, PRIMARY KEY(key)) + """ + +def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False): + if database == True: + conn_string = f"host={ip} port={port} dbname='{database_name}' user='postgres' password='mysecretpassword'" + else: + conn_string = f"host={ip} port={port} user='postgres' password='mysecretpassword'" + + if replication: + conn_string += " replication='database'" + + conn = psycopg2.connect(conn_string) + if auto_commit: + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + conn.autocommit = True + return conn + +def create_replication_slot(conn, slot_name='user_slot'): + cursor = conn.cursor() + cursor.execute(f'CREATE_REPLICATION_SLOT {slot_name} LOGICAL pgoutput EXPORT_SNAPSHOT') + result = cursor.fetchall() + print(result[0][0]) # slot name + print(result[0][1]) # start lsn + print(result[0][2]) # snapshot + return result[0][2] + +def drop_replication_slot(conn, slot_name='user_slot'): + cursor = conn.cursor() + cursor.execute(f"select pg_drop_replication_slot('{slot_name}')") + + +def create_postgres_schema(cursor, schema_name): + drop_postgres_schema(cursor, schema_name) + cursor.execute(f'CREATE SCHEMA {schema_name}') + +def drop_postgres_schema(cursor, schema_name): + cursor.execute(f'DROP SCHEMA IF EXISTS {schema_name} CASCADE') + + +def create_postgres_table(cursor, table_name, replica_identity_full=False, template=postgres_table_template): + drop_postgres_table(cursor, table_name) + cursor.execute(template.format(table_name)) + if replica_identity_full: + cursor.execute(f'ALTER TABLE {table_name} REPLICA IDENTITY FULL;') + +def drop_postgres_table(cursor, table_name): + cursor.execute(f"""DROP TABLE IF EXISTS "{table_name}" """) + + +def create_postgres_table_with_schema(cursor, schema_name, table_name): + drop_postgres_table_with_schema(cursor, schema_name, table_name) + cursor.execute(postgres_table_template_4.format(schema_name, table_name)) + +def drop_postgres_table_with_schema(cursor, schema_name, table_name): + 
cursor.execute(f"""DROP TABLE IF EXISTS "{schema_name}"."{table_name}" """) + + +class PostgresManager: + def __init__(self): + self.created_postgres_db_list = set() + self.created_materialized_postgres_db_list = set() + self.created_ch_postgres_db_list = set() + + def init(self, instance, ip, port): + self.instance = instance + self.ip = ip + self.port = port + self.conn = get_postgres_conn(ip=self.ip, port=self.port) + self.prepare() + + def restart(self): + try: + self.clear() + self.prepare() + except Exception as ex: + self.prepare() + raise ex + + def prepare(self): + conn = get_postgres_conn(ip=self.ip, port=self.port) + cursor = conn.cursor() + self.create_postgres_db(cursor, 'postgres_database') + self.create_clickhouse_postgres_db(ip=self.ip, port=self.port) + + def clear(self): + if self.conn.closed == 0: + self.conn.close() + for db in self.created_materialized_postgres_db_list.copy(): + self.drop_materialized_db(db); + for db in self.created_ch_postgres_db_list.copy(): + self.drop_clickhouse_postgres_db(db) + if len(self.created_postgres_db_list) > 0: + conn = get_postgres_conn(ip=self.ip, port=self.port) + cursor = conn.cursor() + for db in self.created_postgres_db_list.copy(): + self.drop_postgres_db(cursor, db) + + def get_db_cursor(self): + self.conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) + return self.conn.cursor() + + def create_postgres_db(self, cursor, name='postgres_database'): + self.drop_postgres_db(cursor, name) + self.created_postgres_db_list.add(name) + cursor.execute(f"CREATE DATABASE {name}") + + def drop_postgres_db(self, cursor, name='postgres_database'): + cursor.execute(f"DROP DATABASE IF EXISTS {name}") + if name in self.created_postgres_db_list: + self.created_postgres_db_list.remove(name) + + def create_clickhouse_postgres_db(self, ip, port, name='postgres_database', database_name='postgres_database', schema_name=''): + self.drop_clickhouse_postgres_db(name) + self.created_ch_postgres_db_list.add(name) + + if len(schema_name) == 0: + self.instance.query(f''' + CREATE DATABASE {name} + ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword')''') + else: + self.instance.query(f''' + CREATE DATABASE {name} + ENGINE = PostgreSQL('{ip}:{port}', '{database_name}', 'postgres', 'mysecretpassword', '{schema_name}')''') + + def drop_clickhouse_postgres_db(self, name='postgres_database'): + self.instance.query(f'DROP DATABASE IF EXISTS {name}') + if name in self.created_ch_postgres_db_list: + self.created_ch_postgres_db_list.remove(name) + + + def create_materialized_db(self, ip, port, + materialized_database='test_database', postgres_database='postgres_database', + settings=[], table_overrides=''): + self.created_materialized_postgres_db_list.add(materialized_database) + self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") + + create_query = f"CREATE DATABASE {materialized_database} ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', 'postgres', 'mysecretpassword')" + if len(settings) > 0: + create_query += " SETTINGS " + for i in range(len(settings)): + if i != 0: + create_query += ', ' + create_query += settings[i] + create_query += table_overrides + self.instance.query(create_query) + assert materialized_database in self.instance.query('SHOW DATABASES') + + def drop_materialized_db(self, materialized_database='test_database'): + self.instance.query(f'DROP DATABASE IF EXISTS {materialized_database} NO DELAY') + if materialized_database in 
self.created_materialized_postgres_db_list: + self.created_materialized_postgres_db_list.remove(materialized_database) + assert materialized_database not in self.instance.query('SHOW DATABASES') + + def create_and_fill_postgres_table(self, table_name): + conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) + cursor = conn.cursor() + self.create_and_fill_postgres_table_from_cursor(cursor, table_name) + + def create_and_fill_postgres_table_from_cursor(self, cursor, table_name): + create_postgres_table(cursor, table_name); + self.instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)") + + def create_and_fill_postgres_tables(self, tables_num, numbers=50): + conn = get_postgres_conn(ip=self.ip, port=self.port, database=True) + cursor = conn.cursor() + self.create_and_fill_postgres_tables_from_cursor(cursor, tables_num, numbers=numbers) + + def create_and_fill_postgres_tables_from_cursor(self, cursor, tables_num, numbers=50): + for i in range(tables_num): + table_name = f'postgresql_replica_{i}' + create_postgres_table(cursor, table_name); + if numbers > 0: + self.instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers({numbers})") + + +queries = [ + 'INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);', + 'DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;', + 'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;', + "UPDATE postgresql_replica_{} SET key=key+20000 WHERE key%2=0", + 'INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);', + 'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;', + 'UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;', + "UPDATE postgresql_replica_{} SET key=key+80000 WHERE key%2=1", + 'DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;', + 'UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;', + 'INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);', + 'DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;', + 'UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;', + "UPDATE postgresql_replica_{} SET key=key+500000 WHERE key%2=1", + 'INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);', + 'DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;', + "UPDATE postgresql_replica_{} SET key=key+10000000", + 'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;', + 'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;' + ] + + +def assert_nested_table_is_created(instance, table_name, materialized_database='test_database', schema_name=''): + if len(schema_name) == 0: + table = table_name + else: + table = schema_name + "." 
+ table_name + + print(f'Checking table {table} exists in {materialized_database}') + database_tables = instance.query(f'SHOW TABLES FROM {materialized_database}') + + while table not in database_tables: + time.sleep(0.2) + database_tables = instance.query(f'SHOW TABLES FROM {materialized_database}') + + assert(table in database_tables) + + +def assert_number_of_columns(instance, expected, table_name, database_name='test_database'): + result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") + while (int(result) != expected): + time.sleep(1) + result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") + print('Number of columns ok') + + +def check_tables_are_synchronized(instance, table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''): + assert_nested_table_is_created(instance, table_name, materialized_database, schema_name) + + table_path = '' + if len(schema_name) == 0: + table_path = f'{materialized_database}.{table_name}' + else: + table_path = f'{materialized_database}.`{schema_name}.{table_name}`' + + print(f"Checking table is synchronized: {table_path}") + result_query = f'select * from {table_path} order by {order_by};' + + expected = instance.query(f'select * from {postgres_database}.{table_name} order by {order_by};') + result = instance.query(result_query) + + for _ in range(30): + if result == expected: + break + else: + time.sleep(0.5) + result = instance.query(result_query) + + assert(result == expected) + + +def check_several_tables_are_synchronized(instance, tables_num, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''): + for i in range(tables_num): + check_tables_are_synchronized(instance, f'postgresql_replica_{i}'); diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py index 3577553be34..ec3841f79d7 100644 --- a/tests/integration/helpers/test_tools.py +++ b/tests/integration/helpers/test_tools.py @@ -100,3 +100,19 @@ def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silen time.sleep(sleep_time) else: raise exception + +def csv_compare(result, expected): + csv_result = TSV(result) + csv_expected = TSV(expected) + mismatch = [] + max_len = len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected) + for i in range(max_len): + if i >= len(csv_result): + mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i])) + elif i >= len(csv_expected): + mismatch.append("+[%d]=%s" % (i, csv_result.lines[i])) + elif csv_expected.lines[i] != csv_result.lines[i]: + mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i])) + mismatch.append("+[%d]=%s" % (i, csv_result.lines[i])) + + return "\n".join(mismatch) diff --git a/tests/integration/test_dictionaries_mysql/configs/named_collections.xml b/tests/integration/test_dictionaries_mysql/configs/named_collections.xml index e6e8d0c239f..6e4098c4e4a 100644 --- a/tests/integration/test_dictionaries_mysql/configs/named_collections.xml +++ b/tests/integration/test_dictionaries_mysql/configs/named_collections.xml @@ -21,5 +21,14 @@ test test_table
+ <mysql4> + <user>root</user> + <password>clickhouse</password> + <host>mysql57</host> + <port>3306</port> + <database>test</database> + <table>test_table</table>
+ <connection_pool_size>0</connection_pool_size> + </mysql4>
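
The <mysql4> collection above intentionally carries connection_pool_size = 0: the test changes that follow use it both to trigger the 'Connection pool cannot have zero size' error and to show that keys given inline in the dictionary DDL override the ones from the named collection. A sketch mirroring the last case of that test (illustrative only, not part of the patch; `instance` is the existing fixture from test_dictionaries_mysql/test.py):

    # The inline connection_pool_size is expected to take precedence over the
    # zero value stored in the <mysql4> collection.
    instance.query("""
        DROP DICTIONARY IF EXISTS dict;
        CREATE DICTIONARY dict (id UInt32, value UInt32)
        PRIMARY KEY id
        SOURCE(MYSQL(NAME mysql4 connection_pool_size 1))
        LIFETIME(MIN 1 MAX 2)
        LAYOUT(HASHED());
    """)
    assert int(instance.query("SELECT dictGetUInt32(dict, 'value', toUInt64(100))")) == 200
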
diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index c1819923523..664fde2baa8 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -205,6 +205,39 @@ def test_predefined_connection_configuration(started_cluster): result = instance.query("SELECT dictGetUInt32(dict, 'value', toUInt64(100))") assert(int(result) == 200) + instance.query(''' + DROP DICTIONARY IF EXISTS dict; + CREATE DICTIONARY dict (id UInt32, value UInt32) + PRIMARY KEY id + SOURCE(MYSQL(NAME mysql1 connection_pool_size 0)) + LIFETIME(MIN 1 MAX 2) + LAYOUT(HASHED()); + ''') + result = instance.query_and_get_error("SELECT dictGetUInt32(dict, 'value', toUInt64(100))") + assert 'Connection pool cannot have zero size' in result + + instance.query(''' + DROP DICTIONARY IF EXISTS dict; + CREATE DICTIONARY dict (id UInt32, value UInt32) + PRIMARY KEY id + SOURCE(MYSQL(NAME mysql4)) + LIFETIME(MIN 1 MAX 2) + LAYOUT(HASHED()); + ''') + result = instance.query_and_get_error("SELECT dictGetUInt32(dict, 'value', toUInt64(100))") + assert 'Connection pool cannot have zero size' in result + + instance.query(''' + DROP DICTIONARY IF EXISTS dict; + CREATE DICTIONARY dict (id UInt32, value UInt32) + PRIMARY KEY id + SOURCE(MYSQL(NAME mysql4 connection_pool_size 1)) + LIFETIME(MIN 1 MAX 2) + LAYOUT(HASHED()); + ''') + result = instance.query("SELECT dictGetUInt32(dict, 'value', toUInt64(100))") + assert(int(result) == 200) + def create_mysql_db(mysql_connection, name): with mysql_connection.cursor() as cursor: diff --git a/tests/integration/test_graphite_merge_tree/test.py b/tests/integration/test_graphite_merge_tree/test.py index 7628211551d..9e48f12f007 100644 --- a/tests/integration/test_graphite_merge_tree/test.py +++ b/tests/integration/test_graphite_merge_tree/test.py @@ -6,6 +6,7 @@ import pytest from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV +from helpers.test_tools import csv_compare cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', @@ -234,18 +235,19 @@ SELECT * FROM test.graphite; def test_system_graphite_retentions(graphite_table): expected = ''' -graphite_rollup \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] -graphite_rollup \\\\.max$ max 0 0 2 0 ['test'] ['graphite'] -graphite_rollup ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] -graphite_rollup ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] -graphite_rollup ^five_min\\\\. 0 300 3 0 ['test'] ['graphite'] -graphite_rollup ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] -graphite_rollup ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] -graphite_rollup ^one_min avg 0 60 4 0 ['test'] ['graphite'] +graphite_rollup all \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] +graphite_rollup all \\\\.max$ max 0 0 2 0 ['test'] ['graphite'] +graphite_rollup all ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] +graphite_rollup all ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] +graphite_rollup all ^five_min\\\\. 
0 300 3 0 ['test'] ['graphite'] +graphite_rollup all ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] +graphite_rollup all ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] +graphite_rollup all ^one_min avg 0 60 4 0 ['test'] ['graphite'] ''' result = q('SELECT * from system.graphite_retentions') - assert TSV(result) == TSV(expected) + mismatch = csv_compare(result, expected) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" q(''' DROP TABLE IF EXISTS test.graphite2; diff --git a/tests/integration/test_graphite_merge_tree_typed/__init__.py b/tests/integration/test_graphite_merge_tree_typed/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml b/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml new file mode 100644 index 00000000000..c716540a61c --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml @@ -0,0 +1,120 @@ + + + + metric + timestamp + value + updated + + plain + \.count$ + sum + + + plain + \.max$ + max + + + plain + ^five_min\. + + 0 + 300 + + + 5184000 + 3600 + + + 31536000 + 14400 + + + + plain + ^one_min + avg + + 0 + 60 + + + 7776000 + 300 + + + 31536000 + 600 + + + + tagged + + avg + + 0 + 60 + + + 7776000 + 300 + + + 31536000 + 600 + + + + tag_list + retention=five_min + avg + + 0 + 300 + + + 5184000 + 3600 + + + 31536000 + 14400 + + + + tagged + ^for_taggged + avg + + 0 + 60 + + + 7776000 + 300 + + + 31536000 + 600 + + + + all + ^ten_min\. + sum + + 0 + 600 + + + 5184000 + 7200 + + + 31536000 + 28800 + + + + diff --git a/tests/integration/test_graphite_merge_tree_typed/configs/users.xml b/tests/integration/test_graphite_merge_tree_typed/configs/users.xml new file mode 100644 index 00000000000..66d0cd7e445 --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/configs/users.xml @@ -0,0 +1,8 @@ + + + + + 0 + + + diff --git a/tests/integration/test_graphite_merge_tree_typed/test.py b/tests/integration/test_graphite_merge_tree_typed/test.py new file mode 100644 index 00000000000..e26fd0d2e77 --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/test.py @@ -0,0 +1,580 @@ +import datetime +import os.path as p +import time + +import sys +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.test_tools import csv_compare + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', + main_configs=['configs/graphite_rollup.xml'], + user_configs=["configs/users.xml"]) +q = instance.query + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + q('CREATE DATABASE test') + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture +def graphite_table(started_cluster): + q(''' +DROP TABLE IF EXISTS test.graphite; +CREATE TABLE test.graphite + (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) + ENGINE = GraphiteMergeTree('graphite_rollup') + PARTITION BY toYYYYMM(date) + ORDER BY (metric, timestamp) + SETTINGS index_granularity=8192; +''') + + yield + + q('DROP TABLE test.graphite') + + +def test_rollup_versions_plain(graphite_table): + timestamp = int(time.time()) + rounded_timestamp = timestamp - timestamp % 60 + date = datetime.date.today().isoformat() + + # Insert rows with timestamps relative to the current time so that the + # first retention clause is 
active. + # Two parts are created. + q(''' +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('one_min.x1', 100, {timestamp}, '{date}', 1); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('one_min.x1', 200, {timestamp}, '{date}', 2); +'''.format(timestamp=timestamp, date=date)) + + expected1 = '''\ +one_min.x1 100 {timestamp} {date} 1 +one_min.x1 200 {timestamp} {date} 2 +'''.format(timestamp=timestamp, date=date) + + assert TSV( + q('SELECT * FROM test.graphite ORDER BY updated') + ) == TSV(expected1) + + q('OPTIMIZE TABLE test.graphite') + + # After rollup only the row with max version is retained. + expected2 = '''\ +one_min.x1 200 {timestamp} {date} 2 +'''.format(timestamp=rounded_timestamp, date=date) + + assert TSV(q('SELECT * FROM test.graphite')) == TSV(expected2) + + +def test_rollup_versions_tagged(graphite_table): + timestamp = int(time.time()) + rounded_timestamp = timestamp - timestamp % 60 + date = datetime.date.today().isoformat() + + # Insert rows with timestamps relative to the current time so that the + # first retention clause is active. + # Two parts are created. + q(''' +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('x1?retention=one_min', 100, {timestamp}, '{date}', 1); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('x1?retention=one_min', 200, {timestamp}, '{date}', 2); +'''.format(timestamp=timestamp, date=date)) + + expected1 = '''\ +x1?retention=one_min 100 {timestamp} {date} 1 +x1?retention=one_min 200 {timestamp} {date} 2 +'''.format(timestamp=timestamp, date=date) + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected1) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected1}\ndiff\n{mismatch}\n" + + q('OPTIMIZE TABLE test.graphite') + + # After rollup only the row with max version is retained. + expected2 = '''\ +x1?retention=one_min 200 {timestamp} {date} 2 +'''.format(timestamp=rounded_timestamp, date=date) + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected2) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected2}\ndiff\n{mismatch}\n" + + +def test_rollup_versions_all(graphite_table): + timestamp = int(time.time()) + rounded_timestamp = timestamp - timestamp % 600 + date = datetime.date.today().isoformat() + + # Insert rows with timestamps relative to the current time so that the + # first retention clause is active. + # Two parts are created. 
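
The three test cases above exercise the new rule_type selection: 'plain' patterns are meant to apply to dot-separated metric paths, 'tagged' patterns to metrics carrying ?key=value tags, and 'all' patterns to both, which is why both forms of ten_min.x1 are rounded to the same 600-second precision. A trivial sketch of that distinction (illustrative only, not how ClickHouse implements the matching):

    def looks_tagged(metric: str) -> bool:
        # Hypothetical helper just for illustration.
        return "?" in metric

    assert not looks_tagged("one_min.x1")          # covered by the plain ^one_min rule
    assert looks_tagged("x1?retention=one_min")    # covered by a tagged rule
    # "ten_min.x1" and "ten_min.x1?env=staging" both fall under the "all" ^ten_min rule.
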
+ q(''' +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('ten_min.x1', 100, {timestamp}, '{date}', 1); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('ten_min.x1', 200, {timestamp}, '{date}', 2); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('ten_min.x1?env=staging', 100, {timestamp}, '{date}', 1); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('ten_min.x1?env=staging', 200, {timestamp}, '{date}', 2); +'''.format(timestamp=timestamp, date=date)) + + expected1 = '''\ +ten_min.x1 100 {timestamp} {date} 1 +ten_min.x1 200 {timestamp} {date} 2 +ten_min.x1?env=staging 100 {timestamp} {date} 1 +ten_min.x1?env=staging 200 {timestamp} {date} 2 +'''.format(timestamp=timestamp, date=date) + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected1) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected1}\ndiff\n{mismatch}\n" + + q('OPTIMIZE TABLE test.graphite') + + # After rollup only the row with max version is retained. + expected2 = '''\ +ten_min.x1 200 {timestamp} {date} 2 +ten_min.x1?env=staging 200 {timestamp} {date} 2 +'''.format(timestamp=rounded_timestamp, date=date) + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected2) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected2}\ndiff\n{mismatch}\n" + + +def test_rollup_aggregation_plain(graphite_table): + # This query essentially emulates what rollup does. + result1 = q(''' +SELECT avg(v), max(upd) +FROM (SELECT timestamp, + argMax(value, (updated, number)) AS v, + max(updated) AS upd + FROM (SELECT 'one_min.x5' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(intDiv(number, 2)) AS updated, + number + FROM system.numbers LIMIT 1000000) + WHERE intDiv(timestamp, 600) * 600 = 1111444200 + GROUP BY timestamp) +''') + + expected1 = '''\ +999634.9918367347 499999 +''' + assert TSV(result1) == TSV(expected1) + + # Timestamp 1111111111 is in sufficiently distant past + # so that the last retention clause is active. + result2 = q(''' +INSERT INTO test.graphite + SELECT 'one_min.x' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated + FROM (SELECT * FROM system.numbers LIMIT 1000000) + WHERE intDiv(timestamp, 600) * 600 = 1111444200; + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + expected2 = '''\ +one_min.x 999634.9918367347 1111444200 2017-02-02 499999 +''' + + assert TSV(result2) == TSV(expected2) + + +def test_rollup_aggregation_tagged(graphite_table): + # This query essentially emulates what rollup does. 
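
Several of these tests also switch from plain TSV equality to the csv_compare() helper added to tests/integration/helpers/test_tools.py earlier in this diff: it returns an empty string when the two blocks match and per-row '-[i]=...' / '+[i]=...' entries otherwise, which the asserts embed in their failure messages. A small usage sketch (illustrative, not part of the patch):

    from helpers.test_tools import csv_compare

    got = "one_min.x1\t200\t999999600\t2001-09-09\t2"
    want = "one_min.x1\t100\t999999600\t2001-09-09\t1"

    assert csv_compare(got, got) == ""    # identical blocks produce an empty report
    assert csv_compare(got, want) != ""   # differing rows produce a diff-style report
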
+ result1 = q(''' +SELECT avg(v), max(upd) +FROM (SELECT timestamp, + argMax(value, (updated, number)) AS v, + max(updated) AS upd + FROM (SELECT 'x?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(intDiv(number, 2)) AS updated, + number + FROM system.numbers LIMIT 1000000) + WHERE intDiv(timestamp, 600) * 600 = 1111444200 + GROUP BY timestamp) +''') + + expected1 = '''\ +999634.9918367347 499999 +''' + assert TSV(result1) == TSV(expected1) + + # Timestamp 1111111111 is in sufficiently distant past + # so that the last retention clause is active. + result2 = q(''' +INSERT INTO test.graphite + SELECT 'x?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated + FROM (SELECT * FROM system.numbers LIMIT 1000000) + WHERE intDiv(timestamp, 600) * 600 = 1111444200; + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + expected2 = '''\ +x?retention=one_min 999634.9918367347 1111444200 2017-02-02 499999 +''' + + assert TSV(result2) == TSV(expected2) + + +def test_rollup_aggregation_2_plain(graphite_table): + result = q(''' +INSERT INTO test.graphite + SELECT 'one_min.x' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 - intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + expected = '''\ +one_min.x 24 1111110600 2017-02-02 100 +''' + + assert TSV(result) == TSV(expected) + + +def test_rollup_aggregation_2_tagged(graphite_table): + result = q(''' +INSERT INTO test.graphite + SELECT 'x?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 - intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + expected = '''\ +x?retention=one_min 24 1111110600 2017-02-02 100 +''' + + assert TSV(result) == TSV(expected) + + +def test_multiple_paths_and_versions_plain(graphite_table): + result = q(''' +INSERT INTO test.graphite + SELECT 'one_min.x' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3) * 600) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; + + +INSERT INTO test.graphite + SELECT 'one_min.y' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + number * 600) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + with open(p.join(p.dirname(__file__), + 'test_multiple_paths_and_versions.reference.plain') + ) as reference: + assert TSV(result) == TSV(reference) + + +def test_multiple_paths_and_versions_tagged(graphite_table): + result = q(''' +INSERT INTO test.graphite + SELECT 'x?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3) * 600) AS timestamp, + toDate('2017-02-02') AS 
date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; + + +INSERT INTO test.graphite + SELECT 'y?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + number * 600) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + with open(p.join(p.dirname(__file__), + 'test_multiple_paths_and_versions.reference.tagged') + ) as reference: + assert TSV(result) == TSV(reference) + + +def test_multiple_output_blocks(graphite_table): + MERGED_BLOCK_SIZE = 8192 + + to_insert = '' + expected = '' + for i in range(2 * MERGED_BLOCK_SIZE + 1): + rolled_up_time = 1000000200 + 600 * i + + for j in range(3): + cur_time = rolled_up_time + 100 * j + to_insert += 'one_min.x1 {} {} 2001-09-09 1\n'.format( + 10 * j, cur_time + ) + to_insert += 'one_min.x1 {} {} 2001-09-09 2\n'.format( + 10 * (j + 1), cur_time + ) + + expected += 'one_min.x1 20 {} 2001-09-09 2\n'.format(rolled_up_time) + + q('INSERT INTO test.graphite FORMAT TSV', to_insert) + + result = q(''' +OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; + +SELECT * FROM test.graphite; +''') + + assert TSV(result) == TSV(expected) + + +def test_paths_not_matching_any_pattern(graphite_table): + to_insert = '''\ +one_min.x1 100 1000000000 2001-09-09 1 +zzzzzzzz 100 1000000001 2001-09-09 1 +zzzzzzzz 200 1000000001 2001-09-09 2 +''' + + q('INSERT INTO test.graphite FORMAT TSV', to_insert) + + expected = '''\ +one_min.x1 100 999999600 2001-09-09 1 +zzzzzzzz 200 1000000001 2001-09-09 2 +''' + + result = q(''' +OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; + +SELECT * FROM test.graphite; +''') + + assert TSV(result) == TSV(expected) + + +def test_rules_isolation(graphite_table): + to_insert = '''\ +one_min.x1 100 1000000000 2001-09-09 1 +for_taggged 100 1000000001 2001-09-09 1 +for_taggged 200 1000000001 2001-09-09 2 +one_min?env=staging 100 1000000001 2001-09-09 1 +one_min?env=staging 200 1000000001 2001-09-09 2 +''' + + q('INSERT INTO test.graphite FORMAT TSV', to_insert) + + expected = '''\ +for_taggged 200 1000000001 2001-09-09 2 +one_min.x1 100 999999600 2001-09-09 1 +one_min?env=staging 200 1000000001 2001-09-09 2 +''' + + result = q(''' +OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; + +SELECT * FROM test.graphite; +''') + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" + + +def test_system_graphite_retentions(graphite_table): + expected = ''' +graphite_rollup plain \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] +graphite_rollup plain \\\\.max$ max 0 0 2 0 ['test'] ['graphite'] +graphite_rollup plain ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] +graphite_rollup plain ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] +graphite_rollup plain ^five_min\\\\. 
0 300 3 0 ['test'] ['graphite'] +graphite_rollup plain ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] +graphite_rollup plain ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] +graphite_rollup plain ^one_min avg 0 60 4 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 31536000 600 5 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 7776000 300 5 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 0 60 5 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 31536000 14400 6 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 5184000 3600 6 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 0 300 6 0 ['test'] ['graphite'] +graphite_rollup tagged ^for_taggged avg 31536000 600 7 0 ['test'] ['graphite'] +graphite_rollup tagged ^for_taggged avg 7776000 300 7 0 ['test'] ['graphite'] +graphite_rollup tagged ^for_taggged avg 0 60 7 0 ['test'] ['graphite'] +graphite_rollup all ^ten_min\\\\. sum 31536000 28800 8 0 ['test'] ['graphite'] +graphite_rollup all ^ten_min\\\\. sum 5184000 7200 8 0 ['test'] ['graphite'] +graphite_rollup all ^ten_min\\\\. sum 0 600 8 0 ['test'] ['graphite'] + ''' + result = q('SELECT * from system.graphite_retentions') + + mismatch = csv_compare(result, expected) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" + + q(''' +DROP TABLE IF EXISTS test.graphite2; +CREATE TABLE test.graphite2 + (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) + ENGINE = GraphiteMergeTree('graphite_rollup') + PARTITION BY toYYYYMM(date) + ORDER BY (metric, timestamp) + SETTINGS index_granularity=8192; + ''') + expected = ''' +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] + ''' + result = q(''' + SELECT + config_name, + Tables.database, + Tables.table + FROM system.graphite_retentions + ''') + assert csv_compare(result, expected), f"got\n{result}\nwant\n{expected}" + + +def test_path_dangling_pointer(graphite_table): + q(''' +DROP TABLE IF EXISTS test.graphite2; +CREATE TABLE test.graphite2 + (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) + ENGINE = GraphiteMergeTree('graphite_rollup') + PARTITION BY toYYYYMM(date) + ORDER BY (metric, timestamp) + SETTINGS index_granularity=1; + ''') + + path = 'abcd' * 4000000 # 16MB + q('INSERT INTO test.graphite2 FORMAT TSV', + "{}\t0.0\t0\t2018-01-01\t100\n".format(path)) + q('INSERT INTO test.graphite2 FORMAT TSV', + "{}\t0.0\t0\t2018-01-01\t101\n".format(path)) + for version in range(10): + q('INSERT INTO test.graphite2 FORMAT TSV', + "{}\t0.0\t0\t2018-01-01\t{}\n".format(path, version)) + + while True: + q('OPTIMIZE TABLE test.graphite2 PARTITION 201801 FINAL') + parts = int(q("SELECT count() FROM system.parts " + "WHERE active AND database='test' " + "AND table='graphite2'")) + if parts == 1: + break + print(('Parts', parts)) + + assert TSV( + q("SELECT value, timestamp, date, updated FROM test.graphite2") + ) == TSV("0\t0\t2018-01-01\t101\n") + + q('DROP 
TABLE test.graphite2') + + +def test_combined_rules(graphite_table): + # 1487970000 ~ Sat 25 Feb 00:00:00 MSK 2017 + to_insert = 'INSERT INTO test.graphite VALUES ' + expected_unmerged = '' + for i in range(384): + to_insert += "('five_min.count', {v}, {t}, toDate({t}), 1), ".format( + v=1, t=1487970000 + (i * 300) + ) + to_insert += "('five_min.max', {v}, {t}, toDate({t}), 1), ".format( + v=i, t=1487970000 + (i * 300) + ) + expected_unmerged += ("five_min.count\t{v1}\t{t}\n" + "five_min.max\t{v2}\t{t}\n").format( + v1=1, v2=i, + t=1487970000 + (i * 300) + ) + + q(to_insert) + assert TSV(q('SELECT metric, value, timestamp FROM test.graphite' + ' ORDER BY (timestamp, metric)')) == TSV(expected_unmerged) + + q('OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL') + expected_merged = ''' + five_min.count 48 1487970000 2017-02-25 1 + five_min.count 48 1487984400 2017-02-25 1 + five_min.count 48 1487998800 2017-02-25 1 + five_min.count 48 1488013200 2017-02-25 1 + five_min.count 48 1488027600 2017-02-25 1 + five_min.count 48 1488042000 2017-02-25 1 + five_min.count 48 1488056400 2017-02-26 1 + five_min.count 48 1488070800 2017-02-26 1 + five_min.max 47 1487970000 2017-02-25 1 + five_min.max 95 1487984400 2017-02-25 1 + five_min.max 143 1487998800 2017-02-25 1 + five_min.max 191 1488013200 2017-02-25 1 + five_min.max 239 1488027600 2017-02-25 1 + five_min.max 287 1488042000 2017-02-25 1 + five_min.max 335 1488056400 2017-02-26 1 + five_min.max 383 1488070800 2017-02-26 1 + ''' + assert TSV(q('SELECT * FROM test.graphite' + ' ORDER BY (metric, timestamp)')) == TSV(expected_merged) diff --git a/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain new file mode 100644 index 00000000000..0f10d11ed05 --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain @@ -0,0 +1,84 @@ +one_min.x 0 1111110600 2017-02-02 100 +one_min.x 3 1111111200 2017-02-02 97 +one_min.x 6 1111111800 2017-02-02 94 +one_min.x 9 1111112400 2017-02-02 91 +one_min.x 12 1111113000 2017-02-02 88 +one_min.x 15 1111113600 2017-02-02 85 +one_min.x 18 1111114200 2017-02-02 82 +one_min.x 21 1111114800 2017-02-02 79 +one_min.x 24 1111115400 2017-02-02 76 +one_min.x 27 1111116000 2017-02-02 73 +one_min.x 30 1111116600 2017-02-02 70 +one_min.x 33 1111117200 2017-02-02 67 +one_min.x 36 1111117800 2017-02-02 64 +one_min.x 39 1111118400 2017-02-02 61 +one_min.x 42 1111119000 2017-02-02 58 +one_min.x 45 1111119600 2017-02-02 55 +one_min.x 48 1111120200 2017-02-02 52 +one_min.x 0 1111110600 2017-02-02 100 +one_min.x 3 1111111200 2017-02-02 97 +one_min.x 6 1111111800 2017-02-02 94 +one_min.x 9 1111112400 2017-02-02 91 +one_min.x 12 1111113000 2017-02-02 88 +one_min.x 15 1111113600 2017-02-02 85 +one_min.x 18 1111114200 2017-02-02 82 +one_min.x 21 1111114800 2017-02-02 79 +one_min.x 24 1111115400 2017-02-02 76 +one_min.x 27 1111116000 2017-02-02 73 +one_min.x 30 1111116600 2017-02-02 70 +one_min.x 33 1111117200 2017-02-02 67 +one_min.x 36 1111117800 2017-02-02 64 +one_min.x 39 1111118400 2017-02-02 61 +one_min.x 42 1111119000 2017-02-02 58 +one_min.x 45 1111119600 2017-02-02 55 +one_min.x 48 1111120200 2017-02-02 52 +one_min.y 0 1111110600 2017-02-02 100 +one_min.y 1 1111111200 2017-02-02 99 +one_min.y 2 1111111800 2017-02-02 98 +one_min.y 3 1111112400 2017-02-02 97 +one_min.y 4 1111113000 2017-02-02 96 +one_min.y 5 1111113600 2017-02-02 95 
+one_min.y 6 1111114200 2017-02-02 94 +one_min.y 7 1111114800 2017-02-02 93 +one_min.y 8 1111115400 2017-02-02 92 +one_min.y 9 1111116000 2017-02-02 91 +one_min.y 10 1111116600 2017-02-02 90 +one_min.y 11 1111117200 2017-02-02 89 +one_min.y 12 1111117800 2017-02-02 88 +one_min.y 13 1111118400 2017-02-02 87 +one_min.y 14 1111119000 2017-02-02 86 +one_min.y 15 1111119600 2017-02-02 85 +one_min.y 16 1111120200 2017-02-02 84 +one_min.y 17 1111120800 2017-02-02 83 +one_min.y 18 1111121400 2017-02-02 82 +one_min.y 19 1111122000 2017-02-02 81 +one_min.y 20 1111122600 2017-02-02 80 +one_min.y 21 1111123200 2017-02-02 79 +one_min.y 22 1111123800 2017-02-02 78 +one_min.y 23 1111124400 2017-02-02 77 +one_min.y 24 1111125000 2017-02-02 76 +one_min.y 25 1111125600 2017-02-02 75 +one_min.y 26 1111126200 2017-02-02 74 +one_min.y 27 1111126800 2017-02-02 73 +one_min.y 28 1111127400 2017-02-02 72 +one_min.y 29 1111128000 2017-02-02 71 +one_min.y 30 1111128600 2017-02-02 70 +one_min.y 31 1111129200 2017-02-02 69 +one_min.y 32 1111129800 2017-02-02 68 +one_min.y 33 1111130400 2017-02-02 67 +one_min.y 34 1111131000 2017-02-02 66 +one_min.y 35 1111131600 2017-02-02 65 +one_min.y 36 1111132200 2017-02-02 64 +one_min.y 37 1111132800 2017-02-02 63 +one_min.y 38 1111133400 2017-02-02 62 +one_min.y 39 1111134000 2017-02-02 61 +one_min.y 40 1111134600 2017-02-02 60 +one_min.y 41 1111135200 2017-02-02 59 +one_min.y 42 1111135800 2017-02-02 58 +one_min.y 43 1111136400 2017-02-02 57 +one_min.y 44 1111137000 2017-02-02 56 +one_min.y 45 1111137600 2017-02-02 55 +one_min.y 46 1111138200 2017-02-02 54 +one_min.y 47 1111138800 2017-02-02 53 +one_min.y 48 1111139400 2017-02-02 52 +one_min.y 49 1111140000 2017-02-02 51 diff --git a/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged new file mode 100644 index 00000000000..e2c63ab3b22 --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged @@ -0,0 +1,84 @@ +x?retention=one_min 0 1111110600 2017-02-02 100 +x?retention=one_min 3 1111111200 2017-02-02 97 +x?retention=one_min 6 1111111800 2017-02-02 94 +x?retention=one_min 9 1111112400 2017-02-02 91 +x?retention=one_min 12 1111113000 2017-02-02 88 +x?retention=one_min 15 1111113600 2017-02-02 85 +x?retention=one_min 18 1111114200 2017-02-02 82 +x?retention=one_min 21 1111114800 2017-02-02 79 +x?retention=one_min 24 1111115400 2017-02-02 76 +x?retention=one_min 27 1111116000 2017-02-02 73 +x?retention=one_min 30 1111116600 2017-02-02 70 +x?retention=one_min 33 1111117200 2017-02-02 67 +x?retention=one_min 36 1111117800 2017-02-02 64 +x?retention=one_min 39 1111118400 2017-02-02 61 +x?retention=one_min 42 1111119000 2017-02-02 58 +x?retention=one_min 45 1111119600 2017-02-02 55 +x?retention=one_min 48 1111120200 2017-02-02 52 +x?retention=one_min 0 1111110600 2017-02-02 100 +x?retention=one_min 3 1111111200 2017-02-02 97 +x?retention=one_min 6 1111111800 2017-02-02 94 +x?retention=one_min 9 1111112400 2017-02-02 91 +x?retention=one_min 12 1111113000 2017-02-02 88 +x?retention=one_min 15 1111113600 2017-02-02 85 +x?retention=one_min 18 1111114200 2017-02-02 82 +x?retention=one_min 21 1111114800 2017-02-02 79 +x?retention=one_min 24 1111115400 2017-02-02 76 +x?retention=one_min 27 1111116000 2017-02-02 73 +x?retention=one_min 30 1111116600 2017-02-02 70 +x?retention=one_min 33 1111117200 2017-02-02 67 +x?retention=one_min 36 
1111117800 2017-02-02 64 +x?retention=one_min 39 1111118400 2017-02-02 61 +x?retention=one_min 42 1111119000 2017-02-02 58 +x?retention=one_min 45 1111119600 2017-02-02 55 +x?retention=one_min 48 1111120200 2017-02-02 52 +y?retention=one_min 0 1111110600 2017-02-02 100 +y?retention=one_min 1 1111111200 2017-02-02 99 +y?retention=one_min 2 1111111800 2017-02-02 98 +y?retention=one_min 3 1111112400 2017-02-02 97 +y?retention=one_min 4 1111113000 2017-02-02 96 +y?retention=one_min 5 1111113600 2017-02-02 95 +y?retention=one_min 6 1111114200 2017-02-02 94 +y?retention=one_min 7 1111114800 2017-02-02 93 +y?retention=one_min 8 1111115400 2017-02-02 92 +y?retention=one_min 9 1111116000 2017-02-02 91 +y?retention=one_min 10 1111116600 2017-02-02 90 +y?retention=one_min 11 1111117200 2017-02-02 89 +y?retention=one_min 12 1111117800 2017-02-02 88 +y?retention=one_min 13 1111118400 2017-02-02 87 +y?retention=one_min 14 1111119000 2017-02-02 86 +y?retention=one_min 15 1111119600 2017-02-02 85 +y?retention=one_min 16 1111120200 2017-02-02 84 +y?retention=one_min 17 1111120800 2017-02-02 83 +y?retention=one_min 18 1111121400 2017-02-02 82 +y?retention=one_min 19 1111122000 2017-02-02 81 +y?retention=one_min 20 1111122600 2017-02-02 80 +y?retention=one_min 21 1111123200 2017-02-02 79 +y?retention=one_min 22 1111123800 2017-02-02 78 +y?retention=one_min 23 1111124400 2017-02-02 77 +y?retention=one_min 24 1111125000 2017-02-02 76 +y?retention=one_min 25 1111125600 2017-02-02 75 +y?retention=one_min 26 1111126200 2017-02-02 74 +y?retention=one_min 27 1111126800 2017-02-02 73 +y?retention=one_min 28 1111127400 2017-02-02 72 +y?retention=one_min 29 1111128000 2017-02-02 71 +y?retention=one_min 30 1111128600 2017-02-02 70 +y?retention=one_min 31 1111129200 2017-02-02 69 +y?retention=one_min 32 1111129800 2017-02-02 68 +y?retention=one_min 33 1111130400 2017-02-02 67 +y?retention=one_min 34 1111131000 2017-02-02 66 +y?retention=one_min 35 1111131600 2017-02-02 65 +y?retention=one_min 36 1111132200 2017-02-02 64 +y?retention=one_min 37 1111132800 2017-02-02 63 +y?retention=one_min 38 1111133400 2017-02-02 62 +y?retention=one_min 39 1111134000 2017-02-02 61 +y?retention=one_min 40 1111134600 2017-02-02 60 +y?retention=one_min 41 1111135200 2017-02-02 59 +y?retention=one_min 42 1111135800 2017-02-02 58 +y?retention=one_min 43 1111136400 2017-02-02 57 +y?retention=one_min 44 1111137000 2017-02-02 56 +y?retention=one_min 45 1111137600 2017-02-02 55 +y?retention=one_min 46 1111138200 2017-02-02 54 +y?retention=one_min 47 1111138800 2017-02-02 53 +y?retention=one_min 48 1111139400 2017-02-02 52 +y?retention=one_min 49 1111140000 2017-02-02 51 diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py index cba9e93c056..8b5d7f5f7b2 100644 --- a/tests/integration/test_postgresql_replica_database_engine_1/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py @@ -1,245 +1,67 @@ import pytest import time -import psycopg2 import os.path as p import random from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from helpers.test_tools import TSV from random import randrange import threading +from helpers.postgres_utility import get_postgres_conn +from helpers.postgres_utility import PostgresManager + +from helpers.postgres_utility import create_replication_slot, drop_replication_slot +from 
helpers.postgres_utility import create_postgres_schema, drop_postgres_schema +from helpers.postgres_utility import create_postgres_table, drop_postgres_table +from helpers.postgres_utility import check_tables_are_synchronized +from helpers.postgres_utility import check_several_tables_are_synchronized +from helpers.postgres_utility import assert_nested_table_is_created +from helpers.postgres_utility import assert_number_of_columns +from helpers.postgres_utility import postgres_table_template, postgres_table_template_2, postgres_table_template_3, postgres_table_template_4 +from helpers.postgres_utility import queries + + cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', main_configs = ['configs/log_conf.xml'], user_configs = ['configs/users.xml'], with_postgres=True, stay_alive=True) -postgres_table_template = """ - CREATE TABLE IF NOT EXISTS "{}" ( - key Integer NOT NULL, value Integer, PRIMARY KEY(key)) - """ -postgres_table_template_2 = """ - CREATE TABLE IF NOT EXISTS "{}" ( - key Integer NOT NULL, value1 Integer, value2 Integer, value3 Integer, PRIMARY KEY(key)) - """ -postgres_table_template_3 = """ - CREATE TABLE IF NOT EXISTS "{}" ( - key1 Integer NOT NULL, value1 Integer, key2 Integer NOT NULL, value2 Integer NOT NULL) - """ -postgres_table_template_4 = """ - CREATE TABLE IF NOT EXISTS "{}"."{}" ( - key Integer NOT NULL, value Integer, PRIMARY KEY(key)) - """ - -def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False): - if database == True: - conn_string = "host={} port={} dbname='{}' user='postgres' password='mysecretpassword'".format(ip, port, database_name) - else: - conn_string = "host={} port={} user='postgres' password='mysecretpassword'".format(ip, port) - - if replication: - conn_string += " replication='database'" - - conn = psycopg2.connect(conn_string) - if auto_commit: - conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - conn.autocommit = True - return conn - -def create_replication_slot(conn, slot_name='user_slot'): - cursor = conn.cursor() - cursor.execute('CREATE_REPLICATION_SLOT {} LOGICAL pgoutput EXPORT_SNAPSHOT'.format(slot_name)) - result = cursor.fetchall() - print(result[0][0]) # slot name - print(result[0][1]) # start lsn - print(result[0][2]) # snapshot - return result[0][2] - -def drop_replication_slot(conn, slot_name='user_slot'): - cursor = conn.cursor() - cursor.execute("select pg_drop_replication_slot('{}')".format(slot_name)) - -def create_postgres_db(cursor, name='postgres_database'): - cursor.execute("CREATE DATABASE {}".format(name)) - -def drop_postgres_db(cursor, name='postgres_database'): - cursor.execute("DROP DATABASE IF EXISTS {}".format(name)) - -def drop_postgres_schema(cursor, schema_name): - cursor.execute('DROP SCHEMA IF EXISTS {} CASCADE'.format(schema_name)) - -def create_postgres_schema(cursor, schema_name): - drop_postgres_schema(cursor, schema_name) - cursor.execute('CREATE SCHEMA {}'.format(schema_name)) - -def create_clickhouse_postgres_db(ip, port, name='postgres_database', database_name='postgres_database', schema_name=''): - drop_clickhouse_postgres_db(name) - if len(schema_name) == 0: - instance.query(''' - CREATE DATABASE {} - ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')'''.format(name, ip, port, database_name)) - else: - instance.query(''' - CREATE DATABASE {} - ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword', '{}')'''.format(name, ip, port, database_name, schema_name)) - -def 
drop_clickhouse_postgres_db(name='postgres_database'): - instance.query('DROP DATABASE IF EXISTS {}'.format(name)) - -def create_materialized_db(ip, port, - materialized_database='test_database', - postgres_database='postgres_database', - settings=[]): - instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") - create_query = f"CREATE DATABASE {materialized_database} ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', 'postgres', 'mysecretpassword')" - if len(settings) > 0: - create_query += " SETTINGS " - for i in range(len(settings)): - if i != 0: - create_query += ', ' - create_query += settings[i] - instance.query(create_query) - assert materialized_database in instance.query('SHOW DATABASES') - -def drop_materialized_db(materialized_database='test_database'): - instance.query('DROP DATABASE IF EXISTS {}'.format(materialized_database)) - assert materialized_database not in instance.query('SHOW DATABASES') - -def drop_postgres_table(cursor, table_name): - cursor.execute("""DROP TABLE IF EXISTS "{}" """.format(table_name)) - -def drop_postgres_table_with_schema(cursor, schema_name, table_name): - cursor.execute("""DROP TABLE IF EXISTS "{}"."{}" """.format(schema_name, table_name)) - -def create_postgres_table(cursor, table_name, replica_identity_full=False, template=postgres_table_template): - drop_postgres_table(cursor, table_name) - cursor.execute(template.format(table_name)) - if replica_identity_full: - cursor.execute('ALTER TABLE {} REPLICA IDENTITY FULL;'.format(table_name)) - -def create_postgres_table_with_schema(cursor, schema_name, table_name): - drop_postgres_table_with_schema(cursor, schema_name, table_name) - cursor.execute(postgres_table_template_4.format(schema_name, table_name)) - -queries = [ - 'INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);', - 'DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;', - 'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;', - "UPDATE postgresql_replica_{} SET key=key+20000 WHERE key%2=0", - 'INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);', - 'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;', - 'UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;', - "UPDATE postgresql_replica_{} SET key=key+80000 WHERE key%2=1", - 'DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;', - 'UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;', - 'INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);', - 'DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;', - 'UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;', - "UPDATE postgresql_replica_{} SET key=key+500000 WHERE key%2=1", - 'INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);', - 'DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;', - "UPDATE postgresql_replica_{} SET key=key+10000000", - 'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;', - 'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;' - ] - - -def assert_nested_table_is_created(table_name, materialized_database='test_database', schema_name=''): - if len(schema_name) == 0: - table = table_name - else: - table = schema_name + "." 
+ table_name - print(f'Checking table {table} exists in {materialized_database}') - database_tables = instance.query('SHOW TABLES FROM {}'.format(materialized_database)) - while table not in database_tables: - time.sleep(0.2) - database_tables = instance.query('SHOW TABLES FROM {}'.format(materialized_database)) - assert(table in database_tables) - - -def assert_number_of_columns(expected, table_name, database_name='test_database'): - result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") - while (int(result) != expected): - time.sleep(1) - result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") - print('Number of columns ok') - - -@pytest.mark.timeout(320) -def check_tables_are_synchronized(table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''): - assert_nested_table_is_created(table_name, materialized_database, schema_name) - - print("Checking table is synchronized:", table_name) - expected = instance.query('select * from {}.{} order by {};'.format(postgres_database, table_name, order_by)) - if len(schema_name) == 0: - result = instance.query('select * from {}.{} order by {};'.format(materialized_database, table_name, order_by)) - else: - result = instance.query('select * from {}.`{}.{}` order by {};'.format(materialized_database, schema_name, table_name, order_by)) - - while result != expected: - time.sleep(0.5) - if len(schema_name) == 0: - result = instance.query('select * from {}.{} order by {};'.format(materialized_database, table_name, order_by)) - else: - result = instance.query('select * from {}.`{}.{}` order by {};'.format(materialized_database, schema_name, table_name, order_by)) - - assert(result == expected) +pg_manager = PostgresManager() @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - conn = get_postgres_conn(ip=cluster.postgres_ip, port=cluster.postgres_port) - cursor = conn.cursor() - create_postgres_db(cursor, 'postgres_database') - create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port) - - instance.query("DROP DATABASE IF EXISTS test_database") + pg_manager.init(instance, cluster.postgres_ip, cluster.postgres_port) yield cluster finally: cluster.shutdown() +@pytest.fixture(autouse=True) +def setup_teardown(): + print("PostgreSQL is available - running test") + yield # run test + pg_manager.restart() + + def test_load_and_sync_all_database_tables(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() NUM_TABLES = 5 - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - create_postgres_table(cursor, table_name); - instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(50)".format(table_name)) - - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) - assert 'test_database' in instance.query('SHOW DATABASES') - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - check_tables_are_synchronized(table_name); - cursor.execute('drop table {};'.format(table_name)) - - result = instance.query('''SELECT count() FROM system.tables WHERE database = 'test_database';''') + 
pg_manager.create_and_fill_postgres_tables(NUM_TABLES) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + check_several_tables_are_synchronized(instance, NUM_TABLES) + result = instance.query("SELECT count() FROM system.tables WHERE database = 'test_database';") assert(int(result) == NUM_TABLES) - drop_materialized_db() - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) - def test_replicating_dml(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() NUM_TABLES = 5 @@ -248,41 +70,26 @@ def test_replicating_dml(started_cluster): create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(50)".format(i, i)) - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for i in range(NUM_TABLES): instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 50 + number, {} from numbers(1000)".format(i, i)) - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - check_tables_are_synchronized(table_name); + check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): cursor.execute('UPDATE postgresql_replica_{} SET value = {} * {} WHERE key < 50;'.format(i, i, i)) cursor.execute('UPDATE postgresql_replica_{} SET value = {} * {} * {} WHERE key >= 50;'.format(i, i, i, i)) - - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): cursor.execute('DELETE FROM postgresql_replica_{} WHERE (value*value + {}) % 2 = 0;'.format(i, i)) cursor.execute('UPDATE postgresql_replica_{} SET value = value - (value % 7) WHERE key > 128 AND key < 512;'.format(i)) cursor.execute('DELETE FROM postgresql_replica_{} WHERE key % 7 = 1;'.format(i, i)) - - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); - - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) - - drop_materialized_db() + check_several_tables_are_synchronized(instance, NUM_TABLES) def test_different_data_types(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() cursor.execute('drop table if exists test_data_types;') @@ -309,15 +116,14 @@ def test_different_data_types(started_cluster): k Char(2)[] -- Nullable(String) )''') - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for i in range(10): instance.query(''' INSERT INTO postgres_database.test_data_types VALUES ({}, -32768, -2147483648, -9223372036854775808, 1.12345, 1.1234567890, 2147483647, 9223372036854775807, '2000-05-12 12:12:12.012345', '2000-05-12', 0.2, 0.2)'''.format(i)) - 
check_tables_are_synchronized('test_data_types', 'id'); + check_tables_are_synchronized(instance, 'test_data_types', 'id'); result = instance.query('SELECT * FROM test_database.test_data_types ORDER BY id LIMIT 1;') assert(result == '0\t-32768\t-2147483648\t-9223372036854775808\t1.12345\t1.123456789\t2147483647\t9223372036854775807\t2000-05-12 12:12:12.012345\t2000-05-12\t0.2\t0.2\n') @@ -326,7 +132,7 @@ def test_different_data_types(started_cluster): cursor.execute('UPDATE test_data_types SET {} = {};'.format(col, i)) cursor.execute('''UPDATE test_data_types SET i = '2020-12-12';'''.format(col, i)) - check_tables_are_synchronized('test_data_types', 'id'); + check_tables_are_synchronized(instance, 'test_data_types', 'id'); instance.query("INSERT INTO postgres_database.test_array_data_type " "VALUES (" @@ -357,44 +163,35 @@ def test_different_data_types(started_cluster): "[]\n" ) - check_tables_are_synchronized('test_array_data_type'); + check_tables_are_synchronized(instance, 'test_array_data_type'); result = instance.query('SELECT * FROM test_database.test_array_data_type ORDER BY key;') assert(result == expected) - drop_materialized_db() + pg_manager.drop_materialized_db() cursor.execute('drop table if exists test_data_types;') cursor.execute('drop table if exists test_array_data_type;') def test_load_and_sync_subset_of_database_tables(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() NUM_TABLES = 10 + pg_manager.create_and_fill_postgres_tables(NUM_TABLES) publication_tables = '' for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); - instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, number from numbers(50)".format(i)) - if i < int(NUM_TABLES/2): if publication_tables != '': publication_tables += ', ' - publication_tables += table_name + publication_tables += f'postgresql_replica_{i}' - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - settings=["materialized_postgresql_tables_list = '{}'".format(publication_tables)]) - assert 'test_database' in instance.query('SHOW DATABASES') + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=["materialized_postgresql_tables_list = '{}'".format(publication_tables)]) time.sleep(1) for i in range(int(NUM_TABLES/2)): - table_name = 'postgresql_replica_{}'.format(i) - assert_nested_table_is_created(table_name) + table_name = f'postgresql_replica_{i}' + assert_nested_table_is_created(instance, table_name) result = instance.query('''SELECT count() FROM system.tables WHERE database = 'test_database';''') assert(int(result) == int(NUM_TABLES/2)) @@ -409,69 +206,40 @@ def test_load_and_sync_subset_of_database_tables(started_cluster): instance.query("INSERT INTO postgres_database.{} SELECT 50 + number, {} from numbers(100)".format(table_name, i)) for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) + table_name = f'postgresql_replica_{i}' if i < int(NUM_TABLES/2): - check_tables_are_synchronized(table_name); - - drop_materialized_db() - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + check_tables_are_synchronized(instance, table_name); def test_changing_replica_identity_value(started_cluster): - drop_materialized_db() - 
conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() create_postgres_table(cursor, 'postgresql_replica'); instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 50 + number, number from numbers(50)") - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) instance.query("INSERT INTO postgres_database.postgresql_replica SELECT 100 + number, number from numbers(50)") - check_tables_are_synchronized('postgresql_replica'); + check_tables_are_synchronized(instance, 'postgresql_replica'); cursor.execute("UPDATE postgresql_replica SET key=key-25 WHERE key<100 ") - check_tables_are_synchronized('postgresql_replica'); - - drop_materialized_db() - cursor.execute('drop table if exists postgresql_replica;') + check_tables_are_synchronized(instance, 'postgresql_replica'); def test_clickhouse_restart(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() NUM_TABLES = 5 - - for i in range(NUM_TABLES): - create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); - instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(50)".format(i, i)) - - instance.query("CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres', 'mysecretpassword')") - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - check_tables_are_synchronized(table_name); + pg_manager.create_and_fill_postgres_tables(NUM_TABLES) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 50 + number, {} from numbers(50000)".format(i, i)) instance.restart_clickhouse() - - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); - - drop_materialized_db() - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + check_several_tables_are_synchronized(instance, NUM_TABLES) def test_replica_identity_index(started_cluster): - drop_materialized_db() conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() @@ -480,27 +248,22 @@ def test_replica_identity_index(started_cluster): cursor.execute("ALTER TABLE postgresql_replica REPLICA IDENTITY USING INDEX idx") instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(50, 10)") - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) instance.query("INSERT INTO postgres_database.postgresql_replica SELECT number, number, number, number from numbers(100, 10)") - check_tables_are_synchronized('postgresql_replica', order_by='key1'); + check_tables_are_synchronized(instance, 'postgresql_replica', order_by='key1'); cursor.execute("UPDATE postgresql_replica SET key1=key1-25 
WHERE key1<100 ") cursor.execute("UPDATE postgresql_replica SET key2=key2-25 WHERE key2>100 ") cursor.execute("UPDATE postgresql_replica SET value1=value1+100 WHERE key1<100 ") cursor.execute("UPDATE postgresql_replica SET value2=value2+200 WHERE key2>100 ") - check_tables_are_synchronized('postgresql_replica', order_by='key1'); + check_tables_are_synchronized(instance, 'postgresql_replica', order_by='key1'); cursor.execute('DELETE FROM postgresql_replica WHERE key2<75;') - check_tables_are_synchronized('postgresql_replica', order_by='key1'); - - drop_materialized_db() - cursor.execute('drop table if exists postgresql_replica;') + check_tables_are_synchronized(instance, 'postgresql_replica', order_by='key1'); def test_table_schema_changes(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() NUM_TABLES = 5 @@ -509,15 +272,14 @@ def test_table_schema_changes(started_cluster): create_postgres_table(cursor, 'postgresql_replica_{}'.format(i), template=postgres_table_template_2); instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {}, {}, {} from numbers(25)".format(i, i, i, i)) - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - settings=["materialized_postgresql_allow_automatic_update = 1"]) + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=["materialized_postgresql_allow_automatic_update = 1"]) for i in range(NUM_TABLES): instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 25 + number, {}, {}, {} from numbers(25)".format(i, i, i, i)) - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + check_several_tables_are_synchronized(instance, NUM_TABLES) expected = instance.query("SELECT key, value1, value3 FROM test_database.postgresql_replica_3 ORDER BY key"); @@ -530,13 +292,12 @@ def test_table_schema_changes(started_cluster): cursor.execute(f"UPDATE {altered_table} SET value3 = 12 WHERE key%2=0") time.sleep(2) - assert_nested_table_is_created(altered_table) - assert_number_of_columns(3, altered_table) - check_tables_are_synchronized(altered_table) + assert_nested_table_is_created(instance, altered_table) + assert_number_of_columns(instance, 3, altered_table) + check_tables_are_synchronized(instance, altered_table) print('check1 OK') - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + check_several_tables_are_synchronized(instance, NUM_TABLES) for i in range(NUM_TABLES): if i != altered_idx: @@ -544,32 +305,12 @@ def test_table_schema_changes(started_cluster): else: instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT 51 + number, {}, {} from numbers(49)".format(i, i, i)) - check_tables_are_synchronized(altered_table); + check_tables_are_synchronized(instance, altered_table); print('check2 OK') - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); - - for i in range(NUM_TABLES): - cursor.execute('drop table postgresql_replica_{};'.format(i)) - - instance.query("DROP DATABASE test_database") - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + check_several_tables_are_synchronized(instance, NUM_TABLES) def 
test_many_concurrent_queries(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() - NUM_TABLES = 5 - - for i in range(NUM_TABLES): - create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); - instance.query('INSERT INTO postgres_database.postgresql_replica_{} SELECT number, number from numbers(10000)'.format(i)) - n = [10000] - query_pool = ['DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;', 'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;', 'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;', @@ -582,6 +323,13 @@ def test_many_concurrent_queries(started_cluster): 'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;', 'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;'] + NUM_TABLES = 5 + + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + database=True) + cursor = conn.cursor() + pg_manager.create_and_fill_postgres_tables_from_cursor(cursor, NUM_TABLES, numbers=10000) + def attack(thread_id): print('thread {}'.format(thread_id)) k = 10000 @@ -606,13 +354,14 @@ def test_many_concurrent_queries(started_cluster): cursor.execute("UPDATE postgresql_replica_{} SET key=key%100000+100000*{} WHERE key%{}=0".format(thread_id, i+1, i+1)) print("update primary key {} ok".format(thread_id)) + n = [10000] + threads = [] threads_num = 16 for i in range(threads_num): threads.append(threading.Thread(target=attack, args=(i,))) - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for thread in threads: time.sleep(random.uniform(0, 1)) @@ -628,108 +377,91 @@ def test_many_concurrent_queries(started_cluster): thread.join() for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); + check_tables_are_synchronized(instance, 'postgresql_replica_{}'.format(i)); count1 = instance.query('SELECT count() FROM postgres_database.postgresql_replica_{}'.format(i)) count2 = instance.query('SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{})'.format(i)) assert(int(count1) == int(count2)) print(count1, count2) - drop_materialized_db() - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) - def test_single_transaction(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, auto_commit=False) cursor = conn.cursor() - create_postgres_table(cursor, 'postgresql_replica_0'); + table_name = 'postgresql_replica_0' + create_postgres_table(cursor, table_name); conn.commit() - create_materialized_db(ip=started_cluster.postgres_ip, + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) - assert_nested_table_is_created('postgresql_replica_0') + assert_nested_table_is_created(instance, table_name) for query in queries: print('query {}'.format(query)) cursor.execute(query.format(0)) time.sleep(5) - result = instance.query("select count() from test_database.postgresql_replica_0") + result = instance.query(f"select count() from test_database.{table_name}") # no commit yet assert(int(result) == 0) conn.commit() - 
check_tables_are_synchronized('postgresql_replica_0'); - - drop_materialized_db() - cursor.execute('drop table if exists postgresql_replica_0;') + check_tables_are_synchronized(instance, table_name); def test_virtual_columns(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() - create_postgres_table(cursor, 'postgresql_replica_0'); + table_name = 'postgresql_replica_0' + create_postgres_table(cursor, table_name); - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - settings=["materialized_postgresql_allow_automatic_update = 1"]) - assert_nested_table_is_created('postgresql_replica_0') - instance.query("INSERT INTO postgres_database.postgresql_replica_0 SELECT number, number from numbers(10)") - check_tables_are_synchronized('postgresql_replica_0'); + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=["materialized_postgresql_allow_automatic_update = 1"]) + + assert_nested_table_is_created(instance, table_name) + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10)") + check_tables_are_synchronized(instance, table_name); # just check that it works, no check with `expected` because _version is taken as LSN, which will be different each time. - result = instance.query('SELECT key, value, _sign, _version FROM test_database.postgresql_replica_0;') + result = instance.query(f'SELECT key, value, _sign, _version FROM test_database.{table_name};') print(result) - cursor.execute("ALTER TABLE postgresql_replica_0 ADD COLUMN value2 integer") - instance.query("INSERT INTO postgres_database.postgresql_replica_0 SELECT number, number, number from numbers(10, 10)") - assert_number_of_columns(3, 'postgresql_replica_0') - check_tables_are_synchronized('postgresql_replica_0'); + cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value2 integer") + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number, number from numbers(10, 10)") + assert_number_of_columns(instance, 3, table_name) + check_tables_are_synchronized(instance, table_name); result = instance.query('SELECT key, value, value2, _sign, _version FROM test_database.postgresql_replica_0;') print(result) - instance.query("INSERT INTO postgres_database.postgresql_replica_0 SELECT number, number, number from numbers(20, 10)") - check_tables_are_synchronized('postgresql_replica_0'); + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number, number from numbers(20, 10)") + check_tables_are_synchronized(instance, table_name); - result = instance.query('SELECT key, value, value2, _sign, _version FROM test_database.postgresql_replica_0;') + result = instance.query(f'SELECT key, value, value2, _sign, _version FROM test_database.{table_name};') print(result) - drop_materialized_db() - cursor.execute('drop table if exists postgresql_replica_0;') - def test_multiple_databases(started_cluster): - drop_materialized_db('test_database_1') - drop_materialized_db('test_database_2') NUM_TABLES = 5 - - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=False) cursor = conn.cursor() - 
create_postgres_db(cursor, 'postgres_database_1') - create_postgres_db(cursor, 'postgres_database_2') + pg_manager.create_postgres_db(cursor, 'postgres_database_1') + pg_manager.create_postgres_db(cursor, 'postgres_database_2') - conn1 = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn1 = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, database_name='postgres_database_1') - conn2 = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn2 = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, database_name='postgres_database_2') cursor1 = conn1.cursor() cursor2 = conn2.cursor() - create_clickhouse_postgres_db(cluster.postgres_ip, cluster.postgres_port, 'postgres_database_1', 'postgres_database_1') - create_clickhouse_postgres_db(cluster.postgres_ip, cluster.postgres_port, 'postgres_database_2', 'postgres_database_2') + pg_manager.create_clickhouse_postgres_db(cluster.postgres_ip, cluster.postgres_port, 'postgres_database_1', 'postgres_database_1') + pg_manager.create_clickhouse_postgres_db(cluster.postgres_ip, cluster.postgres_port, 'postgres_database_2', 'postgres_database_2') cursors = [cursor1, cursor2] for cursor_id in range(len(cursors)): @@ -740,9 +472,9 @@ def test_multiple_databases(started_cluster): print('database 1 tables: ', instance.query('''SELECT name FROM system.tables WHERE database = 'postgres_database_1';''')) print('database 2 tables: ', instance.query('''SELECT name FROM system.tables WHERE database = 'postgres_database_2';''')) - create_materialized_db(started_cluster.postgres_ip, started_cluster.postgres_port, + pg_manager.create_materialized_db(started_cluster.postgres_ip, started_cluster.postgres_port, 'test_database_1', 'postgres_database_1') - create_materialized_db(started_cluster.postgres_ip, started_cluster.postgres_port, + pg_manager.create_materialized_db(started_cluster.postgres_ip, started_cluster.postgres_port, 'test_database_2', 'postgres_database_2') cursors = [cursor1, cursor2] @@ -754,289 +486,186 @@ def test_multiple_databases(started_cluster): for cursor_id in range(len(cursors)): for i in range(NUM_TABLES): table_name = 'postgresql_replica_{}'.format(i) - check_tables_are_synchronized( + check_tables_are_synchronized(instance, table_name, 'key', 'postgres_database_{}'.format(cursor_id + 1), 'test_database_{}'.format(cursor_id + 1)); - for i in range(NUM_TABLES): - cursor1.execute('drop table if exists postgresql_replica_{};'.format(i)) - for i in range(NUM_TABLES): - cursor2.execute('drop table if exists postgresql_replica_{};'.format(i)) - - drop_clickhouse_postgres_db('postgres_database_1') - drop_clickhouse_postgres_db('postgres_database_2') - - drop_materialized_db('test_database_1') - drop_materialized_db('test_database_2') - def test_concurrent_transactions(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() - NUM_TABLES = 6 - - for i in range(NUM_TABLES): - create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); - def transaction(thread_id): conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, auto_commit=False) - cursor_ = conn.cursor() + cursor = conn.cursor() for query in queries: - cursor_.execute(query.format(thread_id)) + cursor.execute(query.format(thread_id)) 
print('thread {}, query {}'.format(thread_id, query)) conn.commit() + NUM_TABLES = 6 + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=0) + threads = [] threads_num = 6 for i in range(threads_num): threads.append(threading.Thread(target=transaction, args=(i,))) - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for thread in threads: time.sleep(random.uniform(0, 0.5)) thread.start() + for thread in threads: thread.join() for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); - count1 = instance.query('SELECT count() FROM postgres_database.postgresql_replica_{}'.format(i)) - count2 = instance.query('SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{})'.format(i)) + check_tables_are_synchronized(instance, f'postgresql_replica_{i}'); + count1 = instance.query(f'SELECT count() FROM postgres_database.postgresql_replica_{i}') + count2 = instance.query(f'SELECT count() FROM (SELECT * FROM test_database.postgresql_replica_{i})') print(int(count1), int(count2), sep=' ') assert(int(count1) == int(count2)) - drop_materialized_db() - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) - def test_abrupt_connection_loss_while_heavy_replication(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() - NUM_TABLES = 6 - - for i in range(NUM_TABLES): - create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); - def transaction(thread_id): if thread_id % 2: - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, auto_commit=True) else: - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, auto_commit=False) - cursor_ = conn.cursor() + cursor = conn.cursor() for query in queries: - cursor_.execute(query.format(thread_id)) + cursor.execute(query.format(thread_id)) print('thread {}, query {}'.format(thread_id, query)) if thread_id % 2 == 0: conn.commit() - threads = [] + NUM_TABLES = 6 + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, numbers=0) + threads_num = 6 + threads = [] for i in range(threads_num): threads.append(threading.Thread(target=transaction, args=(i,))) - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for thread in threads: time.sleep(random.uniform(0, 0.5)) thread.start() - # Join here because it takes time for data to reach wal for thread in threads: - thread.join() - time.sleep(1) + thread.join() # Join here because it takes time for data to reach wal + + time.sleep(2) started_cluster.pause_container('postgres1') - for i in range(NUM_TABLES): - result = instance.query("SELECT count() FROM test_database.postgresql_replica_{}".format(i)) - print(result) # Just debug + # for i in range(NUM_TABLES): + # result = instance.query(f"SELECT count() FROM test_database.postgresql_replica_{i}") + # print(result) # Just debug 
started_cluster.unpause_container('postgres1') - - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); - - for i in range(NUM_TABLES): - result = instance.query("SELECT count() FROM test_database.postgresql_replica_{}".format(i)) - print(result) # Just debug - - drop_materialized_db() - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + check_several_tables_are_synchronized(instance, NUM_TABLES) def test_drop_database_while_replication_startup_not_finished(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() NUM_TABLES = 5 - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - create_postgres_table(cursor, table_name); - instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(100000)".format(table_name)) - + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 100000) for i in range(6): - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) time.sleep(0.5 * i) - drop_materialized_db() - - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + pg_manager.drop_materialized_db() def test_restart_server_while_replication_startup_not_finished(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() NUM_TABLES = 5 - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - create_postgres_table(cursor, table_name); - instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(100000)".format(table_name)) - - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) - time.sleep(0.5) + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 100000) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + time.sleep(1) instance.restart_clickhouse() - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); - - drop_materialized_db() - for i in range(NUM_TABLES): - cursor.execute('drop table postgresql_replica_{};'.format(i)) + check_several_tables_are_synchronized(instance, NUM_TABLES) def test_abrupt_server_restart_while_heavy_replication(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() - NUM_TABLES = 6 - - for i in range(NUM_TABLES): - create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); - def transaction(thread_id): if thread_id % 2: - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, auto_commit=True) else: - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True, auto_commit=False) - cursor_ = conn.cursor() + cursor = conn.cursor() for query in queries: - cursor_.execute(query.format(thread_id)) + 
cursor.execute(query.format(thread_id)) print('thread {}, query {}'.format(thread_id, query)) if thread_id % 2 == 0: conn.commit() + NUM_TABLES = 6 + pg_manager.create_and_fill_postgres_tables(tables_num=NUM_TABLES, numbers=0) + threads = [] threads_num = 6 for i in range(threads_num): threads.append(threading.Thread(target=transaction, args=(i,))) - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) for thread in threads: time.sleep(random.uniform(0, 0.5)) thread.start() - # Join here because it takes time for data to reach wal for thread in threads: - thread.join() + thread.join() # Join here because it takes time for data to reach wal + instance.restart_clickhouse() - - for i in range(NUM_TABLES): - result = instance.query("SELECT count() FROM test_database.postgresql_replica_{}".format(i)) - print(result) # Just debug - - for i in range(NUM_TABLES): - check_tables_are_synchronized('postgresql_replica_{}'.format(i)); - - for i in range(NUM_TABLES): - result = instance.query("SELECT count() FROM test_database.postgresql_replica_{}".format(i)) - print(result) # Just debug - - drop_materialized_db() - for i in range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + check_several_tables_are_synchronized(instance, NUM_TABLES) def test_quoting_1(started_cluster): - conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) - cursor = conn.cursor() table_name = 'user' - create_postgres_table(cursor, table_name); - instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)") - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) - check_tables_are_synchronized(table_name); - drop_materialized_db() - drop_postgres_table(cursor, table_name) + pg_manager.create_and_fill_postgres_table(table_name) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + check_tables_are_synchronized(instance, table_name); def test_quoting_2(started_cluster): - conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) - cursor = conn.cursor() table_name = 'user' - create_postgres_table(cursor, table_name); - instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)") - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, - settings=[f"materialized_postgresql_tables_list = '{table_name}'"]) - check_tables_are_synchronized(table_name); - drop_materialized_db() - drop_postgres_table(cursor, table_name) + pg_manager.create_and_fill_postgres_table(table_name) + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=[f"materialized_postgresql_tables_list = '{table_name}'"]) + check_tables_are_synchronized(instance, table_name); def test_user_managed_slots(started_cluster): - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() - table_name = 'test_table' - create_postgres_table(cursor, table_name); - instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format(table_name)) - slot_name = 'user_slot' - replication_connection = 
get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, - database=True, replication=True, auto_commit=True) + table_name = 'test_table' + pg_manager.create_and_fill_postgres_table(table_name) + + replication_connection = get_postgres_conn( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + database=True, replication=True, auto_commit=True) snapshot = create_replication_slot(replication_connection, slot_name=slot_name) - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - settings=["materialized_postgresql_replication_slot = '{}'".format(slot_name), - "materialized_postgresql_snapshot = '{}'".format(snapshot)]) - check_tables_are_synchronized(table_name); + + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=[f"materialized_postgresql_replication_slot = '{slot_name}'", + f"materialized_postgresql_snapshot = '{snapshot}'"]) + check_tables_are_synchronized(instance, table_name); + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000, 10000)".format(table_name)) - check_tables_are_synchronized(table_name); + check_tables_are_synchronized(instance, table_name); + instance.restart_clickhouse() + instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(20000, 10000)".format(table_name)) - check_tables_are_synchronized(table_name); - drop_postgres_table(cursor, table_name) - drop_materialized_db() + check_tables_are_synchronized(instance, table_name); + + pg_manager.drop_materialized_db() drop_replication_slot(replication_connection, slot_name) - cursor.execute('DROP TABLE IF EXISTS test_table') + replication_connection.close() if __name__ == '__main__': diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 9a1e2cd9a38..3226c040e8e 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -12,235 +12,62 @@ from helpers.test_tools import TSV from random import randrange import threading +from helpers.postgres_utility import get_postgres_conn +from helpers.postgres_utility import PostgresManager + +from helpers.postgres_utility import create_replication_slot, drop_replication_slot +from helpers.postgres_utility import create_postgres_schema, drop_postgres_schema +from helpers.postgres_utility import create_postgres_table, drop_postgres_table +from helpers.postgres_utility import create_postgres_table_with_schema, drop_postgres_table_with_schema +from helpers.postgres_utility import check_tables_are_synchronized +from helpers.postgres_utility import check_several_tables_are_synchronized +from helpers.postgres_utility import assert_nested_table_is_created +from helpers.postgres_utility import assert_number_of_columns +from helpers.postgres_utility import postgres_table_template, postgres_table_template_2, postgres_table_template_3, postgres_table_template_4, postgres_table_template_5 +from helpers.postgres_utility import queries + + cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', main_configs = ['configs/log_conf.xml'], user_configs = ['configs/users.xml'], with_postgres=True, stay_alive=True) -postgres_table_template = """ - CREATE TABLE IF NOT EXISTS "{}" ( - key Integer NOT NULL, value Integer, PRIMARY KEY(key)) - """ -postgres_table_template_2 
= """ - CREATE TABLE IF NOT EXISTS "{}" ( - key Integer NOT NULL, value1 Integer, value2 Integer, value3 Integer, PRIMARY KEY(key)) - """ -postgres_table_template_3 = """ - CREATE TABLE IF NOT EXISTS "{}" ( - key1 Integer NOT NULL, value1 Integer, key2 Integer NOT NULL, value2 Integer NOT NULL) - """ -postgres_table_template_4 = """ - CREATE TABLE IF NOT EXISTS "{}"."{}" ( - key Integer NOT NULL, value Integer, PRIMARY KEY(key)) - """ -postgres_table_template_5 = """ - CREATE TABLE IF NOT EXISTS "{}" ( - key Integer NOT NULL, value UUID, PRIMARY KEY(key)) - """ - -def get_postgres_conn(ip, port, database=False, auto_commit=True, database_name='postgres_database', replication=False): - if database == True: - conn_string = "host={} port={} dbname='{}' user='postgres' password='mysecretpassword'".format(ip, port, database_name) - else: - conn_string = "host={} port={} user='postgres' password='mysecretpassword'".format(ip, port) - - if replication: - conn_string += " replication='database'" - - conn = psycopg2.connect(conn_string) - if auto_commit: - conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) - conn.autocommit = True - return conn - -def create_replication_slot(conn, slot_name='user_slot'): - cursor = conn.cursor() - cursor.execute('CREATE_REPLICATION_SLOT {} LOGICAL pgoutput EXPORT_SNAPSHOT'.format(slot_name)) - result = cursor.fetchall() - print(result[0][0]) # slot name - print(result[0][1]) # start lsn - print(result[0][2]) # snapshot - return result[0][2] - -def drop_replication_slot(conn, slot_name='user_slot'): - cursor = conn.cursor() - cursor.execute("select pg_drop_replication_slot('{}')".format(slot_name)) - -def create_postgres_db(cursor, name='postgres_database'): - cursor.execute("CREATE DATABASE {}".format(name)) - -def drop_postgres_db(cursor, name='postgres_database'): - cursor.execute("DROP DATABASE IF EXISTS {}".format(name)) - -def drop_postgres_schema(cursor, schema_name): - cursor.execute('DROP SCHEMA IF EXISTS {} CASCADE'.format(schema_name)) - -def create_postgres_schema(cursor, schema_name): - drop_postgres_schema(cursor, schema_name) - cursor.execute('CREATE SCHEMA {}'.format(schema_name)) - -def create_clickhouse_postgres_db(ip, port, name='postgres_database', database_name='postgres_database', schema_name=''): - drop_clickhouse_postgres_db(name) - if len(schema_name) == 0: - instance.query(''' - CREATE DATABASE {} - ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword')'''.format(name, ip, port, database_name)) - else: - instance.query(''' - CREATE DATABASE {} - ENGINE = PostgreSQL('{}:{}', '{}', 'postgres', 'mysecretpassword', '{}')'''.format(name, ip, port, database_name, schema_name)) - -def drop_clickhouse_postgres_db(name='postgres_database'): - instance.query('DROP DATABASE IF EXISTS {}'.format(name)) - -def create_materialized_db(ip, port, - materialized_database='test_database', - postgres_database='postgres_database', - settings=[], table_overrides=''): - instance.query(f"DROP DATABASE IF EXISTS {materialized_database}") - create_query = f"CREATE DATABASE {materialized_database} ENGINE = MaterializedPostgreSQL('{ip}:{port}', '{postgres_database}', 'postgres', 'mysecretpassword')" - if len(settings) > 0: - create_query += " SETTINGS " - for i in range(len(settings)): - if i != 0: - create_query += ', ' - create_query += settings[i] - create_query += table_overrides - instance.query(create_query) - assert materialized_database in instance.query('SHOW DATABASES') - -def drop_materialized_db(materialized_database='test_database'): - 
instance.query('DROP DATABASE IF EXISTS {}'.format(materialized_database)) - assert materialized_database not in instance.query('SHOW DATABASES') - -def drop_postgres_table(cursor, table_name): - cursor.execute("""DROP TABLE IF EXISTS "{}" """.format(table_name)) - -def drop_postgres_table_with_schema(cursor, schema_name, table_name): - cursor.execute("""DROP TABLE IF EXISTS "{}"."{}" """.format(schema_name, table_name)) - -def create_postgres_table(cursor, table_name, replica_identity_full=False, template=postgres_table_template): - drop_postgres_table(cursor, table_name) - cursor.execute(template.format(table_name)) - if replica_identity_full: - cursor.execute('ALTER TABLE {} REPLICA IDENTITY FULL;'.format(table_name)) - -def create_postgres_table_with_schema(cursor, schema_name, table_name): - drop_postgres_table_with_schema(cursor, schema_name, table_name) - cursor.execute(postgres_table_template_4.format(schema_name, table_name)) - -queries = [ - 'INSERT INTO postgresql_replica_{} select i, i from generate_series(0, 10000) as t(i);', - 'DELETE FROM postgresql_replica_{} WHERE (value*value) % 3 = 0;', - 'UPDATE postgresql_replica_{} SET value = value - 125 WHERE key % 2 = 0;', - "UPDATE postgresql_replica_{} SET key=key+20000 WHERE key%2=0", - 'INSERT INTO postgresql_replica_{} select i, i from generate_series(40000, 50000) as t(i);', - 'DELETE FROM postgresql_replica_{} WHERE key % 10 = 0;', - 'UPDATE postgresql_replica_{} SET value = value + 101 WHERE key % 2 = 1;', - "UPDATE postgresql_replica_{} SET key=key+80000 WHERE key%2=1", - 'DELETE FROM postgresql_replica_{} WHERE value % 2 = 0;', - 'UPDATE postgresql_replica_{} SET value = value + 2000 WHERE key % 5 = 0;', - 'INSERT INTO postgresql_replica_{} select i, i from generate_series(200000, 250000) as t(i);', - 'DELETE FROM postgresql_replica_{} WHERE value % 3 = 0;', - 'UPDATE postgresql_replica_{} SET value = value * 2 WHERE key % 3 = 0;', - "UPDATE postgresql_replica_{} SET key=key+500000 WHERE key%2=1", - 'INSERT INTO postgresql_replica_{} select i, i from generate_series(1000000, 1050000) as t(i);', - 'DELETE FROM postgresql_replica_{} WHERE value % 9 = 2;', - "UPDATE postgresql_replica_{} SET key=key+10000000", - 'UPDATE postgresql_replica_{} SET value = value + 2 WHERE key % 3 = 1;', - 'DELETE FROM postgresql_replica_{} WHERE value%5 = 0;' - ] - - -def assert_nested_table_is_created(table_name, materialized_database='test_database', schema_name=''): - if len(schema_name) == 0: - table = table_name - else: - table = schema_name + "." 
+ table_name - print(f'Checking table {table} exists in {materialized_database}') - database_tables = instance.query('SHOW TABLES FROM {}'.format(materialized_database)) - while table not in database_tables: - time.sleep(0.2) - database_tables = instance.query('SHOW TABLES FROM {}'.format(materialized_database)) - assert(table in database_tables) - - -def assert_number_of_columns(expected, table_name, database_name='test_database'): - result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") - while (int(result) != expected): - time.sleep(1) - result = instance.query(f"select count() from system.columns where table = '{table_name}' and database = '{database_name}' and not startsWith(name, '_')") - print('Number of columns ok') - - -@pytest.mark.timeout(320) -def check_tables_are_synchronized(table_name, order_by='key', postgres_database='postgres_database', materialized_database='test_database', schema_name=''): - assert_nested_table_is_created(table_name, materialized_database, schema_name) - - print(f"Checking table is synchronized. Table name: {table_name}, table schema: {schema_name}") - expected = instance.query('select * from {}.{} order by {};'.format(postgres_database, table_name, order_by)) - if len(schema_name) == 0: - result = instance.query('select * from {}.{} order by {};'.format(materialized_database, table_name, order_by)) - else: - result = instance.query('select * from {}.`{}.{}` order by {};'.format(materialized_database, schema_name, table_name, order_by)) - - try_num = 0 - while result != expected: - time.sleep(0.5) - if len(schema_name) == 0: - result = instance.query('select * from {}.{} order by {};'.format(materialized_database, table_name, order_by)) - else: - result = instance.query('select * from {}.`{}.{}` order by {};'.format(materialized_database, schema_name, table_name, order_by)) - try_num += 1 - if try_num > 30: - break - - assert(result == expected) +pg_manager = PostgresManager() @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - conn = get_postgres_conn(ip=cluster.postgres_ip, port=cluster.postgres_port) - cursor = conn.cursor() - create_postgres_db(cursor, 'postgres_database') - create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port) - - instance.query("DROP DATABASE IF EXISTS test_database") + pg_manager.init(instance, cluster.postgres_ip, cluster.postgres_port) yield cluster finally: cluster.shutdown() +@pytest.fixture(autouse=True) +def setup_teardown(): + print("PostgreSQL is available - running test") + yield # run test + pg_manager.restart() + + def test_add_new_table_to_replication(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() + cursor = pg_manager.get_db_cursor() cursor.execute('DROP TABLE IF EXISTS test_table') NUM_TABLES = 5 - for i in range(NUM_TABLES): - create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); - instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(10000)".format(i, i)) - - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - check_tables_are_synchronized(table_name); + pg_manager.create_and_fill_postgres_tables_from_cursor(cursor, NUM_TABLES, 10000) + 
pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + check_several_tables_are_synchronized(instance, NUM_TABLES) result = instance.query("SHOW TABLES FROM test_database") assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n") table_name = 'postgresql_replica_5' - create_postgres_table(cursor, table_name) - instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format(table_name)) + pg_manager.create_and_fill_postgres_table_from_cursor(cursor, table_name) result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") # Check without ip @@ -252,16 +79,16 @@ def test_add_new_table_to_replication(started_cluster): result = instance.query_and_get_error("ALTER DATABASE test_database MODIFY SETTING materialized_postgresql_tables='tabl1'") assert('Database engine MaterializedPostgreSQL does not support setting' in result) - instance.query("ATTACH TABLE test_database.{}".format(table_name)); + instance.query(f"ATTACH TABLE test_database.{table_name}"); result = instance.query("SHOW TABLES FROM test_database") assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\npostgresql_replica_5\n") - check_tables_are_synchronized(table_name); - instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000, 10000)".format(table_name)) - check_tables_are_synchronized(table_name); + check_tables_are_synchronized(instance, table_name); + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10000, 10000)") + check_tables_are_synchronized(instance, table_name); - result = instance.query_and_get_error("ATTACH TABLE test_database.{}".format(table_name)); + result = instance.query_and_get_error(f"ATTACH TABLE test_database.{table_name}"); assert('Table test_database.postgresql_replica_5 already exists' in result) result = instance.query_and_get_error("ATTACH TABLE test_database.unknown_table"); @@ -274,14 +101,14 @@ def test_add_new_table_to_replication(started_cluster): table_name = 'postgresql_replica_6' create_postgres_table(cursor, table_name) instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format(table_name)) - instance.query("ATTACH TABLE test_database.{}".format(table_name)); + instance.query(f"ATTACH TABLE test_database.{table_name}"); instance.restart_clickhouse() table_name = 'postgresql_replica_7' create_postgres_table(cursor, table_name) instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(10000)".format(table_name)) - instance.query("ATTACH TABLE test_database.{}".format(table_name)); + instance.query(f"ATTACH TABLE test_database.{table_name}"); result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") @@ -289,33 +116,14 @@ def test_add_new_table_to_replication(started_cluster): result = instance.query("SHOW TABLES FROM test_database") assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\npostgresql_replica_5\npostgresql_replica_6\npostgresql_replica_7\n") + check_several_tables_are_synchronized(instance, NUM_TABLES + 3) - for i in range(NUM_TABLES + 3): - table_name = 
'postgresql_replica_{}'.format(i) - check_tables_are_synchronized(table_name); - - for i in range(NUM_TABLES + 3): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) def test_remove_table_from_replication(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() - cursor.execute('DROP TABLE IF EXISTS test_table') NUM_TABLES = 5 - - for i in range(NUM_TABLES): - create_postgres_table(cursor, 'postgresql_replica_{}'.format(i)); - instance.query("INSERT INTO postgres_database.postgresql_replica_{} SELECT number, {} from numbers(10000)".format(i, i)) - - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port) - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - check_tables_are_synchronized(table_name); + pg_manager.create_and_fill_postgres_tables(NUM_TABLES, 10000) + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) + check_several_tables_are_synchronized(instance, NUM_TABLES) result = instance.query("SHOW TABLES FROM test_database") assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\n") @@ -325,8 +133,8 @@ def test_remove_table_from_replication(started_cluster): assert(result[-59:] == "\\'postgres_database\\', \\'postgres\\', \\'mysecretpassword\\')\n") table_name = 'postgresql_replica_4' - instance.query('DETACH TABLE test_database.{}'.format(table_name)); - result = instance.query_and_get_error('SELECT * FROM test_database.{}'.format(table_name)) + instance.query(f'DETACH TABLE test_database.{table_name}'); + result = instance.query_and_get_error(f'SELECT * FROM test_database.{table_name}') assert("doesn't exist" in result) result = instance.query("SHOW TABLES FROM test_database") @@ -336,52 +144,42 @@ def test_remove_table_from_replication(started_cluster): assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-138:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3\\'\n") - instance.query('ATTACH TABLE test_database.{}'.format(table_name)); - check_tables_are_synchronized(table_name); - - for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - check_tables_are_synchronized(table_name); + instance.query(f'ATTACH TABLE test_database.{table_name}'); + check_tables_are_synchronized(instance, table_name); + check_several_tables_are_synchronized(instance, NUM_TABLES) result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-159:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") table_name = 'postgresql_replica_1' - instance.query('DETACH TABLE test_database.{}'.format(table_name)); + instance.query(f'DETACH TABLE test_database.{table_name}'); result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-138:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") - for i in 
range(NUM_TABLES): - cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) + cursor = pg_manager.get_db_cursor() + cursor.execute(f'drop table if exists postgresql_replica_0;') # Removing from replication table which does not exist in PostgreSQL must be ok. instance.query('DETACH TABLE test_database.postgresql_replica_0'); assert instance.contains_in_log("from publication, because table does not exist in PostgreSQL") - drop_materialized_db() def test_predefined_connection_configuration(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) - cursor = conn.cursor() + cursor = pg_manager.get_db_cursor() cursor.execute(f'DROP TABLE IF EXISTS test_table') cursor.execute(f'CREATE TABLE test_table (key integer PRIMARY KEY, value integer)') cursor.execute(f'INSERT INTO test_table SELECT 1, 2') - instance.query("CREATE DATABASE test_database ENGINE = MaterializedPostgreSQL(postgres1) SETTINGS materialized_postgresql_tables_list='test_table'") - check_tables_are_synchronized("test_table"); - drop_materialized_db() - cursor.execute('DROP TABLE IF EXISTS test_table') + check_tables_are_synchronized(instance, "test_table"); + pg_manager.drop_materialized_db() insert_counter = 0 def test_database_with_single_non_default_schema(started_cluster): - conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) - cursor = conn.cursor() - + cursor = pg_manager.get_db_cursor() NUM_TABLES=5 schema_name = 'test_schema' materialized_db = 'test_database' @@ -405,18 +203,17 @@ def test_database_with_single_non_default_schema(started_cluster): def check_all_tables_are_synchronized(): for i in range(NUM_TABLES): print('checking table', i) - check_tables_are_synchronized("postgresql_replica_{}".format(i), postgres_database=clickhouse_postgres_db); + check_tables_are_synchronized(instance, f"postgresql_replica_{i}", postgres_database=clickhouse_postgres_db); print('synchronization Ok') create_postgres_schema(cursor, schema_name) - create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name) + pg_manager.create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name) for i in range(NUM_TABLES): - table_name = 'postgresql_replica_{}'.format(i) - create_postgres_table_with_schema(cursor, schema_name, table_name); + create_postgres_table_with_schema(cursor, schema_name, f'postgresql_replica_{i}'); insert_into_tables() - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[f"materialized_postgresql_schema = '{schema_name}'", "materialized_postgresql_allow_automatic_update = 1"]) insert_into_tables() @@ -434,22 +231,19 @@ def test_database_with_single_non_default_schema(started_cluster): cursor.execute("ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer".format(altered_table)) instance.query(f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)") - assert_number_of_columns(3, f'postgresql_replica_{altered_table}') - check_tables_are_synchronized(f"postgresql_replica_{altered_table}", postgres_database=clickhouse_postgres_db); + assert_number_of_columns(instance, 3, 
f'postgresql_replica_{altered_table}') + check_tables_are_synchronized(instance, f"postgresql_replica_{altered_table}", postgres_database=clickhouse_postgres_db); print('DETACH-ATTACH') detached_table_name = "postgresql_replica_1" instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name}") assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.{detached_table_name}") - check_tables_are_synchronized(detached_table_name, postgres_database=clickhouse_postgres_db); - - drop_materialized_db() + check_tables_are_synchronized(instance, detached_table_name, postgres_database=clickhouse_postgres_db); def test_database_with_multiple_non_default_schemas_1(started_cluster): - conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) - cursor = conn.cursor() + cursor = pg_manager.get_db_cursor() NUM_TABLES = 5 schema_name = 'test_schema' @@ -475,11 +269,11 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): def check_all_tables_are_synchronized(): for i in range(NUM_TABLES): print('checking table', i) - check_tables_are_synchronized("postgresql_replica_{}".format(i), schema_name=schema_name, postgres_database=clickhouse_postgres_db); + check_tables_are_synchronized(instance, "postgresql_replica_{}".format(i), schema_name=schema_name, postgres_database=clickhouse_postgres_db); print('synchronization Ok') create_postgres_schema(cursor, schema_name) - create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name) + pg_manager.create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name) for i in range(NUM_TABLES): table_name = 'postgresql_replica_{}'.format(i) @@ -489,7 +283,7 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): publication_tables += schema_name + '.' 
+ table_name insert_into_tables() - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + pg_manager.create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[f"materialized_postgresql_tables_list = '{publication_tables}'", "materialized_postgresql_tables_list_with_schema=1", "materialized_postgresql_allow_automatic_update = 1"]) check_all_tables_are_synchronized() @@ -507,8 +301,8 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): cursor.execute("ALTER TABLE test_schema.postgresql_replica_{} ADD COLUMN value2 integer".format(altered_table)) instance.query(f"INSERT INTO {clickhouse_postgres_db}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(5000, 1000)") - assert_number_of_columns(3, f'{schema_name}.postgresql_replica_{altered_table}') - check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=schema_name, postgres_database=clickhouse_postgres_db); + assert_number_of_columns(instance, 3, f'{schema_name}.postgresql_replica_{altered_table}') + check_tables_are_synchronized(instance, f"postgresql_replica_{altered_table}", schema_name=schema_name, postgres_database=clickhouse_postgres_db); print('DETACH-ATTACH') detached_table_name = "postgresql_replica_1" @@ -516,15 +310,11 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`") assert_show_tables("test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n") - check_tables_are_synchronized(detached_table_name, schema_name=schema_name, postgres_database=clickhouse_postgres_db); - - drop_materialized_db() + check_tables_are_synchronized(instance, detached_table_name, schema_name=schema_name, postgres_database=clickhouse_postgres_db); def test_database_with_multiple_non_default_schemas_2(started_cluster): - conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) - cursor = conn.cursor() - + cursor = pg_manager.get_db_cursor() NUM_TABLES = 2 schemas_num = 2 schema_list = 'schema0, schema1' @@ -539,7 +329,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): for ti in range(NUM_TABLES): table_name = f'postgresql_replica_{ti}' print(f'checking table {schema_name}.{table_name}') - check_tables_are_synchronized(f'{table_name}', schema_name=schema_name, postgres_database=clickhouse_postgres_db); + check_tables_are_synchronized(instance, f'{table_name}', schema_name=schema_name, postgres_database=clickhouse_postgres_db); print('synchronized Ok') def insert_into_tables(): @@ -560,14 +350,16 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): schema_name = f'schema{i}' clickhouse_postgres_db = f'clickhouse_postgres_db{i}' create_postgres_schema(cursor, schema_name) - create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name) + pg_manager.create_clickhouse_postgres_db(ip=cluster.postgres_ip, port=cluster.postgres_port, name=clickhouse_postgres_db, schema_name=schema_name) for ti in range(NUM_TABLES): table_name = f'postgresql_replica_{ti}' create_postgres_table_with_schema(cursor, schema_name, table_name); 
insert_into_tables() - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, - settings=[f"materialized_postgresql_schema_list = '{schema_list}'", "materialized_postgresql_allow_automatic_update = 1"]) + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=[f"materialized_postgresql_schema_list = '{schema_list}'", + "materialized_postgresql_allow_automatic_update = 1"]) check_all_tables_are_synchronized() insert_into_tables() @@ -586,8 +378,8 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): cursor.execute(f"ALTER TABLE schema{altered_schema}.postgresql_replica_{altered_table} ADD COLUMN value2 integer") instance.query(f"INSERT INTO clickhouse_postgres_db{altered_schema}.postgresql_replica_{altered_table} SELECT number, number, number from numbers(1000 * {insert_counter}, 1000)") - assert_number_of_columns(3, f'schema{altered_schema}.postgresql_replica_{altered_table}') - check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=f"schema{altered_schema}", postgres_database=clickhouse_postgres_db); + assert_number_of_columns(instance, 3, f'schema{altered_schema}.postgresql_replica_{altered_table}') + check_tables_are_synchronized(instance, f"postgresql_replica_{altered_table}", schema_name=f"schema{altered_schema}", postgres_database=clickhouse_postgres_db); print('DETACH-ATTACH') detached_table_name = "postgresql_replica_1" @@ -597,23 +389,22 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`") assert_show_tables("schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n") - check_tables_are_synchronized(f"postgresql_replica_{altered_table}", schema_name=detached_table_schema, postgres_database=clickhouse_postgres_db); - - drop_materialized_db() + check_tables_are_synchronized(instance, f"postgresql_replica_{altered_table}", schema_name=detached_table_schema, postgres_database=clickhouse_postgres_db); def test_table_override(started_cluster): - conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) - cursor = conn.cursor() + cursor = pg_manager.get_db_cursor() table_name = 'table_override' materialized_database = 'test_database' create_postgres_table(cursor, table_name, template=postgres_table_template_5); instance.query(f"create table {table_name}(key Int32, value UUID) engine = PostgreSQL (postgres1, table={table_name})") instance.query(f"insert into {table_name} select number, generateUUIDv4() from numbers(10)") table_overrides = f" TABLE OVERRIDE {table_name} (COLUMNS (key Int32, value UUID))" - create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, - settings=[f"materialized_postgresql_tables_list = '{table_name}'"], table_overrides=table_overrides) - assert_nested_table_is_created(table_name, materialized_database) + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=[f"materialized_postgresql_tables_list = '{table_name}'"], + table_overrides=table_overrides) + assert_nested_table_is_created(instance, table_name, materialized_database) result = instance.query(f"show create table 
{materialized_database}.{table_name}") print(result) expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` UUID,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nORDER BY tuple(key)" @@ -621,29 +412,23 @@ def test_table_override(started_cluster): time.sleep(5) query = f"select * from {materialized_database}.{table_name} order by key" expected = instance.query(f"select * from {table_name} order by key") + instance.query(f"drop table {table_name} no delay") assert_eq_with_retry(instance, query, expected) - drop_materialized_db() - drop_postgres_table(cursor, table_name) def test_table_schema_changes_2(started_cluster): - drop_materialized_db() - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) - cursor = conn.cursor() - + cursor = pg_manager.get_db_cursor() table_name = "test_table" create_postgres_table(cursor, table_name, template=postgres_table_template_2); instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number, number, number from numbers(25)") - create_materialized_db(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - settings=["materialized_postgresql_allow_automatic_update = 1, materialized_postgresql_tables_list='test_table'"]) + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=["materialized_postgresql_allow_automatic_update = 1, materialized_postgresql_tables_list='test_table'"]) instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number, number, number from numbers(25, 25)") - check_tables_are_synchronized(table_name); + check_tables_are_synchronized(instance, table_name); cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value1") cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value2") @@ -653,24 +438,24 @@ def test_table_schema_changes_2(started_cluster): cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value3 Text") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value4 Text") cursor.execute(f"UPDATE {table_name} SET value3 = 'kek' WHERE key%2=0") - check_tables_are_synchronized(table_name); + check_tables_are_synchronized(instance, table_name); instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, toString(number), toString(number), toString(number), toString(number) from numbers(50, 25)") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value5 Integer") cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value2") instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, toString(number), toString(number), toString(number), number from numbers(75, 25)") - check_tables_are_synchronized(table_name); + check_tables_are_synchronized(instance, table_name); instance.restart_clickhouse() - check_tables_are_synchronized(table_name); + check_tables_are_synchronized(instance, table_name); cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value5") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value5 Text") instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, toString(number), toString(number), toString(number), toString(number) from numbers(100, 25)") - check_tables_are_synchronized(table_name); + check_tables_are_synchronized(instance, table_name); cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value6 Text") cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value7 Integer") 
cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN value8 Integer") cursor.execute(f"ALTER TABLE {table_name} DROP COLUMN value5") instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, toString(number), toString(number), toString(number), toString(number), number, number from numbers(125, 25)") - check_tables_are_synchronized(table_name); + check_tables_are_synchronized(instance, table_name); if __name__ == '__main__': diff --git a/tests/integration/test_storage_mysql/configs/named_collections.xml b/tests/integration/test_storage_mysql/configs/named_collections.xml index 4a97be7bd98..b4a79880d2a 100644 --- a/tests/integration/test_storage_mysql/configs/named_collections.xml +++ b/tests/integration/test_storage_mysql/configs/named_collections.xml @@ -21,5 +21,14 @@ clickhouse test_table
+ <mysql4> + <user>root</user> + <password>clickhouse</password> + <host>mysql57</host> + <port>3306</port> + <database>clickhouse</database> + <table>test_table</table>
+ <connection_pool_size>0</connection_pool_size> + </mysql4>
diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index c0ba0d8735e..713a8793f48 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -418,6 +418,10 @@ def test_predefined_connection_configuration(started_cluster): ''') assert (node1.query(f"SELECT count() FROM test_table").rstrip() == '100') + assert 'Connection pool cannot have zero size' in node1.query_and_get_error("SELECT count() FROM mysql(mysql1, table='test_table', connection_pool_size=0)") + assert 'Connection pool cannot have zero size' in node1.query_and_get_error("SELECT count() FROM mysql(mysql4)") + assert int(node1.query("SELECT count() FROM mysql(mysql4, connection_pool_size=1)")) == 100 + # Regression for (k, v) IN ((k, v)) def test_mysql_in(started_cluster): diff --git a/tests/integration/test_system_logs_recreate/test.py b/tests/integration/test_system_logs_recreate/test.py index 3ab0269b42e..c0afa8cd555 100644 --- a/tests/integration/test_system_logs_recreate/test.py +++ b/tests/integration/test_system_logs_recreate/test.py @@ -68,3 +68,26 @@ def test_system_logs_recreate(): # IOW that the table created only when the structure is indeed different. for table in system_logs: assert len(node.query(f"SHOW TABLES FROM system LIKE '{table}%'").strip().split('\n')) == 3 + + +def test_drop_system_log(): + node.exec_in_container(['bash', '-c', f"""echo " + + + 1000000 + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """]) + node.restart_clickhouse() + node.query("select 1") + node.query("system flush logs") + node.query("select 2") + node.query("system flush logs") + assert node.query("select count() > 0 from system.query_log") == "1\n" + node.query("drop table system.query_log sync") + node.query("select 3") + node.query("system flush logs") + assert node.query("select count() > 0 from system.query_log") == "1\n" + node.exec_in_container(['rm', f'/etc/clickhouse-server/config.d/yyy-override-query_log.xml']) + node.restart_clickhouse() diff --git a/tests/queries/0_stateless/01074_h3_range_check.sql b/tests/queries/0_stateless/01074_h3_range_check.sql index acf59b16d75..4c655f44a8b 100644 --- a/tests/queries/0_stateless/01074_h3_range_check.sql +++ b/tests/queries/0_stateless/01074_h3_range_check.sql @@ -2,3 +2,4 @@ SELECT h3EdgeLengthM(100); -- { serverError 69 } SELECT h3HexAreaM2(100); -- { serverError 69 } +SELECT h3HexAreaKm2(100); -- { serverError 69 } diff --git a/tests/queries/0_stateless/01282_system_parts_ttl_info.sql b/tests/queries/0_stateless/01282_system_parts_ttl_info.sql index dfa340636b3..ede5350ddd4 100644 --- a/tests/queries/0_stateless/01282_system_parts_ttl_info.sql +++ b/tests/queries/0_stateless/01282_system_parts_ttl_info.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS ttl; -CREATE TABLE ttl (d DateTime) ENGINE = MergeTree ORDER BY tuple() TTL d + INTERVAL 10 DAY; +CREATE TABLE ttl (d DateTime) ENGINE = MergeTree ORDER BY tuple() TTL d + INTERVAL 10 DAY SETTINGS remove_empty_parts=0; SYSTEM STOP MERGES ttl; INSERT INTO ttl VALUES ('2000-01-01 01:02:03'), ('2000-02-03 04:05:06'); SELECT rows, delete_ttl_info_min, delete_ttl_info_max, move_ttl_info.expression, move_ttl_info.min, move_ttl_info.max FROM system.parts WHERE database = currentDatabase() AND table = 'ttl'; diff --git a/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference b/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference index 887c701a5e4..4cc67aa517c 100644 --- 
a/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference +++ b/tests/queries/0_stateless/01604_explain_ast_of_nonselect_query.reference @@ -1,6 +1,6 @@ AlterQuery t1 (children 2) ExpressionList (children 1) - AlterCommand 33 (children 1) + AlterCommand DELETE (children 1) Function equals (children 1) ExpressionList (children 2) Identifier date diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index b52a8712087..234804f1078 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -21,7 +21,7 @@ CREATE TABLE system.events\n(\n `event` String,\n `value` UInt64,\n `de CREATE TABLE system.formats\n(\n `name` String,\n `is_input` UInt8,\n `is_output` UInt8\n)\nENGINE = SystemFormats()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.functions\n(\n `name` String,\n `is_aggregate` UInt8,\n `case_insensitive` UInt8,\n `alias_to` String,\n `create_query` String,\n `origin` Enum8(\'System\' = 0, \'SQLUserDefined\' = 1, \'ExecutableUserDefined\' = 2)\n)\nENGINE = SystemFunctions()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP 
SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `rule_type` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.licenses\n(\n `library_name` String,\n `license_type` String,\n `license_path` String,\n `license_text` String\n)\nENGINE = SystemLicenses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.macros\n(\n `macro` String,\n `substitution` String\n)\nENGINE = SystemMacros()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.merge_tree_settings\n(\n `name` String,\n `value` String,\n `changed` UInt8,\n `description` String,\n `type` String\n)\nENGINE = SystemMergeTreeSettings()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git a/tests/queries/0_stateless/02155_nested_lc_defalut_bug.reference b/tests/queries/0_stateless/02155_nested_lc_defalut_bug.reference new file mode 100644 index 00000000000..fe99b0a6585 --- /dev/null +++ b/tests/queries/0_stateless/02155_nested_lc_defalut_bug.reference @@ -0,0 +1 @@ +1 ['a','b'] [3,4] ['',''] diff --git a/tests/queries/0_stateless/02155_nested_lc_defalut_bug.sql b/tests/queries/0_stateless/02155_nested_lc_defalut_bug.sql new file mode 100644 index 00000000000..45cb9f96b95 --- /dev/null +++ b/tests/queries/0_stateless/02155_nested_lc_defalut_bug.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS nested_test; +CREATE TABLE nested_test (x 
UInt32, `nest.col1` Array(String), `nest.col2` Array(Int8)) ENGINE = MergeTree ORDER BY x; + +ALTER TABLE nested_test ADD COLUMN `nest.col3` Array(LowCardinality(String)); +INSERT INTO nested_test (x, `nest.col1`, `nest.col2`) values (1, ['a', 'b'], [3, 4]); +SELECT * FROM nested_test; + +DROP TABLE IF EXISTS nested_test; diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference index 6df60403ae0..2dc83f1eaa5 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference @@ -1,13 +1,13 @@ SELECT count() FROM t_02156_merge1 -PREWHERE k = 1 -WHERE (k = 1) AND notEmpty(v) +PREWHERE k = 3 +WHERE (k = 3) AND notEmpty(v) 2 SELECT count() FROM t_02156_merge2 -WHERE (k = 1) AND notEmpty(v) +WHERE (k = 3) AND notEmpty(v) 2 SELECT count() FROM t_02156_merge3 -WHERE (k = 1) AND notEmpty(v) +WHERE (k = 3) AND notEmpty(v) 2 diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index e033005e014..69fa9ac5ee2 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -20,14 +20,14 @@ INSERT INTO t_02156_mt1 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_mt2 SELECT number, toString(number) FROM numbers(10000); INSERT INTO t_02156_log SELECT number, toString(number) FROM numbers(10000); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge1 WHERE k = 1 AND notEmpty(v); -SELECT count() FROM t_02156_merge1 WHERE k = 1 AND notEmpty(v); +EXPLAIN SYNTAX SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); +SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge2 WHERE k = 1 AND notEmpty(v); -SELECT count() FROM t_02156_merge2 WHERE k = 1 AND notEmpty(v); +EXPLAIN SYNTAX SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v); +SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v); -EXPLAIN SYNTAX SELECT count() FROM t_02156_merge3 WHERE k = 1 AND notEmpty(v); -SELECT count() FROM t_02156_merge3 WHERE k = 1 AND notEmpty(v); +EXPLAIN SYNTAX SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v); +SELECT count() FROM t_02156_merge3 WHERE k = 3 AND notEmpty(v); DROP TABLE IF EXISTS t_02156_mt1; DROP TABLE IF EXISTS t_02156_mt2; diff --git a/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference b/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference new file mode 100644 index 00000000000..030d5a8f5af --- /dev/null +++ b/tests/queries/0_stateless/02158_explain_ast_alter_commands.reference @@ -0,0 +1,41 @@ + AlterCommand ADD_COLUMN (children 1) + AlterCommand DROP_COLUMN (children 1) + AlterCommand MODIFY_COLUMN (children 1) + AlterCommand COMMENT_COLUMN (children 2) + AlterCommand RENAME_COLUMN (children 2) + AlterCommand MATERIALIZE_COLUMN (children 1) + AlterCommand MODIFY_ORDER_BY (children 1) + AlterCommand MODIFY_SAMPLE_BY (children 1) + AlterCommand MODIFY_TTL (children 1) + AlterCommand MATERIALIZE_TTL (children 1) + AlterCommand MODIFY_SETTING (children 1) + AlterCommand RESET_SETTING + AlterCommand MODIFY_QUERY (children 1) + AlterCommand REMOVE_TTL + AlterCommand REMOVE_SAMPLE_BY + AlterCommand ADD_INDEX (children 1) + AlterCommand DROP_INDEX (children 1) + AlterCommand MATERIALIZE_INDEX (children 1) + AlterCommand ADD_CONSTRAINT 
(children 1) + AlterCommand DROP_CONSTRAINT (children 1) + AlterCommand ADD_PROJECTION (children 1) + AlterCommand DROP_PROJECTION (children 1) + AlterCommand MATERIALIZE_PROJECTION (children 1) + AlterCommand DROP_PARTITION (children 1) + AlterCommand DROP_PARTITION (children 1) + AlterCommand ATTACH_PARTITION (children 1) + AlterCommand ATTACH_PARTITION (children 1) + AlterCommand REPLACE_PARTITION (children 1) + AlterCommand REPLACE_PARTITION (children 1) + AlterCommand MOVE_PARTITION (children 1) + AlterCommand DROP_COLUMN (children 2) + AlterCommand FREEZE_ALL + AlterCommand FREEZE_PARTITION (children 1) + AlterCommand UNFREEZE_ALL + AlterCommand UNFREEZE_PARTITION (children 1) + AlterCommand FETCH_PARTITION (children 1) + AlterCommand FETCH_PARTITION (children 1) + AlterCommand UPDATE (children 2) + AlterCommand UPDATE (children 3) + AlterCommand DELETE (children 1) + AlterCommand DELETE (children 2) diff --git a/tests/queries/0_stateless/02158_explain_ast_alter_commands.sh b/tests/queries/0_stateless/02158_explain_ast_alter_commands.sh new file mode 100755 index 00000000000..8dfb61eedfb --- /dev/null +++ b/tests/queries/0_stateless/02158_explain_ast_alter_commands.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo "EXPLAIN AST ALTER TABLE t ADD COLUMN c Int8; +EXPLAIN AST ALTER TABLE t DROP COLUMN c; +EXPLAIN AST ALTER TABLE t MODIFY COLUMN c Int8; +EXPLAIN AST ALTER TABLE t COMMENT COLUMN c 'comment'; +EXPLAIN AST ALTER TABLE t RENAME COLUMN c TO d; +EXPLAIN AST ALTER TABLE t MATERIALIZE COLUMN c; +EXPLAIN AST ALTER TABLE t MODIFY ORDER BY x; +EXPLAIN AST ALTER TABLE t MODIFY SAMPLE BY y; +EXPLAIN AST ALTER TABLE t MODIFY TTL z + INTERVAL 7 DAY; +EXPLAIN AST ALTER TABLE t MATERIALIZE TTL IN PARTITION 'p'; +EXPLAIN AST ALTER TABLE t MODIFY SETTING index_granularity = 4096; +EXPLAIN AST ALTER TABLE t RESET SETTING index_granularity; +EXPLAIN AST ALTER TABLE t MODIFY QUERY SELECT 42; +EXPLAIN AST ALTER TABLE t REMOVE TTL; +EXPLAIN AST ALTER TABLE t REMOVE SAMPLE BY; +EXPLAIN AST ALTER TABLE t ADD INDEX i c TYPE minmax GRANULARITY 1; +EXPLAIN AST ALTER TABLE t DROP INDEX i; +EXPLAIN AST ALTER TABLE t MATERIALIZE INDEX i; +EXPLAIN AST ALTER TABLE t ADD CONSTRAINT x CHECK 1; +EXPLAIN AST ALTER TABLE t DROP CONSTRAINT x; +EXPLAIN AST ALTER TABLE t ADD PROJECTION p (SELECT c); +EXPLAIN AST ALTER TABLE t DROP PROJECTION p; +EXPLAIN AST ALTER TABLE t MATERIALIZE PROJECTION p; +EXPLAIN AST ALTER TABLE t DETACH PARTITION 'p'; +EXPLAIN AST ALTER TABLE t DROP PARTITION 'p'; +EXPLAIN AST ALTER TABLE t ATTACH PARTITION 'p'; +EXPLAIN AST ALTER TABLE t ATTACH PART 'p'; +EXPLAIN AST ALTER TABLE t ATTACH PARTITION 'p' FROM t2; +EXPLAIN AST ALTER TABLE t REPLACE PARTITION 'p' FROM t2; +EXPLAIN AST ALTER TABLE t MOVE PARTITION 'p' TO TABLE t2; +EXPLAIN AST ALTER TABLE t CLEAR COLUMN c IN PARTITION 'p'; +EXPLAIN AST ALTER TABLE t FREEZE; +EXPLAIN AST ALTER TABLE t FREEZE PARTITION 'p'; +EXPLAIN AST ALTER TABLE t UNFREEZE WITH NAME 'n'; +EXPLAIN AST ALTER TABLE t UNFREEZE PARTITION 'p' WITH NAME 'n'; +EXPLAIN AST ALTER TABLE t FETCH PARTITION 'p' FROM '/path'; +EXPLAIN AST ALTER TABLE t FETCH PART 'p' FROM '/path'; +EXPLAIN AST ALTER TABLE t UPDATE c = 1 WHERE 1; +EXPLAIN AST ALTER TABLE t UPDATE c = 1 IN PARTITION 'p' WHERE 1; +EXPLAIN AST ALTER TABLE t DELETE WHERE c = 1; +EXPLAIN AST ALTER TABLE t DELETE IN PARTITION 'p' WHERE c = 1;" 
| \ + $CLICKHOUSE_CLIENT --readonly 1 --multiquery 2>&1 | grep 'AlterCommand' diff --git a/tests/queries/0_stateless/02160_h3_cell_area_m2.reference b/tests/queries/0_stateless/02160_h3_cell_area_m2.reference new file mode 100644 index 00000000000..e8727e05cf9 --- /dev/null +++ b/tests/queries/0_stateless/02160_h3_cell_area_m2.reference @@ -0,0 +1,16 @@ +4106166334463.9233 +666617118882.2277 +85294486110.07852 +12781831077.715292 +1730585103.2965515 +302748289.6422262 +30296673.089799587 +4984621.68910725 +644257.1047199412 +113498.17901913072 +16692.536464980716 +2335.8824226249617 +324.4496823479308 +48.63220901355471 +7.442732649761864 +0.5977527784258132 diff --git a/tests/queries/0_stateless/02160_h3_cell_area_m2.sql b/tests/queries/0_stateless/02160_h3_cell_area_m2.sql new file mode 100644 index 00000000000..55c6ef45542 --- /dev/null +++ b/tests/queries/0_stateless/02160_h3_cell_area_m2.sql @@ -0,0 +1,30 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS h3_indexes; + +CREATE TABLE h3_indexes (h3_index UInt64) ENGINE = Memory; + +-- Random geo coordinates were generated using the H3 tool: https://github.com/ClickHouse-Extras/h3/blob/master/src/apps/testapps/mkRandGeo.c at various resolutions from 0 to 15. +-- Corresponding H3 index values were in turn generated with those geo coordinates using `geoToH3(lon, lat, res)` ClickHouse function for the following test. + +INSERT INTO h3_indexes VALUES (579205133326352383); +INSERT INTO h3_indexes VALUES (581263419093549055); +INSERT INTO h3_indexes VALUES (589753847883235327); +INSERT INTO h3_indexes VALUES (594082350283882495); +INSERT INTO h3_indexes VALUES (598372386957426687); +INSERT INTO h3_indexes VALUES (599542359671177215); +INSERT INTO h3_indexes VALUES (604296355086598143); +INSERT INTO h3_indexes VALUES (608785214872748031); +INSERT INTO h3_indexes VALUES (615732192485572607); +INSERT INTO h3_indexes VALUES (617056794467368959); +INSERT INTO h3_indexes VALUES (624586477873168383); +INSERT INTO h3_indexes VALUES (627882919484481535); +INSERT INTO h3_indexes VALUES (634600058503392255); +INSERT INTO h3_indexes VALUES (635544851677385791); +INSERT INTO h3_indexes VALUES (639763125756281263); +INSERT INTO h3_indexes VALUES (644178757620501158); + + +SELECT h3CellAreaM2(h3_index) FROM h3_indexes ORDER BY h3_index; + +DROP TABLE h3_indexes; diff --git a/tests/queries/0_stateless/02160_h3_cell_area_rads2.reference b/tests/queries/0_stateless/02160_h3_cell_area_rads2.reference new file mode 100644 index 00000000000..d74c3f77f97 --- /dev/null +++ b/tests/queries/0_stateless/02160_h3_cell_area_rads2.reference @@ -0,0 +1,16 @@ +0.10116268528089567 +0.01642329421346843 +0.002101380838405832 +0.00031490306268786255 +0.000042636031250655976 +0.000007458740696242262 +7.464122383736096e-7 +1.2280498988731694e-7 +1.587241563444197e-8 +2.7962288004989136e-9 +4.112502211061015e-10 +5.754860352096175e-11 +7.99339296836726e-12 +1.1981406631437076e-12 +1.8336491007639705e-13 +1.4726699133479243e-14 diff --git a/tests/queries/0_stateless/02160_h3_cell_area_rads2.sql b/tests/queries/0_stateless/02160_h3_cell_area_rads2.sql new file mode 100644 index 00000000000..038a0cabd50 --- /dev/null +++ b/tests/queries/0_stateless/02160_h3_cell_area_rads2.sql @@ -0,0 +1,30 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS h3_indexes; + +CREATE TABLE h3_indexes (h3_index UInt64) ENGINE = Memory; + +-- Random geo coordinates were generated using the H3 tool: https://github.com/ClickHouse-Extras/h3/blob/master/src/apps/testapps/mkRandGeo.c at various resolutions from 0 
to 15. +-- Corresponding H3 index values were in turn generated with those geo coordinates using `geoToH3(lon, lat, res)` ClickHouse function for the following test. + +INSERT INTO h3_indexes VALUES (579205133326352383); +INSERT INTO h3_indexes VALUES (581263419093549055); +INSERT INTO h3_indexes VALUES (589753847883235327); +INSERT INTO h3_indexes VALUES (594082350283882495); +INSERT INTO h3_indexes VALUES (598372386957426687); +INSERT INTO h3_indexes VALUES (599542359671177215); +INSERT INTO h3_indexes VALUES (604296355086598143); +INSERT INTO h3_indexes VALUES (608785214872748031); +INSERT INTO h3_indexes VALUES (615732192485572607); +INSERT INTO h3_indexes VALUES (617056794467368959); +INSERT INTO h3_indexes VALUES (624586477873168383); +INSERT INTO h3_indexes VALUES (627882919484481535); +INSERT INTO h3_indexes VALUES (634600058503392255); +INSERT INTO h3_indexes VALUES (635544851677385791); +INSERT INTO h3_indexes VALUES (639763125756281263); +INSERT INTO h3_indexes VALUES (644178757620501158); + + +SELECT h3CellAreaRads2(h3_index) FROM h3_indexes ORDER BY h3_index; + +DROP TABLE h3_indexes; diff --git a/tests/queries/0_stateless/02160_h3_hex_area_Km2.reference b/tests/queries/0_stateless/02160_h3_hex_area_Km2.reference new file mode 100644 index 00000000000..4d33b49f257 --- /dev/null +++ b/tests/queries/0_stateless/02160_h3_hex_area_Km2.reference @@ -0,0 +1,16 @@ +4250546.848 +607220.9782 +86745.85403 +12392.26486 +1770.323552 +252.9033645 +36.1290521 +5.1612932 +0.7373276 +0.1053325 +0.0150475 +0.0021496 +0.0003071 +0.0000439 +0.0000063 +9e-7 diff --git a/tests/queries/0_stateless/02160_h3_hex_area_Km2.sql b/tests/queries/0_stateless/02160_h3_hex_area_Km2.sql new file mode 100644 index 00000000000..e6c73fa9bda --- /dev/null +++ b/tests/queries/0_stateless/02160_h3_hex_area_Km2.sql @@ -0,0 +1,18 @@ +-- Tags: no-fasttest + +SELECT h3HexAreaKm2(0); +SELECT h3HexAreaKm2(1); +SELECT h3HexAreaKm2(2); +SELECT h3HexAreaKm2(3); +SELECT h3HexAreaKm2(4); +SELECT h3HexAreaKm2(5); +SELECT h3HexAreaKm2(6); +SELECT h3HexAreaKm2(7); +SELECT h3HexAreaKm2(8); +SELECT h3HexAreaKm2(9); +SELECT h3HexAreaKm2(10); +SELECT h3HexAreaKm2(11); +SELECT h3HexAreaKm2(12); +SELECT h3HexAreaKm2(13); +SELECT h3HexAreaKm2(14); +SELECT h3HexAreaKm2(15); diff --git a/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.reference b/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.reference new file mode 100644 index 00000000000..3c26be9d9b2 --- /dev/null +++ b/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.reference @@ -0,0 +1,9 @@ +-360 +-180.6 +-180 +-1 +0 +1 +180 +180.5 +360 diff --git a/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.sql b/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.sql new file mode 100644 index 00000000000..b30fc68725b --- /dev/null +++ b/tests/queries/0_stateless/02160_h3_rads_to_degs_degs_to_rads.sql @@ -0,0 +1,21 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS h3_indexes; + + +CREATE TABLE h3_indexes (degrees Float64) ENGINE = Memory; + + +INSERT INTO h3_indexes VALUES (-1); +INSERT INTO h3_indexes VALUES (-180); +INSERT INTO h3_indexes VALUES (-180.6); +INSERT INTO h3_indexes VALUES (-360); +INSERT INTO h3_indexes VALUES (0); +INSERT INTO h3_indexes VALUES (1); +INSERT INTO h3_indexes VALUES (180); +INSERT INTO h3_indexes VALUES (180.5); +INSERT INTO h3_indexes VALUES (360); + +select h3RadsToDegs(h3DegsToRads(degrees)) from h3_indexes order by degrees; + +DROP TABLE h3_indexes; diff --git 
a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect new file mode 100755 index 00000000000..4f006b926bd --- /dev/null +++ b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect @@ -0,0 +1,21 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 20 +match_max 100000 + +expect_after { + eof { exp_continue } + timeout { exit 1 } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" + +expect ":) " +send -- "insert into table function null() format TSV some trash here 123 \n 456\r" +expect -re ".*DB::Exception: Table function 'null' requires 'structure'.*\r" +expect ":) " + +send -- "" +expect eof diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.reference b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02164_materialized_view_support_virtual_column.reference b/tests/queries/0_stateless/02164_materialized_view_support_virtual_column.reference new file mode 100644 index 00000000000..00750edc07d --- /dev/null +++ b/tests/queries/0_stateless/02164_materialized_view_support_virtual_column.reference @@ -0,0 +1 @@ +3 diff --git a/tests/queries/0_stateless/02164_materialized_view_support_virtual_column.sql b/tests/queries/0_stateless/02164_materialized_view_support_virtual_column.sql new file mode 100644 index 00000000000..ad48a7507da --- /dev/null +++ b/tests/queries/0_stateless/02164_materialized_view_support_virtual_column.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test_tb; +CREATE TABLE test_tb (a UInt64, s String) ENGINE = MergeTree() ORDER BY a; + +DROP VIEW IF EXISTS test_view_tb; +CREATE MATERIALIZED VIEW test_view_tb ENGINE = MergeTree() ORDER BY a AS SELECT * FROM test_tb; + +INSERT INTO test_tb VALUES (1, '1'), (2, '2'), (3, '3'); + +SELECT count(_part) FROM test_view_tb; diff --git a/tests/queries/0_stateless/02165_auto_format_by_file_extension.reference b/tests/queries/0_stateless/02165_auto_format_by_file_extension.reference new file mode 100644 index 00000000000..ca3d2dd1d80 --- /dev/null +++ b/tests/queries/0_stateless/02165_auto_format_by_file_extension.reference @@ -0,0 +1,40 @@ +1 one +2 tow +1 one +2 tow +1 one +2 tow +1 one +2 tow +1 one +2 tow +1 one +2 tow +1 one +2 tow +{ + "meta": + [ + { + "name": "id", + "type": "UInt64" + }, + { + "name": "name", + "type": "String" + } + ], + + "data": + [ + { + "id": "1", + "name": "one" + }, + { + "id": "2", + "name": "tow" + } + ], + + "rows": 2, diff --git a/tests/queries/0_stateless/02165_auto_format_by_file_extension.sh b/tests/queries/0_stateless/02165_auto_format_by_file_extension.sh new file mode 100755 index 00000000000..d2e16d9ec0b --- /dev/null +++ b/tests/queries/0_stateless/02165_auto_format_by_file_extension.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -e + +[ -e "${CLICKHOUSE_TMP}"/hello.csv ] && rm "${CLICKHOUSE_TMP}"/hello.csv +[ -e "${CLICKHOUSE_TMP}"/world.csv.gz ] && rm "${CLICKHOUSE_TMP}"/world.csv.gz +[ -e "${CLICKHOUSE_TMP}"/hello.world.csv ] && rm "${CLICKHOUSE_TMP}"/hello.world.csv +[ -e "${CLICKHOUSE_TMP}"/hello.world.csv.xz ] && rm "${CLICKHOUSE_TMP}"/hello.world.csv.xz +[ -e "${CLICKHOUSE_TMP}"/.htaccess.json ] && rm "${CLICKHOUSE_TMP}"/.htaccess.json +[ -e "${CLICKHOUSE_TMP}"/example.com. ] && rm "${CLICKHOUSE_TMP}"/example.com. +[ -e "${CLICKHOUSE_TMP}"/museum...protobuf ] && rm "${CLICKHOUSE_TMP}"/museum...protobuf + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS 02165_out_tb;" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE 02165_out_tb (id UInt64, name String) Engine=Memory;" +${CLICKHOUSE_CLIENT} --query "INSERT INTO 02165_out_tb Values(1, 'one'), (2, 'tow');" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE 02165_in_tb (id UInt64, name String) Engine=Memory;" + + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/hello.csv';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/hello.csv' FORMAT CSV;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/world.csv.gz';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/world.csv.gz' COMPRESSION 'gz' FORMAT CSV;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/hello.world.csv';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/hello.world.csv' FORMAT CSV;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/hello.world.csv.xz';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/hello.world.csv.xz' COMPRESSION 'xz' FORMAT CSV;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/example.com.';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/example.com.' 
FORMAT TabSeparated;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/museum...protobuf';" +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/museum...protobuf' FORMAT TabSeparated;" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/world.csv.gz';" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;" + + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/.htaccess.json';" +head -n 26 ${CLICKHOUSE_TMP}/.htaccess.json + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS 02165_out_tb;" +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS 02165_in_tb;" + +rm "${CLICKHOUSE_TMP}"/hello.csv +rm "${CLICKHOUSE_TMP}"/world.csv.gz +rm "${CLICKHOUSE_TMP}"/hello.world.csv +rm "${CLICKHOUSE_TMP}"/hello.world.csv.xz +rm "${CLICKHOUSE_TMP}"/.htaccess.json +rm "${CLICKHOUSE_TMP}"/example.com. +rm "${CLICKHOUSE_TMP}"/museum...protobuf diff --git a/tests/queries/0_stateless/02165_insert_from_infile.reference b/tests/queries/0_stateless/02165_insert_from_infile.reference new file mode 100644 index 00000000000..2a00a8faa31 --- /dev/null +++ b/tests/queries/0_stateless/02165_insert_from_infile.reference @@ -0,0 +1,5 @@ +INSERT INTO test FROM INFILE data.file SELECT x +FROM input(\'x UInt32\') +INSERT INTO test FROM INFILE data.file WITH number AS x +SELECT number +FROM input(\'number UInt32\') diff --git a/tests/queries/0_stateless/02165_insert_from_infile.sql b/tests/queries/0_stateless/02165_insert_from_infile.sql new file mode 100644 index 00000000000..8cc851fa4e5 --- /dev/null +++ b/tests/queries/0_stateless/02165_insert_from_infile.sql @@ -0,0 +1,4 @@ +EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' SELECT x from input('x UInt32') FORMAT TSV; +EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' WATCH view; -- { clientError SYNTAX_ERROR } +EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' VALUES (1) -- { clientError SYNTAX_ERROR } +EXPLAIN SYNTAX INSERT INTO test FROM INFILE 'data.file' WITH number AS x SELECT number FROM input('number UInt32'); diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference new file mode 100644 index 00000000000..46f448cfba7 --- /dev/null +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference @@ -0,0 +1 @@ +x LowCardinality(UInt64) diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh new file mode 100755 index 00000000000..e560dc10d2c --- /dev/null +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1" + +$CLICKHOUSE_CLIENT -q "desc file('arrow.dict', 'Arrow')" + diff --git a/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.reference b/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.reference new file mode 100644 index 00000000000..32c54e3eeea --- /dev/null +++ b/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.reference @@ -0,0 +1,12 @@ +5 +6 +7 +8 +9 +10 +0 +1 +2 +3 +4 +5 diff --git a/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.sql b/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.sql new file mode 100644 index 00000000000..8ac88ebc5c0 --- /dev/null +++ b/tests/queries/0_stateless/02169_fix_view_offset_limit_setting.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS counter; +CREATE TABLE counter (id UInt64, createdAt DateTime) ENGINE = MergeTree() ORDER BY id; +INSERT INTO counter SELECT number, now() FROM numbers(500); + +DROP TABLE IF EXISTS vcounter; +CREATE VIEW vcounter AS SELECT intDiv(id, 10) AS tens, max(createdAt) AS maxid FROM counter GROUP BY tens; + +SELECT tens FROM vcounter ORDER BY tens ASC LIMIT 100 SETTINGS limit = 6, offset = 5; + +SELECT tens FROM vcounter ORDER BY tens ASC LIMIT 100 SETTINGS limit = 6, offset = 0; +DROP TABLE vcounter; +DROP TABLE counter; diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 8309b6bcb53..a930e7db3fc 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -32,6 +32,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (wal-dump) add_subdirectory (check-mysql-binlog) add_subdirectory (keeper-bench) + add_subdirectory (graphite-rollup) if (USE_NURAFT) add_subdirectory (keeper-data-dumper) diff --git a/utils/graphite-rollup/CMakeLists.txt b/utils/graphite-rollup/CMakeLists.txt new file mode 100644 index 00000000000..3cc0d3e756f --- /dev/null +++ b/utils/graphite-rollup/CMakeLists.txt @@ -0,0 +1,23 @@ +add_executable(graphite-rollup-bench graphite-rollup-bench.cpp) +target_link_libraries( + graphite-rollup-bench + PRIVATE + clickhouse_storages_system + clickhouse_aggregate_functions + clickhouse_common_config + dbms +) +target_include_directories( + graphite-rollup-bench + SYSTEM PRIVATE + ${ClickHouse_SOURCE_DIR}/src ${CMAKE_BINARY_DIR}/src + ${ClickHouse_SOURCE_DIR}/base ${ClickHouse_SOURCE_DIR}/base/pcg-random + ${CMAKE_BINARY_DIR}/src/Core/include + ${POCO_INCLUDE_DIR} + ${ClickHouse_SOURCE_DIR}/contrib/double-conversion ${ClickHouse_SOURCE_DIR}/contrib/dragonbox/include + ${ClickHouse_SOURCE_DIR}/contrib/fmtlib/include + ${ClickHouse_SOURCE_DIR}/contrib/cityhash102/include + ${RE2_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/re2_st +) + +target_compile_definitions(graphite-rollup-bench PRIVATE RULES_DIR="${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/utils/graphite-rollup/graphite-rollup-bench.cpp b/utils/graphite-rollup/graphite-rollup-bench.cpp new file mode 100644 index 00000000000..dabe0353b0f --- /dev/null +++ b/utils/graphite-rollup/graphite-rollup-bench.cpp @@ -0,0 +1,147 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +static SharedContextHolder shared_context = Context::createShared(); + +std::vector loadMetrics(const std::string & metrics_file) +{ + std::vector metrics; + + 
FILE * stream; + char * line = nullptr; + size_t len = 0; + ssize_t nread; + + stream = fopen(metrics_file.c_str(), "r"); + if (stream == nullptr) + { + throw std::runtime_error(strerror(errno)); + } + + while ((nread = getline(&line, &len, stream)) != -1) + { + size_t l = strlen(line); + if (l > 0) + { + if (line[l - 1] == '\n') + { + line[l - 1] = '\0'; + l--; + } + if (l > 0) + { + metrics.push_back(StringRef(strdup(line), l)); + } + } + } + free(line); + if (ferror(stream)) + { + fclose(stream); + throw std::runtime_error(strerror(errno)); + } + + fclose(stream); + + return metrics; +} + +ConfigProcessor::LoadedConfig loadConfiguration(const std::string & config_path) +{ + ConfigProcessor config_processor(config_path, true, true); + ConfigProcessor::LoadedConfig config = config_processor.loadConfig(false); + return config; +} + +void bench(const std::string & config_path, const std::string & metrics_file, size_t n, bool verbose) +{ + auto config = loadConfiguration(config_path); + + auto context = Context::createGlobal(shared_context.get()); + context->setConfig(config.configuration.get()); + + Graphite::Params params; + setGraphitePatternsFromConfig(context, "graphite_rollup", params); + + std::vector metrics = loadMetrics(metrics_file); + + std::vector durations(metrics.size()); + size_t j, i; + for (j = 0; j < n; j++) + { + for (i = 0; i < metrics.size(); i++) + { + auto start = std::chrono::high_resolution_clock::now(); + + auto rule = DB::Graphite::selectPatternForPath(params, metrics[i]); + (void)rule; + + auto end = std::chrono::high_resolution_clock::now(); + double duration = (duration_cast>(end - start)).count() * 1E9; + durations[i] += duration; + + if (j == 0 && verbose) + { + std::cout << metrics[i].data << ": rule with regexp '" << rule.second->regexp_str << "' found\n"; + } + } + } + + for (i = 0; i < metrics.size(); i++) + { + std::cout << metrics[i].data << " " << durations[i] / n << " ns\n"; + free(const_cast(static_cast(metrics[i].data))); + } +} + +int main(int argc, char ** argv) +{ + registerAggregateFunctions(); + + std::string config_file, metrics_file; + + using namespace std::literals; + + std::string config_default = RULES_DIR + "/rollup.xml"s; + std::string metrics_default = RULES_DIR + "/metrics.txt"s; + + namespace po = boost::program_options; + po::variables_map vm; + + po::options_description desc; + desc.add_options()("help,h", "produce help")( + "config,c", po::value()->default_value(config_default), "XML config with rollup rules")( + "metrics,m", po::value()->default_value(metrics_default), "metrcis files (one metric per line) for run benchmark")( + "verbose,V", po::bool_switch()->default_value(false), "verbose output (print found rule)"); + + po::parsed_options parsed = po::command_line_parser(argc, argv).options(desc).run(); + po::store(parsed, vm); + po::notify(vm); + + if (vm.count("help")) + { + std::cout << desc << '\n'; + exit(1); + } + + bench(vm["config"].as(), vm["metrics"].as(), 10000, vm["verbose"].as()); + + return 0; +} diff --git a/utils/graphite-rollup/metrics.txt b/utils/graphite-rollup/metrics.txt new file mode 100644 index 00000000000..199c3791310 --- /dev/null +++ b/utils/graphite-rollup/metrics.txt @@ -0,0 +1,11 @@ +test.sum +sum?env=test&tag=Fake3 +test.max +max?env=test&tag=Fake4 +test.min +min?env=test&tag=Fake5 +fake5?env=test&tag=Fake5 +test.p95 +p95?env=test&tag=FakeNo +default +default?env=test&tag=FakeNo diff --git a/utils/graphite-rollup/rollup-tag-list.xml b/utils/graphite-rollup/rollup-tag-list.xml new file mode 100644 
index 00000000000..ef28f2089ad --- /dev/null +++ b/utils/graphite-rollup/rollup-tag-list.xml @@ -0,0 +1,167 @@ + + + + plain + \.sum$ + sum + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)sum\? + sum + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.max$ + max + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)max\? + max + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.min$ + min + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)min\? + min + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.fake1\..*\.Fake1\. + sum + + + tag_list + fake1;tag=Fake1 + sum + + + plain + \.fake2\..*\.Fake2\. + sum + + + tag_list + fake2;tag=Fake2 + sum + + + plain + \.fake3\..*\.Fake3\. + sum + + + tag_list + fake3;tag=Fake3 + sum + + + plain + \.fake4\..*\.Fake4\. + sum + + + tag_list + fake4;tag=Fake4 + sum + + + plain + \.fake5\..*\.Fake5\. + sum + + + tag_list + fake5;tag=Fake5 + sum + + + plain + \.fake6\..*\.Fake6\. + sum + + + tag_list + fake6;tag=Fake6 + sum + + + plain + \.fake7\..*\.Fake7\. + sum + + + tag_list + fake7;tag=Fake7 + sum + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + diff --git a/utils/graphite-rollup/rollup-typed.xml b/utils/graphite-rollup/rollup-typed.xml new file mode 100644 index 00000000000..0b27d43ece9 --- /dev/null +++ b/utils/graphite-rollup/rollup-typed.xml @@ -0,0 +1,167 @@ + + + + plain + \.sum$ + sum + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)sum\? + sum + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.max$ + max + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)max\? + max + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.min$ + min + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)min\? + min + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.fake1\..*\.Fake1\. + sum + + + tagged + + sum + + + plain + \.fake2\..*\.Fake2\. + sum + + + tagged + + sum + + + plain + \.fake3\..*\.Fake3\. + sum + + + tagged + + sum + + + plain + \.fake4\..*\.Fake4\. + sum + + + tagged + + sum + + + plain + \.fake5\..*\.Fake5\. + sum + + + tagged + + sum + + + plain + \.fake6\..*\.Fake6\. + sum + + + tagged + + sum + + + plain + \.fake7\..*\.Fake7\. + sum + + + tagged + + sum + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + diff --git a/utils/graphite-rollup/rollup.xml b/utils/graphite-rollup/rollup.xml new file mode 100644 index 00000000000..641b0130509 --- /dev/null +++ b/utils/graphite-rollup/rollup.xml @@ -0,0 +1,147 @@ + + + + \.sum$ + sum + + 0 + 60 + + + 86400 + 3600 + + + + ^((.*)|.)sum\? + sum + + 0 + 60 + + + 86400 + 3600 + + + + \.max$ + max + + 0 + 60 + + + 86400 + 3600 + + + + ^((.*)|.)max\? + max + + 0 + 60 + + + 86400 + 3600 + + + + \.min$ + min + + 0 + 60 + + + 86400 + 3600 + + + + ^((.*)|.)min\? + min + + 0 + 60 + + + 86400 + 3600 + + + + \.fake1\..*\.Fake1\. + sum + + + + sum + + + \.fake2\..*\.Fake2\. + sum + + + + sum + + + \.fake3\..*\.Fake3\. + sum + + + + sum + + + \.fake4\..*\.Fake4\. + sum + + + + sum + + + \.fake5\..*\.Fake5\. + sum + + + + sum + + + \.fake6\..*\.Fake6\. + sum + + + + sum + + + \.fake7\..*\.Fake7\. 
+ sum + + + + sum + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + +
diff --git a/website/blog/en/2022/admixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md b/website/blog/en/2022/admixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md
new file mode 100644
index 00000000000..3f38d31b2f7
--- /dev/null
+++ b/website/blog/en/2022/admixer-aggregates-over-1-billion-unique-users-a-day-using-clickhouse.md
@@ -0,0 +1,135 @@
+---
+title: 'Admixer Aggregates Over 1 Billion Unique Users a Day using ClickHouse'
+image: 'https://blog-images.clickhouse.com/en/2022/admixer-case-study/featured.jpg'
+date: '2022-01-11'
+author: 'Vladimir Zakrevsky'
+tags: ['company']
+---
+
+## Highlights
+
+* Inserting around 100 billion records per day, over 1 million records per second
+* Able to aggregate over 1 billion unique users a day
+* Moved from MSSQL to Azure Table Storage to ClickHouse
+* ClickHouse is deployed on 15 servers with 2 TB total RAM
+
+Admixer is an Ad-Tech company that provides all the components to build infrastructure for advertising products for brands, ad agencies, media houses, publishers, ad networks, and other buy- and sell-side industry players looking for effective ad management. A distinctive feature of Admixer is their technology, which allows:
+
+* Agencies to place advertising campaigns with specified execution conditions (terms, budget, creative display settings)
+* Set the rules for distributing advertising campaign budgets among thousands of publishers
+* Provide accounts for publishers, where they can not only see income statistics or withdraw money but also create their own advertising campaigns, as well as connect other sources of monetization in addition to Network advertising campaigns.
+
+Admixer's products include:
+
+* SSP - Supply-side platform where publishers/websites offer advertising space
+* DSP - Demand-side platform where advertisers buy advertising space
+* ADX - Ad exchange (connects SSPs and DSPs - buyers and sellers of advertisements and advertising space)
+* DMP - Data management platform (used by advertisers to configure the audience they want to target)
+
+ Admixer not only provides access to these products but also allows customers to build an entire ecosystem.
+
+## Why We Chose ClickHouse
+
+To make this ecosystem possible, Admixer began developing an Advertising Exchange. Initially, AdExchange was based on the sale of local inventory by external DSPs. Then it began to aggregate the traffic of external SSPs to place local advertisements on it, and later to redirect this traffic to external DSPs. Thus, ADX was created.
+
+In 2015-2016, the share of external inventory was 3% (100 million requests); by the end of 2016 it was more than 90% (3 billion requests). With this sharp increase in requests, the processing load grew, and most importantly, so did the load of storing and serving online analytics. Relational databases could not handle that many inserts for statistics records. Before migrating to Azure, we used an MSSQL server which stored the object structure and statistics.
+
+In 2011, when migrating to Azure, we used Azure Table Storage to store and serve statistics. But as the number of transactions and the amount of data grew, this solution was no longer optimal, since Azure Table Storage charges for both the number of transactions and the amount of data.
+
+Thus we needed to:
+
+* Display statistics on advertising transactions in the user interface in real time;
+* Accept a significant amount of data (1 million records per second) for insertion;
+* Aggregate the received data for different sections (40 operations and the same number of metrics);
+* Be able to scale the data warehouse as the number of requests grew;
+* Have full control over our costs.
+
+![Profile Report](https://blog-images.clickhouse.com/en/2022/admixer-case-study/profile-report.png)
+
+This image shows the Profile Report. Any Ad Campaign in Admixer is split into Line Items (Profiles). It is possible to view detailed reports for each Profile, including Date-Time Statistics, Geo, Domains, and SSPs. This report is also updated in real time.
+
+## The Advantages of Using ClickHouse
+
+ClickHouse helps to cope with the challenges above and provides the following benefits:
+
+* Not tied to the platform (we decided to migrate from the cloud);
+* The cluster we built allows us to receive up to a million inserts per second (and we know how to scale up on demand);
+* Has built-in mechanisms for aggregating and distributing data across tables (materialized views);
+* Excellent data compression;
+* Reading speed makes it possible to display statistics directly in the user interface in real time;
+* Has a SQL dialect that provides the ability to build any report;
+* Has several advanced functions (and allows you to write your own) for processing statistics;
+* Built-in HyperLogLog for storing approximate data;
+* Data sampling;
+* Open source / community / good documentation;
+* Constant additions of new features, bug fixes, and improvements to the current functionality;
+* Convenient operations.
+
+## ClickHouse Architecture
+
+Our architecture changed from 2016 to 2020. There are two diagrams below: the state we started from and the state we arrived at.
+
+![Architecture 2016](https://blog-images.clickhouse.com/en/2022/admixer-case-study/architecture-2016.png)
+
+_Architecture 2016_
+
+![Architecture 2020](https://blog-images.clickhouse.com/en/2022/admixer-case-study/architecture-2020.png)
+
+_Architecture 2020_
+
+Requests Handler is a component that accepts a request for an advertisement and determines which banner to display. After the banner is selected, it records this in the statistics. Since 2020, these components have been receiving over 1 million requests per second. Statistics were recorded through an intermediate element named Global Events Queue. Events were retrieved from the GlobalEventsQueue by the EventsProcessor components, additionally validated/enriched, and then written to the ClickHouse cluster.
+
+Initially, EventsProcessor wrote into several ClickHouse tables in parallel, but we then switched to a Buffer -> Null table -> materialized views insertion path (a minimal sketch of this pattern is shown after the list below). We will next investigate if the new [asynchronous insert feature](https://clickhouse.com/blog/en/2021/clickhouse-v21.11-released/#async-inserts) in version 21.11 would be an alternative to using a buffer table.
+
+We also reviewed the implementation of the event queue. Initially, we used Redis, but Redis is in-memory storage, thus:
+
+* On server restart, there was a risk of losing events;
+* The amount of RAM is relatively small, and if we planned to stop the Events Processor or ClickHouse, there was a risk of overflowing the event queue, so a very high response rate to event processor problems was required.
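+
+Before continuing with the event queue story, here is a minimal sketch of the Buffer -> Null table -> materialized view insertion path referred to above. The table and column names are hypothetical, and the Buffer thresholds are the standard documentation example values rather than Admixer's production settings; the point is only to show how the pieces connect:
+
+```sql
+-- Null table: stores nothing, it only passes inserted blocks to the materialized views attached to it.
+CREATE TABLE events_null (event_time DateTime, campaign_id UInt64, impressions UInt64) ENGINE = Null;
+
+-- Buffer table in front of it: accumulates small inserts in memory and flushes them in larger blocks.
+CREATE TABLE events_buffer AS events_null
+ENGINE = Buffer(currentDatabase(), events_null, 16, 10, 100, 10000, 1000000, 10000000, 100000000);
+
+-- Aggregated target table plus the materialized view that fans rows out from the Null table.
+CREATE TABLE campaign_stats
+(
+    day Date,
+    campaign_id UInt64,
+    impressions UInt64
+)
+ENGINE = SummingMergeTree ORDER BY (day, campaign_id);
+
+CREATE MATERIALIZED VIEW campaign_stats_mv TO campaign_stats AS
+SELECT toDate(event_time) AS day, campaign_id, sum(impressions) AS impressions
+FROM events_null
+GROUP BY day, campaign_id;
+
+-- The application only ever writes to the buffer table.
+INSERT INTO events_buffer VALUES (now(), 42, 1);
+```
+
+This is also why the asynchronous insert feature mentioned above is interesting to us: it addresses the same many-small-inserts problem on the server side, without an explicit Buffer table.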
+
+We tried to replace Redis with Kafka, but the Kafka driver for ClickHouse at the time had issues with arrays (which have since been fixed).
+
+Therefore, we implemented our own event queue, which was stored on the disk of each EventHandler component, with the local EventsProcessor located on the same server. The number of EventsProcessor components increased, which means that the number of insert requests into ClickHouse also increased, but this was not a problem.
+
+Since financial optimization was also an essential factor for us, this scheme proved to be excellent in that regard as well. To process and store the data coming from ADX, we assembled a cluster of 15 servers (40 threads, 128 GB RAM, SSD storage each), with some capacity to spare. For storing unique users, we used a cluster of 6 of the same servers.
+
+How data is read from the clusters was also an important point. A carelessly written query can create a significant load on the cluster and slow down other processes. But ClickHouse has settings for limiting resources and allocating quotas to specific users, which allowed us to solve this quickly. All configuration files can be kept in the configuration management system and managed from there.
+
+## ClickHouse Handles Over 1 Billion Unique Users Per Day
+
+In addition to statistics aggregation, which sums up metrics by dimension, Admixer provides information on how many unique users have watched ads over an arbitrary period of time. The number of uniques cannot be summed up. In our system, the user ID is a UUID. When we want the number of unique UUIDs for some arbitrary period, we have to recalculate it for that period each time. We cannot precompute all possible combinations in advance, since the number of combinations is too large.
+
+Before using ClickHouse, we could count uniques only for predefined periods: day, week, month, all time. The number of slices was also limited, and the constant bulk requests to Aerospike slowed down the event processor.
+
+AggregatingMergeTree allowed us to count unique users by a large number of keys in one report at minimal cost. In the beginning, with a cluster of three servers, we could easily count 1 billion uniques per day across ~12 slices. There are nuances: large slices cannot be output to the interface, since simultaneously scanning large tables takes a lot of CPU time. The solution to this problem was a report generation service, which has its own internal queue and sends the already generated CSV files to the interface. Small slices, on the other hand, can be output to the interface with a limited date range.
+
+ClickHouse was also perfect as big data storage for our ML models.
+
+## Advice To Others Who Might Be Considering ClickHouse
+
+The Devil is in the details!
+
+ClickHouse technical tips:
+
+* If you do not need high data accuracy, use HyperLogLog and sampling;
+* Run load tests to determine the number of operations your cluster can withstand for your data structure before assembling the cluster;
+* Buffer tables are a great way to insert data, but watch out for memory;
+* Use the Native format for inserts;
+* Avoid large numbers of small parts with continuous-flow insertion: too many small inserts cause a lot of background merges and can end in the "Too many parts (300)" error;
+* It is necessary to decide on the replication scheme at the beginning.
+  One option is to use ZooKeeper and let tables replicate themselves using ReplicatedMergeTree and other replicating table engines. Because we had many tables and wanted to choose which parts of the data to replicate to which servers, we chose not to use ZooKeeper and instead have our client spread the writes: each write goes to two servers.
+
+Over the past five years, Admixer's Core team has been working with high-load systems and big data aggregation. Any work has its subtleties; do not step on your own rake, use ours.
+
+We offer customers specialized audits, consulting, and ready-made solutions built on ClickHouse for high-load tasks. These specialty services are now offered via our new initiative [LoadFighters](https://loadfighters.com).
+
+### About Admixer
+
+Admixer is an independent adtech company that develops an ecosystem of full-stack programmatic solutions. Admixer has its own line of adtech products for brands, ad agencies, media houses, publishers, ad networks, and other buy- and sell-side industry players looking for effective ad management. Our customizable technology, in-depth expertise, and a personal approach help businesses turn programmatic advertising into a scalable revenue channel.
+
+Since our start in 2008, we’ve been on a mission to build an ecosystem with effective and transparent relationships between all of the players in the digital advertising industry.
+
+Today, the company has over 100 supply and demand partners, 3,000+ customers, and 200+ employees worldwide. It runs offices in Ukraine, Belarus, Kazakhstan, Moldova, and Georgia, and legal entities in the UK and Germany.
+
+For more information, please visit:
+[https://admixer.com/](https://admixer.com/)
+
+
diff --git a/website/images/photos/anne-carlhoff.jpg b/website/images/photos/anne-carlhoff.jpg
new file mode 100644
index 00000000000..4bbc9265585
Binary files /dev/null and b/website/images/photos/anne-carlhoff.jpg differ
diff --git a/website/images/photos/baird-garrett.jpg b/website/images/photos/baird-garrett.jpg
new file mode 100644
index 00000000000..3400431e379
Binary files /dev/null and b/website/images/photos/baird-garrett.jpg differ
diff --git a/website/images/photos/dale-mcdiarmid.jpg b/website/images/photos/dale-mcdiarmid.jpg
new file mode 100644
index 00000000000..bf590696a87
Binary files /dev/null and b/website/images/photos/dale-mcdiarmid.jpg differ
diff --git a/website/images/photos/geoffrey-genz.jpg b/website/images/photos/geoffrey-genz.jpg
new file mode 100644
index 00000000000..6d86aca47f7
Binary files /dev/null and b/website/images/photos/geoffrey-genz.jpg differ
diff --git a/website/images/photos/marcel-birkner.jpg b/website/images/photos/marcel-birkner.jpg
new file mode 100644
index 00000000000..6ec821cfb66
Binary files /dev/null and b/website/images/photos/marcel-birkner.jpg differ
diff --git a/website/images/photos/melvyn-peignon.jpg b/website/images/photos/melvyn-peignon.jpg
new file mode 100644
index 00000000000..532c1759c65
Binary files /dev/null and b/website/images/photos/melvyn-peignon.jpg differ
diff --git a/website/images/photos/michael-lex.jpg b/website/images/photos/michael-lex.jpg
new file mode 100644
index 00000000000..0e6de27a14e
Binary files /dev/null and b/website/images/photos/michael-lex.jpg differ
diff --git a/website/images/photos/nihat-hosgur.jpg b/website/images/photos/nihat-hosgur.jpg
new file mode 100644
index 00000000000..ad47b4aba50
Binary files /dev/null and b/website/images/photos/nihat-hosgur.jpg differ
diff --git
a/website/images/photos/nikolay-degterinsky.jpg b/website/images/photos/nikolay-degterinsky.jpg new file mode 100644 index 00000000000..620c2d83f51 Binary files /dev/null and b/website/images/photos/nikolay-degterinsky.jpg differ diff --git a/website/images/photos/nir-peled.jpg b/website/images/photos/nir-peled.jpg new file mode 100644 index 00000000000..a8952465164 Binary files /dev/null and b/website/images/photos/nir-peled.jpg differ diff --git a/website/images/photos/sergei-trifonov.jpg b/website/images/photos/sergei-trifonov.jpg new file mode 100644 index 00000000000..87ce88a3b1b Binary files /dev/null and b/website/images/photos/sergei-trifonov.jpg differ diff --git a/website/images/photos/tanya-bragin.jpg b/website/images/photos/tanya-bragin.jpg new file mode 100644 index 00000000000..0b5a6972b01 Binary files /dev/null and b/website/images/photos/tanya-bragin.jpg differ diff --git a/website/images/photos/tom-schreiber.jpg b/website/images/photos/tom-schreiber.jpg new file mode 100644 index 00000000000..ec227de6122 Binary files /dev/null and b/website/images/photos/tom-schreiber.jpg differ diff --git a/website/images/photos/yuko-takagi.jpg b/website/images/photos/yuko-takagi.jpg new file mode 100644 index 00000000000..eb44e414256 Binary files /dev/null and b/website/images/photos/yuko-takagi.jpg differ diff --git a/website/templates/company/team.html b/website/templates/company/team.html index b4ed1c26a29..e8cc07751dd 100644 --- a/website/templates/company/team.html +++ b/website/templates/company/team.html @@ -19,6 +19,20 @@ {{ _('Principal Sofware Engineer') }}

+ +
+ + + + +

+ {{ _('Marcel Birkner') }} +

+

+ {{ _('Cloud SWE') }} +

+
@@ -32,6 +46,33 @@ {{ _('VP, Product') }}

+
+
+ + + + +

+ {{ _('Tanya Bragin') }} +

+

+ {{ _('VP, Product') }} +

+ +
+
+ + + + +

+ {{ _('Anne Carlhoff') }} +

+

+ {{ _('Sr Recruiter') }} +

+
@@ -58,6 +99,19 @@ {{ _('Software Engineer') }}

+
+
+ + + + +

+ {{ _('Nikolay Degterinsky') }} +

+

+ {{ _('Core SWE') }} +

+
@@ -71,6 +125,32 @@ {{ _('Senior Director, Business Technology') }}

+
+
+ + + + +

+ {{ _('Baird Garrett') }} +

+

+ {{ _('General Counsel') }} +

+ +
+
+ + + + +

+ {{ _('Geoffrey Genz') }} +

+

+ {{ _('Principal Support Engineer') }} +

+
@@ -97,6 +177,19 @@ {{ _('VP, Sales') }}

+
+
+ + + + +

+ {{ _('Nihat Hosgur') }} +

+

+ {{ _('Principal Cloud SWE') }} +

+
@@ -162,6 +255,19 @@ {{ _('Software Engineer') }}

+
+
+ + + + +

+ {{ _('Michael Lex') }} +

+

+ {{ _('Cloud SWE') }} +

+
@@ -201,6 +307,19 @@ {{ _('Executive Assistant') }}

+
+
+ + + + +

+ {{ _('Dale McDiarmid') }} +

+

+ {{ _('Consulting Architect') }} +

+
@@ -240,6 +359,32 @@ {{ _('VP, Support & Services') }}

+
+
+ + + + +

+ {{ _('Melvyn Peignon') }} +

+

+ {{ _('Manager, Support Services – EMEA') }} +

+ +
+
+ + + + +

+ {{ _('Nir Peled') }} +

+

+ {{ _('Principal UX/UI Engineer') }} +

+
@@ -279,6 +424,19 @@ {{ _('Engineering Team Lead') }}

+
+
+ + + + +

+ {{ _('Tom Schreiber') }} +

+

+ {{ _('Consulting Architect – EMEA') }} +

+
@@ -318,6 +476,19 @@ {{ _('VP, Operations') }}

+
+
+ + + + +

+ {{ _('Yuko Takagi') }} +

+

+ {{ _('Director, Go To Market Technology') }} +

+
@@ -344,6 +515,19 @@ {{ _('Software Engineer') }}

+
+
+ +
+ +
+

+ {{ _('Sergei Trifonov') }} +

+

+ {{ _('Principal Core SWE') }} +

+