diff --git a/.gitattributes b/.gitattributes index efb059f169a..bcc7d57b904 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,2 @@ contrib/* linguist-vendored *.h linguist-language=C++ -# to avoid frequent conflicts -tests/queries/0_stateless/arcadia_skip_list.txt text merge=union diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 98a33927667..859756f07af 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -207,8 +207,6 @@ jobs: - BuilderDebRelease - BuilderDebAsan - BuilderDebTsan - - BuilderDebUBsan - - BuilderDebMsan - BuilderDebDebug runs-on: [self-hosted, style-checker] steps: @@ -333,7 +331,7 @@ jobs: ############################# INTEGRATION TESTS ############################################# ############################################################################################# IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2adfbce3577..57a30d44fae 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -886,7 +886,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan: + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -903,6 +903,70 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p 
$TEMP_PATH @@ -944,7 +1008,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -961,6 +1025,8 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -973,7 +1039,69 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -990,6 +1118,70 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: 
${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1497,8 +1689,8 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: - needs: [BuilderDebAsan, FunctionalStatelessTestAsan] + IntegrationTestsAsan0: + needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1513,6 +1705,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1525,8 +1719,68 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: - needs: [BuilderDebTsan, FunctionalStatelessTestTsan] + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp 
-r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: + needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1541,6 +1795,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1553,8 +1809,98 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 3 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease0: + needs: [BuilderDebRelease] runs-on: 
[self-hosted, stress-tester] steps: - name: Download json reports @@ -1569,6 +1915,38 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1759,13 +2137,19 @@ jobs: - CheckLabels - BuilderReport - FastTest - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseReplicated - FunctionalStatelessTestReleaseWideParts - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -1783,9 +2167,15 @@ jobs: - ASTFuzzerTestTsan - ASTFuzzerTestMSan - ASTFuzzerTestUBSan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + - IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - PVSCheck - UnitTestsAsan - UnitTestsTsan diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 26921b8ea48..cdf66d26310 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -799,7 +799,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan: + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -816,6 +816,70 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: 
${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -857,7 +921,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -874,6 +938,8 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -886,7 +952,69 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH 
+ cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -903,6 +1031,70 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1267,8 +1459,8 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: - needs: [BuilderDebAsan, FunctionalStatelessTestAsan] + IntegrationTestsAsan0: + needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1283,6 +1475,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1295,8 +1489,68 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: - needs: [BuilderDebTsan, FunctionalStatelessTestTsan] + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: 
${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: + needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1311,6 +1565,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1323,8 +1579,98 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: 
${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 3 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease0: + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1339,6 +1685,66 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsFlakyCheck: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan_flaky_check + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests flaky check (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1623,7 +2029,7 @@ jobs: env: TEMP_PATH: ${{runner.temp}}/unit_tests_ubsan REPORTS_PATH: 
${{runner.temp}}/reports_dir - CHECK_NAME: 'Unit tests (msan, actions)' + CHECK_NAME: 'Unit tests (ubsan, actions)' REPO_COPY: ${{runner.temp}}/unit_tests_ubsan/ClickHouse run: | sudo rm -fr $TEMP_PATH @@ -1641,12 +2047,18 @@ jobs: needs: - DockerHubPush - BuilderReport - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseOrdinary - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -1660,9 +2072,15 @@ jobs: - StressTestTsan - StressTestMsan - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + - IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - CompatibilityCheck - ASTFuzzerTestDebug - ASTFuzzerTestAsan diff --git a/CHANGELOG.md b/CHANGELOG.md index f34725448f2..3b6046d38de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,181 @@ +### ClickHouse release v21.12, 2021-12-13 + +#### Backward Incompatible Change + +* *A fix for a feature that previously had unwanted behaviour.* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by setting `stream_like_engine_allow_direct_select`. Direct select will not be allowed, even if enabled by the setting, in case there is an attached materialized view. For Kafka and RabbitMQ, direct select, if allowed, will not commit messages by default. To enable commits with direct select, the user must use the storage-level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)). +* *A slight change in behaviour of a new function.* Return unquoted string in JSON_VALUE. Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)). +* *Setting rename.* Add custom null representation support for TSV/CSV input formats. Fix deserializing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)). +* *Further deprecation of already unused code.* This is relevant only for users of ClickHouse versions older than 20.6. A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then the server will fail to start after the upgrade. Stop replicas with the old version to make the new version start. After that it will not be possible to downgrade to a version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)).
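The workflow changes above split several long checks (stateless TSan/MSan/Debug, integration ASan/TSan/Release) into numbered shard jobs that differ only in `RUN_BY_HASH_NUM` and `RUN_BY_HASH_TOTAL`. Below is a minimal Python sketch of how such hash-based sharding can deterministically partition a test list across shards; it is illustrative only and is not the actual logic of `functional_test_check.py` or `integration_test_check.py`.

```python
import hashlib
import os

def tests_for_this_shard(all_tests, shard_num, shard_total):
    """Keep only the tests whose stable hash falls into this shard.

    Every shard applies the same rule, so together the shards cover the
    full test list exactly once, without any coordination between jobs.
    """
    selected = []
    for name in all_tests:
        digest = hashlib.sha1(name.encode("utf-8")).hexdigest()
        if int(digest, 16) % shard_total == shard_num:
            selected.append(name)
    return selected

if __name__ == "__main__":
    # The workflow passes these two values through the job environment.
    shard_num = int(os.environ.get("RUN_BY_HASH_NUM", "0"))
    shard_total = int(os.environ.get("RUN_BY_HASH_TOTAL", "1"))
    tests = ["00001_select_1", "00002_system_numbers", "01000_join_hash"]
    print(tests_for_this_shard(tests, shard_num, shard_total))
```

A content-addressed split like this keeps shard assignment stable between runs, which is why no extra bookkeeping is needed even though the shard count differs per check in this diff (3 for stateless TSan/MSan/Debug, 4 for integration TSan, 2 for integration release).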
+ +#### New Feature + +* Implemented more of the ZooKeeper Four Letter Words commands in clickhouse-keeper: https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands. [#28981](https://github.com/ClickHouse/ClickHouse/pull/28981) ([JackyWoo](https://github.com/JackyWoo)). Now `clickhouse-keeper` is feature complete. +* Support for `Bool` data type. [#31072](https://github.com/ClickHouse/ClickHouse/pull/31072) ([kevin wan](https://github.com/MaxWk)). +* Support for `PARTITION BY` in File, URL, HDFS storages and with `INSERT INTO` table function. Closes [#30273](https://github.com/ClickHouse/ClickHouse/issues/30273). [#30690](https://github.com/ClickHouse/ClickHouse/pull/30690) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added `CONSTRAINT ... ASSUME ...` (without checking during `INSERT`). Added query transformation to CNF (https://github.com/ClickHouse/ClickHouse/issues/11749) for more convenient optimization. Added simple query rewriting using constraints (only simple matching now, will be improved to support <,=,>... later). Added ability to replace heavy columns with light columns if it's possible. [#18787](https://github.com/ClickHouse/ClickHouse/pull/18787) ([Nikita Vasilev](https://github.com/nikvas0)). +* Basic access authentication for http/url functions. [#31648](https://github.com/ClickHouse/ClickHouse/pull/31648) ([michael1589](https://github.com/michael1589)). +* Support `INTERVAL` type in `STEP` clause for `WITH FILL` modifier. [#30927](https://github.com/ClickHouse/ClickHouse/pull/30927) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for parallel reading from multiple files and support globs in `FROM INFILE` clause. [#30135](https://github.com/ClickHouse/ClickHouse/pull/30135) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Add support for `Identifier` table and database query parameters. Closes [#27226](https://github.com/ClickHouse/ClickHouse/issues/27226). [#28668](https://github.com/ClickHouse/ClickHouse/pull/28668) ([Nikolay Degterinsky](https://github.com/evillique)). +* *TLDR: Major improvements of completeness and consistency of text formats.* Refactor formats `TSV`, `TSVRaw`, `CSV` and `JSONCompactEachRow`, `JSONCompactStringsEachRow`, remove code duplication, add base interface for formats with `-WithNames` and `-WithNamesAndTypes` suffixes. Add formats `CSVWithNamesAndTypes`, `TSVRawWithNames`, `TSVRawWithNamesAndTypes`, `JSONCompactEachRowWIthNames`, `JSONCompactStringsEachRowWIthNames`, `RowBinaryWithNames`. Support parallel parsing for formats `TSVWithNamesAndTypes`, `TSVRaw(WithNames/WIthNamesAndTypes)`, `CSVWithNamesAndTypes`, `JSONCompactEachRow(WithNames/WIthNamesAndTypes)`, `JSONCompactStringsEachRow(WithNames/WIthNamesAndTypes)`. Support columns mapping and types checking for `RowBinaryWithNamesAndTypes` format. Add setting `input_format_with_types_use_header` which specify if we should check that types written in `WIthNamesAndTypes` format matches with table structure. Add setting `input_format_csv_empty_as_default` and use it in CSV format instead of `input_format_defaults_for_omitted_fields` (because this setting should not control `csv_empty_as_default`). Fix usage of setting `input_format_defaults_for_omitted_fields` (it was used only as `csv_empty_as_default`, but it should control calculation of default expressions for omitted fields). Fix Nullable input/output in `TSVRaw` format, make this format fully compatible with inserting into TSV. 
Fix inserting NULLs in `LowCardinality(Nullable)` when `input_format_null_as_default` is enabled (previously default values were inserted instead of actual NULLs). Fix strings deserialization in `JSONStringsEachRow`/`JSONCompactStringsEachRow` formats (strings were parsed just until first '\n' or '\t'). Add ability to use `Raw` escaping rule in Template input format. Add diagnostic info for JSONCompactEachRow(WithNames/WIthNamesAndTypes) input format. Fix bug with parallel parsing of `-WithNames` formats in case when setting `min_chunk_bytes_for_parallel_parsing` is less than bytes in a single row. [#30178](https://github.com/ClickHouse/ClickHouse/pull/30178) ([Kruglov Pavel](https://github.com/Avogar)). Allow to print/parse names and types of columns in `CustomSeparated` input/output format. Add formats `CustomSeparatedWithNames/WithNamesAndTypes` similar to `TSVWithNames/WithNamesAndTypes`. [#31434](https://github.com/ClickHouse/ClickHouse/pull/31434) ([Kruglov Pavel](https://github.com/Avogar)). +* Aliyun OSS Storage support. [#31286](https://github.com/ClickHouse/ClickHouse/pull/31286) ([cfcz48](https://github.com/cfcz48)). +* Exposes all settings of the global thread pool in the configuration file. [#31285](https://github.com/ClickHouse/ClickHouse/pull/31285) ([Tomáš Hromada](https://github.com/gyfis)). +* Introduced window functions `exponentialTimeDecayedSum`, `exponentialTimeDecayedMax`, `exponentialTimeDecayedCount` and `exponentialTimeDecayedAvg` which are more effective than `exponentialMovingAverage` for bigger windows. Also more use-cases were covered. [#29799](https://github.com/ClickHouse/ClickHouse/pull/29799) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add option to compress logs before writing them to a file using LZ4. Closes [#23860](https://github.com/ClickHouse/ClickHouse/issues/23860). [#29219](https://github.com/ClickHouse/ClickHouse/pull/29219) ([Nikolay Degterinsky](https://github.com/evillique)). +* Support `JOIN ON 1 = 1`, which has CROSS JOIN semantics. This closes [#25578](https://github.com/ClickHouse/ClickHouse/issues/25578). [#25894](https://github.com/ClickHouse/ClickHouse/pull/25894) ([Vladimir C](https://github.com/vdimir)). +* Add Map combinator for `Map` type. - Rename old `sum-, min-, max- Map` for mapped arrays to `sum-, min-, max- MappedArrays`. [#24539](https://github.com/ClickHouse/ClickHouse/pull/24539) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Make reading from HTTP retriable. Closes [#29696](https://github.com/ClickHouse/ClickHouse/issues/29696). [#29894](https://github.com/ClickHouse/ClickHouse/pull/29894) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Experimental Feature + +* `WINDOW VIEW` to enable stream processing in ClickHouse. [#8331](https://github.com/ClickHouse/ClickHouse/pull/8331) ([vxider](https://github.com/Vxider)). +* Drop support for using Ordinary databases with `MaterializedMySQL`. [#31292](https://github.com/ClickHouse/ClickHouse/pull/31292) ([Stig Bakken](https://github.com/stigsb)). +* Implement the commands BACKUP and RESTORE for the Log family. This feature is under development. [#30688](https://github.com/ClickHouse/ClickHouse/pull/30688) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Performance Improvement + +* Reduce memory usage when reading with `s3` / `url` / `hdfs` formats `Parquet`, `ORC`, `Arrow` (controlled by setting `input_format_allow_seeks`, enabled by default). Also add setting `remote_read_min_bytes_for_seek` to control seeks. 
Closes [#10461](https://github.com/ClickHouse/ClickHouse/issues/10461). Closes [#16857](https://github.com/ClickHouse/ClickHouse/issues/16857). [#30936](https://github.com/ClickHouse/ClickHouse/pull/30936) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add optimizations for constant conditions in JOIN ON, ref [#26928](https://github.com/ClickHouse/ClickHouse/issues/26928). [#27021](https://github.com/ClickHouse/ClickHouse/pull/27021) ([Vladimir C](https://github.com/vdimir)). +* Support parallel formatting for all text formats, except `JSONEachRowWithProgress` and `PrettyCompactMonoBlock`. [#31489](https://github.com/ClickHouse/ClickHouse/pull/31489) ([Kruglov Pavel](https://github.com/Avogar)). +* Speed up count over nullable columns. [#31806](https://github.com/ClickHouse/ClickHouse/pull/31806) ([Raúl Marín](https://github.com/Algunenano)). +* Speed up `avg` and `sumCount` aggregate functions. [#31694](https://github.com/ClickHouse/ClickHouse/pull/31694) ([Raúl Marín](https://github.com/Algunenano)). +* Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve performance of syncing data to block device. This closes [#31181](https://github.com/ClickHouse/ClickHouse/issues/31181). [#31229](https://github.com/ClickHouse/ClickHouse/pull/31229) ([zhanglistar](https://github.com/zhanglistar)). +* Fixing query performance issue in `LiveView` tables. Fixes [#30831](https://github.com/ClickHouse/ClickHouse/issues/30831). [#31006](https://github.com/ClickHouse/ClickHouse/pull/31006) ([vzakaznikov](https://github.com/vzakaznikov)). +* Speed up query parsing. [#31949](https://github.com/ClickHouse/ClickHouse/pull/31949) ([Raúl Marín](https://github.com/Algunenano)). +* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional `rule_type` field). [#25122](https://github.com/ClickHouse/ClickHouse/pull/25122) ([Michail Safronov](https://github.com/msaf1980)). +* Remove excessive `DESC TABLE` requests for `remote()` (in case of `remote('127.1', system.one)` (i.e. identifier as the db.table instead of string) there was excessive `DESC TABLE` request). [#32019](https://github.com/ClickHouse/ClickHouse/pull/32019) ([Azat Khuzhin](https://github.com/azat)). +* Optimize function `tupleElement` to reading of subcolumn with enabled setting `optimize_functions_to_subcolumns`. [#31261](https://github.com/ClickHouse/ClickHouse/pull/31261) ([Anton Popov](https://github.com/CurtizJ)). +* Optimize function `mapContains` to reading of subcolumn `key` with enabled settings `optimize_functions_to_subcolumns`. [#31218](https://github.com/ClickHouse/ClickHouse/pull/31218) ([Anton Popov](https://github.com/CurtizJ)). +* Add settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem` and `merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem`. [#30970](https://github.com/ClickHouse/ClickHouse/pull/30970) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Skipping mutations of different partitions in `StorageMergeTree`. [#21326](https://github.com/ClickHouse/ClickHouse/pull/21326) ([Vladimir Chebotarev](https://github.com/excitoon)). + +#### Improvement + +* Do not allow to drop a table or dictionary if some tables or dictionaries depend on it. [#30977](https://github.com/ClickHouse/ClickHouse/pull/30977) ([tavplubix](https://github.com/tavplubix)). +* Allow versioning of aggregate function states. 
Now we can introduce backward compatible changes in serialization format of aggregate function states. Closes [#12552](https://github.com/ClickHouse/ClickHouse/issues/12552). [#24820](https://github.com/ClickHouse/ClickHouse/pull/24820) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support PostgreSQL style `ALTER MODIFY COLUMN` syntax. [#32003](https://github.com/ClickHouse/ClickHouse/pull/32003) ([SuperDJY](https://github.com/cmsxbc)). +* Added `update_field` support for `RangeHashedDictionary`, `ComplexKeyRangeHashedDictionary`. [#32185](https://github.com/ClickHouse/ClickHouse/pull/32185) ([Maksim Kita](https://github.com/kitaisreal)). +* The `murmurHash3_128` and `sipHash128` functions now accept an arbitrary number of arguments. This closes [#28774](https://github.com/ClickHouse/ClickHouse/issues/28774). [#28965](https://github.com/ClickHouse/ClickHouse/pull/28965) ([小路](https://github.com/nicelulu)). +* Support default expression for `HDFS` storage and optimize fetching when source is column oriented. [#32256](https://github.com/ClickHouse/ClickHouse/pull/32256) ([李扬](https://github.com/taiyang-li)). +* Improve the operation name of an opentelemetry span. [#32234](https://github.com/ClickHouse/ClickHouse/pull/32234) ([Frank Chen](https://github.com/FrankChen021)). +* Use `Content-Type: application/x-ndjson` (http://ndjson.org/) for output format `JSONEachRow`. [#32223](https://github.com/ClickHouse/ClickHouse/pull/32223) ([Dmitriy Dorofeev](https://github.com/deem0n)). +* Improve skipping unknown fields with quoted escaping rule in Template/CustomSeparated formats. Previously you could skip only quoted strings, now you can skip values with any type. [#32204](https://github.com/ClickHouse/ClickHouse/pull/32204) ([Kruglov Pavel](https://github.com/Avogar)). +* Now `clickhouse-keeper` refuses to start or apply configuration changes when they contain duplicated IDs or endpoints. Fixes [#31339](https://github.com/ClickHouse/ClickHouse/issues/31339). [#32121](https://github.com/ClickHouse/ClickHouse/pull/32121) ([alesapin](https://github.com/alesapin)). +* Set Content-Type in HTTP packets issued from URL engine. [#32113](https://github.com/ClickHouse/ClickHouse/pull/32113) ([Frank Chen](https://github.com/FrankChen021)). +* Return Content-Type as 'application/json' for `JSONEachRow` format if `output_format_json_array_of_rows` is enabled. [#32112](https://github.com/ClickHouse/ClickHouse/pull/32112) ([Frank Chen](https://github.com/FrankChen021)). +* Allow to parse `+` before `Float32`/`Float64` values. [#32079](https://github.com/ClickHouse/ClickHouse/pull/32079) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow a user configured `hdfs_replication` parameter for `DiskHDFS` and `StorageHDFS`. Closes [#32039](https://github.com/ClickHouse/ClickHouse/issues/32039). [#32049](https://github.com/ClickHouse/ClickHouse/pull/32049) ([leosunli](https://github.com/leosunli)). +* Added ClickHouse `exception` and `exception_code` fields to opentelemetry span log. [#32040](https://github.com/ClickHouse/ClickHouse/pull/32040) ([Frank Chen](https://github.com/FrankChen021)). +* Improve opentelemetry span log duration - it was is zero at the query level if there is a query exception. [#32038](https://github.com/ClickHouse/ClickHouse/pull/32038) ([Frank Chen](https://github.com/FrankChen021)). +* Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
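The `Content-Type: application/x-ndjson` change for `JSONEachRow` is easy to observe over the HTTP interface. A small sketch, assuming a server listening on localhost's default HTTP port 8123 (the exact header value, e.g. an appended charset, may vary):

```python
import json
import urllib.parse
import urllib.request

query = "SELECT number, toString(number) AS s FROM system.numbers LIMIT 3 FORMAT JSONEachRow"
url = "http://localhost:8123/?query=" + urllib.parse.quote(query)

with urllib.request.urlopen(url) as resp:
    # Newline-delimited JSON output is now labelled accordingly.
    print(resp.headers.get("Content-Type"))      # expected to contain application/x-ndjson
    for line in resp.read().decode("utf-8").splitlines():
        row = json.loads(line)                   # one JSON object per row
        print(row["number"], row["s"])
```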
+* Recreate `system.*_log` tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)). +* `MaterializedMySQL`: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)). +* ClickHouse dictionary source: support named collections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to use named collections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)) (author: Amos Bird). +* Add key bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)). +* Improve the `max_execution_time` checks. Fixed some cases when timeout checks do not happen and query could run too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)). +* Better exception message when `users.xml` cannot be loaded due to bad password hash. This closes [#24126](https://github.com/ClickHouse/ClickHouse/issues/24126). [#31557](https://github.com/ClickHouse/ClickHouse/pull/31557) ([Vitaly Baranov](https://github.com/vitlibar)). +* Use shard and replica name from `Replicated` database arguments when expanding macros in `ReplicatedMergeTree` arguments if these macros are not defined in config. Closes [#31471](https://github.com/ClickHouse/ClickHouse/issues/31471). [#31488](https://github.com/ClickHouse/ClickHouse/pull/31488) ([tavplubix](https://github.com/tavplubix)). +* Better analysis for `min/max/count` projection. Now, with enabled `allow_experimental_projection_optimization`, virtual `min/max/count` projection can be used together with columns from partition key. [#31474](https://github.com/ClickHouse/ClickHouse/pull/31474) ([Amos Bird](https://github.com/amosbird)). +* Add `--pager` support for `clickhouse-local`. [#31457](https://github.com/ClickHouse/ClickHouse/pull/31457) ([Azat Khuzhin](https://github.com/azat)). +* Fix waiting of the editor during interactive query edition (`waitpid()` returns -1 on `SIGWINCH` and `EDITOR` and `clickhouse-local`/`clickhouse-client` works concurrently). [#31456](https://github.com/ClickHouse/ClickHouse/pull/31456) ([Azat Khuzhin](https://github.com/azat)). +* Throw an exception if there is some garbage after field in `JSONCompactStrings(EachRow)` format. [#31455](https://github.com/ClickHouse/ClickHouse/pull/31455) ([Kruglov Pavel](https://github.com/Avogar)). +* Default value of `http_send_timeout` and `http_receive_timeout` settings changed from 1800 (30 minutes) to 180 (3 minutes). [#31450](https://github.com/ClickHouse/ClickHouse/pull/31450) ([tavplubix](https://github.com/tavplubix)). +* `MaterializedMySQL` now handles `CREATE TABLE ... LIKE ...` DDL queries. 
[#31410](https://github.com/ClickHouse/ClickHouse/pull/31410) ([Stig Bakken](https://github.com/stigsb)). +* Return artificial create query when executing `show create table` on system's tables. [#31391](https://github.com/ClickHouse/ClickHouse/pull/31391) ([SuperDJY](https://github.com/cmsxbc)). +* Previously progress was shown only for `numbers` table function. Now for `numbers_mt` it is also shown. [#31318](https://github.com/ClickHouse/ClickHouse/pull/31318) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Initial user's roles are used now to find row policies, see [#31080](https://github.com/ClickHouse/ClickHouse/issues/31080). [#31262](https://github.com/ClickHouse/ClickHouse/pull/31262) ([Vitaly Baranov](https://github.com/vitlibar)). +* If some obsolete setting is changed - show warning in `system.warnings`. [#31252](https://github.com/ClickHouse/ClickHouse/pull/31252) ([tavplubix](https://github.com/tavplubix)). +* Improved backoff for background cleanup tasks in `MergeTree`. Settings `merge_tree_clear_old_temporary_directories_interval_seconds` and `merge_tree_clear_old_parts_interval_seconds` moved from users settings to merge tree settings. [#31180](https://github.com/ClickHouse/ClickHouse/pull/31180) ([tavplubix](https://github.com/tavplubix)). +* Now every replica will send to client only incremental information about profile events counters. [#31155](https://github.com/ClickHouse/ClickHouse/pull/31155) ([Dmitry Novik](https://github.com/novikd)). This makes `--hardware_utilization` option in `clickhouse-client` usable. +* Enable multiline editing in clickhouse-client by default. This addresses [#31121](https://github.com/ClickHouse/ClickHouse/issues/31121) . [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123) ([Amos Bird](https://github.com/amosbird)). +* Function name normalization for `ALTER` queries. This helps avoid metadata mismatch between creating table with indices/projections and adding indices/projections via alter commands. This is a follow-up PR of https://github.com/ClickHouse/ClickHouse/pull/20174. Mark as improvements as there are no bug reports and the senario is somehow rare. [#31095](https://github.com/ClickHouse/ClickHouse/pull/31095) ([Amos Bird](https://github.com/amosbird)). +* Support `IF EXISTS` modifier for `RENAME DATABASE`/`TABLE`/`DICTIONARY` query. If this directive is used, one will not get an error if the DATABASE/TABLE/DICTIONARY to be renamed doesn't exist. [#31081](https://github.com/ClickHouse/ClickHouse/pull/31081) ([victorgao](https://github.com/kafka1991)). +* Cancel vertical merges when partition is dropped. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/25684 and https://github.com/ClickHouse/ClickHouse/pull/30996. [#31057](https://github.com/ClickHouse/ClickHouse/pull/31057) ([Amos Bird](https://github.com/amosbird)). +* The local session inside a Clickhouse dictionary source won't send its events to the session log anymore. This fixes a possible deadlock (tsan alert) on shutdown. Also this PR fixes flaky `test_dictionaries_dependency_xml/`. [#31013](https://github.com/ClickHouse/ClickHouse/pull/31013) ([Vitaly Baranov](https://github.com/vitlibar)). +* Less locking in ALTER command. [#31010](https://github.com/ClickHouse/ClickHouse/pull/31010) ([Amos Bird](https://github.com/amosbird)). +* Fix `--verbose` option in clickhouse-local interactive mode and allow logging into file. [#30881](https://github.com/ClickHouse/ClickHouse/pull/30881) ([Kseniia Sumarokova](https://github.com/kssenii)). 
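The note above about replicas sending only incremental profile-event information boils down to shipping deltas of cumulative counters and summing them on the client. A conceptual sketch in plain Python (not the native protocol; counter names are illustrative):

```python
from collections import Counter

def increments(previous, current):
    """Delta between two cumulative counter snapshots from one replica."""
    return {name: current[name] - previous.get(name, 0) for name in current}

# Illustrative snapshots a sender might take between two data packets.
prev = {"SelectedRows": 100, "SelectedBytes": 4096}
curr = {"SelectedRows": 250, "SelectedBytes": 10240, "ReadCompressedBytes": 512}

client_totals = Counter()
client_totals.update(increments(prev, curr))  # the client only ever adds deltas
print(dict(client_totals))
```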
+* Added `\l`, `\d`, `\c` commands in `clickhouse-client` like in MySQL and PostgreSQL. [#30876](https://github.com/ClickHouse/ClickHouse/pull/30876) ([Pavel Medvedev](https://github.com/pmed)). +* For clickhouse-local or clickhouse-client: if there is `--interactive` option with `--query` or `--queries-file`, then first execute them like in non-interactive and then start interactive mode. [#30851](https://github.com/ClickHouse/ClickHouse/pull/30851) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible "The local set of parts of X doesn't look like the set of parts in ZooKeeper" error (if DROP fails during removing znodes from zookeeper). [#30826](https://github.com/ClickHouse/ClickHouse/pull/30826) ([Azat Khuzhin](https://github.com/azat)). +* Avro format works against Kafka. Setting `output_format_avro_rows_in_file` added. [#30351](https://github.com/ClickHouse/ClickHouse/pull/30351) ([Ilya Golshtein](https://github.com/ilejn)). +* Allow to specify one or any number of PostgreSQL schemas for one `MaterializedPostgreSQL` database. Closes [#28901](https://github.com/ClickHouse/ClickHouse/issues/28901). Closes [#29324](https://github.com/ClickHouse/ClickHouse/issues/29324). [#28933](https://github.com/ClickHouse/ClickHouse/pull/28933) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Replaced default ports for clickhouse-keeper internal communication from 44444 to 9234. Fixes [#30879](https://github.com/ClickHouse/ClickHouse/issues/30879). [#31799](https://github.com/ClickHouse/ClickHouse/pull/31799) ([alesapin](https://github.com/alesapin)). +* Implement function transform with Decimal arguments. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([李帅](https://github.com/loneylee)). +* Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fixes + +* Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). [#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)). 
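`transform` gaining `Decimal` support (see the entry above) follows the function's usual `transform(x, array_from, array_to, default)` semantics. A pure-Python stand-in to illustrate the mapping, using `Decimal` to mirror exact decimal arithmetic:

```python
from decimal import Decimal

def transform(x, array_from, array_to, default):
    """Return array_to[i] when x equals array_from[i]; otherwise the default."""
    for src, dst in zip(array_from, array_to):
        if x == src:
            return dst
    return default

prices = [Decimal("9.99"), Decimal("19.99"), Decimal("5.00")]
rounded = [
    transform(p,
              [Decimal("9.99"), Decimal("19.99")],
              [Decimal("10.00"), Decimal("20.00")],
              p)                      # unknown values fall through unchanged
    for p in prices
]
print(rounded)  # [Decimal('10.00'), Decimal('20.00'), Decimal('5.00')]
```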
+* Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). +* Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). +* Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). +* Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)). +* Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when data type of default expression is not equal to the data type of column. [#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([tavplubix](https://github.com/tavplubix)). +* Server might fail to start with `Cannot attach 1 tables due to cyclic dependencies` error if `Dictionary` table looks at XML-dictionary with the same name, it's fixed. Fixes [#31315](https://github.com/ClickHouse/ClickHouse/issues/31315). [#32288](https://github.com/ClickHouse/ClickHouse/pull/32288) ([tavplubix](https://github.com/tavplubix)). +* Fix parsing error while NaN deserializing for `Nullable(Float)` for `Quoted` escaping rule. [#32190](https://github.com/ClickHouse/ClickHouse/pull/32190) ([Kruglov Pavel](https://github.com/Avogar)). +* XML dictionaries: identifiers, used in table create query, can be qualified to `default_database` during upgrade to newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)). +* Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([tavplubix](https://github.com/tavplubix)). +* Dictionaries: fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix `CAST` from `Nullable` with `cast_keep_nullable` (`PARAMETER_OUT_OF_BOUND` error before for i.e. `toUInt32OrDefault(toNullable(toUInt32(1)))`). [#32080](https://github.com/ClickHouse/ClickHouse/pull/32080) ([Azat Khuzhin](https://github.com/azat)). +* Fix CREATE TABLE of Join Storage in some obscure cases. Close [#31680](https://github.com/ClickHouse/ClickHouse/issues/31680). [#32066](https://github.com/ClickHouse/ClickHouse/pull/32066) ([SuperDJY](https://github.com/cmsxbc)). +* Fixed `Directory ... already exists and is not empty` error when detaching part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([tavplubix](https://github.com/tavplubix)). 
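Two of the fixes above can be checked with one-liners; a sketch follows. The setting name `enable_positional_arguments` and the expected results are assumptions based on the entries, not part of the patch:

```bash
# 1) cast_keep_nullable: this expression previously raised PARAMETER_OUT_OF_BOUND, now it returns 1.
clickhouse-client -q "SELECT toUInt32OrDefault(toNullable(toUInt32(1))) SETTINGS cast_keep_nullable = 1"

# 2) Positional arguments in GROUP BY / ORDER BY should now work together with aliases.
clickhouse-client -q "SELECT number % 2 AS parity, count() FROM numbers(10) GROUP BY 1 ORDER BY 1 SETTINGS enable_positional_arguments = 1"
```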
+* `MaterializedMySQL` (experimental feature): Fix misinterpretation of `DECIMAL` data from MySQL. [#31990](https://github.com/ClickHouse/ClickHouse/pull/31990) ([Håvard Kvålen](https://github.com/havardk)). +* The `FileLog` (experimental feature) engine unnecessarily created a metadata directory when creating the table failed. Fix [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)). +* A `GET_PART` entry might hang in the replication queue if the part is lost on all replicas and there are no other parts in the same partition. It's fixed for the cases when the partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([tavplubix](https://github.com/tavplubix)). +* Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)). +* Change the configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler`. Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)). +* Fix an invalid cast of Nullable type when a nullable primary key is used. (Nullable primary key is a discouraged feature - please do not use it). This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)). +* Fix a crash in recursive UDFs in SQL. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix a crash when the function `dictGet` is used with a dictionary attribute whose type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix a crash on an empty result of an ODBC query (with some ODBC drivers). Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix disabling the query profiler (in case of `query_profiler_real_time_period_ns>0`/`query_profiler_cpu_time_period_ns>0` the query profiler could stay enabled even after the query finished). [#31740](https://github.com/ClickHouse/ClickHouse/pull/31740) ([Azat Khuzhin](https://github.com/azat)). +* Fixed a rare segfault on concurrent `ATTACH PARTITION` queries. [#31738](https://github.com/ClickHouse/ClickHouse/pull/31738) ([tavplubix](https://github.com/tavplubix)). +* Fix a race in the JSONEachRowWithProgress output format when data and progress lines are mixed in the output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed a `there are no such cluster here` error on execution of an `ON CLUSTER` query if the specified cluster name is the name of a `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([tavplubix](https://github.com/tavplubix)). +* Fix exception on some of the applications of the `decrypt` function on Nullable columns.
This closes [#31662](https://github.com/ClickHouse/ClickHouse/issues/31662). This closes [#31426](https://github.com/ClickHouse/ClickHouse/issues/31426). [#31707](https://github.com/ClickHouse/ClickHouse/pull/31707) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed the function `ngrams` when the string contains UTF-8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)). +* Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`; it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([tavplubix](https://github.com/tavplubix)). +* Fixed a null pointer exception in `MATERIALIZE COLUMN`. [#31679](https://github.com/ClickHouse/ClickHouse/pull/31679) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The `RENAME TABLE` query worked incorrectly on an attempt to rename a DDL dictionary in an `Ordinary` database; it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([tavplubix](https://github.com/tavplubix)). +* Implement the `sparkbar` aggregate function as it was intended, see: [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175)#issuecomment-960353867, [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)). +* Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)). +* Disable `partial_merge_join_left_table_buffer_bytes` until the bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). Remove the redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). +* Fix progress for short `INSERT SELECT` queries. [#31510](https://github.com/ClickHouse/ClickHouse/pull/31510) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong behavior with GROUP BY and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280)#issuecomment-968696186. [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Resolve `nullptr` in the STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Remove the `notLike` function from index analysis, because it was wrong. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). +* Fix a bug in Keeper which could make it unable to start when some coordination logs were lost and there is a fresher snapshot than the latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). +* Rewrite the right distributed table in a local join. Solves [#25809](https://github.com/ClickHouse/ClickHouse/issues/25809). [#31105](https://github.com/ClickHouse/ClickHouse/pull/31105) ([abel-cheng](https://github.com/abel-cheng)). +* Fix `Merge` table with aliases and `WHERE` (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802).
[#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix JSON_VALUE/JSON_QUERY with quoted identifiers. This allows having spaces in the JSON path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Using the `formatRow` function with non-row-oriented formats led to a segfault. Using this function with such formats is no longer allowed (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug which broke SELECT queries if they happened after dropping a materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Skip the `max_partition_size_to_drop` check in case of `ATTACH PARTITION ... FROM` and `MOVE PARTITION ...`. [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). +* Fix some corner cases with the `INTERSECT` and `EXCEPT` operators. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement + +* Fix incorrect filtering result on non-x86 builds. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524). [#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make the ClickHouse build fully reproducible (byte-identical on different machines). This closes [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31899](https://github.com/ClickHouse/ClickHouse/pull/31899) ([alexey-milovidov](https://github.com/alexey-milovidov)). Remove the filesystem path to the build directory from binaries to enable reproducible builds. This is needed for [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31838](https://github.com/ClickHouse/ClickHouse/pull/31838) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Use our own CMakeLists for `zlib-ng`, `cassandra`, `mariadb-connector-c` and `xz`, `re2`, `sentry`, `gsasl`, `arrow`, `protobuf`. This is needed for [#20151](https://github.com/ClickHouse/ClickHouse/issues/20151). Part of [#9226](https://github.com/ClickHouse/ClickHouse/issues/9226). A small step towards removal of annoying trash from the build system. [#30599](https://github.com/ClickHouse/ClickHouse/pull/30599) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Hermetic builds: use a fixed version of libc and make sure that no source or binary files from the host OS are used during the build. This closes [#27133](https://github.com/ClickHouse/ClickHouse/issues/27133). This closes [#21435](https://github.com/ClickHouse/ClickHouse/issues/21435). This closes [#30462](https://github.com/ClickHouse/ClickHouse/issues/30462). [#30011](https://github.com/ClickHouse/ClickHouse/pull/30011) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add the function `getFuzzerData()` to easily fuzz particular functions. This closes [#23227](https://github.com/ClickHouse/ClickHouse/issues/23227).
[#27526](https://github.com/ClickHouse/ClickHouse/pull/27526) ([Alexey Boykov](https://github.com/mathalex)). +* More correct setting up capabilities inside Docker. [#31802](https://github.com/ClickHouse/ClickHouse/pull/31802) ([Constantine Peresypkin](https://github.com/pkit)). +* Enable clang `-fstrict-vtable-pointers`, `-fwhole-program-vtables` compile options. [#20151](https://github.com/ClickHouse/ClickHouse/pull/20151) ([Maksim Kita](https://github.com/kitaisreal)). +* Avoid downloading toolchain tarballs for cross-compiling for FreeBSD. [#31672](https://github.com/ClickHouse/ClickHouse/pull/31672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Initial support for risc-v. See development/build-cross-riscv for quirks and build command that was tested. [#31309](https://github.com/ClickHouse/ClickHouse/pull/31309) ([Vladimir Smirnov](https://github.com/Civil)). +* Support compile in arm machine with parameter "-DENABLE_TESTS=OFF". [#31007](https://github.com/ClickHouse/ClickHouse/pull/31007) ([zhanghuajie](https://github.com/zhanghuajieHIT)). + + ### ClickHouse release v21.11, 2021-11-09 #### Backward Incompatible Change diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a3991bc93c..bc0f119e3f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -149,6 +149,10 @@ if (ENABLE_FUZZING) set (ENABLE_JEMALLOC 0) set (ENABLE_CHECK_HEAVY_BUILDS 1) set (GLIBC_COMPATIBILITY OFF) + + # For codegen_select_fuzzer + set (ENABLE_PROTOBUF 1) + set (USE_INTERNAL_PROTOBUF_LIBRARY 1) endif() # Global libraries diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 000233738f7..c0b0801bd2e 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -24,8 +24,6 @@ set (SRCS if (ENABLE_REPLXX) list (APPEND SRCS ReplxxLineReader.cpp) -elseif (ENABLE_READLINE) - list (APPEND SRCS ReadlineLineReader.cpp) endif () if (USE_DEBUG_HELPERS) @@ -52,28 +50,6 @@ if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES) target_link_libraries(common PUBLIC -Wl,-U,_inside_main) endif() -# Allow explicit fallback to readline -if (NOT ENABLE_REPLXX AND ENABLE_READLINE) - message (STATUS "Attempt to fallback to readline explicitly") - set (READLINE_PATHS "/usr/local/opt/readline/lib") - # First try find custom lib for macos users (default lib without history support) - find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS} NO_DEFAULT_PATH) - if (NOT READLINE_LIB) - find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS}) - endif () - - set(READLINE_INCLUDE_PATHS "/usr/local/opt/readline/include") - find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS} NO_DEFAULT_PATH) - if (NOT READLINE_INCLUDE_DIR) - find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) - endif () - if (READLINE_INCLUDE_DIR AND READLINE_LIB) - target_link_libraries(common PUBLIC ${READLINE_LIB}) - target_compile_definitions(common PUBLIC USE_READLINE=1) - message (STATUS "Using readline: ${READLINE_INCLUDE_DIR} : ${READLINE_LIB}") - endif () -endif () - target_link_libraries (common PUBLIC ${CITYHASH_LIBRARIES} diff --git a/base/base/LineReader.cpp b/base/base/LineReader.cpp index 5beebb58b3b..9491f957762 100644 --- a/base/base/LineReader.cpp +++ b/base/base/LineReader.cpp @@ -10,16 +10,6 @@ #include -#ifdef OS_LINUX -/// We can detect if code is linked with one or another readline variants or open the library dynamically. 
-# include -extern "C" -{ - char * readline(const char *) __attribute__((__weak__)); - char * (*readline_ptr)(const char *) = readline; -} -#endif - #ifdef HAS_RESERVED_IDENTIFIER #pragma clang diagnostic ignored "-Wreserved-identifier" #endif @@ -152,33 +142,6 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) { input.clear(); -#ifdef OS_LINUX - if (!readline_ptr) - { - for (const auto * name : {"libreadline.so", "libreadline.so.0", "libeditline.so", "libeditline.so.0"}) - { - void * dl_handle = dlopen(name, RTLD_LAZY); - if (dl_handle) - { - readline_ptr = reinterpret_cast(dlsym(dl_handle, "readline")); - if (readline_ptr) - { - break; - } - } - } - } - - /// Minimal support for readline - if (readline_ptr) - { - char * line_read = (*readline_ptr)(prompt.c_str()); - if (!line_read) - return ABORT; - input = line_read; - } - else -#endif { std::cout << prompt; std::getline(std::cin, input); diff --git a/base/base/LineReader.h b/base/base/LineReader.h index 0e36a9e01d1..12a856e2051 100644 --- a/base/base/LineReader.h +++ b/base/base/LineReader.h @@ -53,7 +53,6 @@ protected: String input; -private: bool multiline; Patterns extenders; diff --git a/base/base/ReadlineLineReader.cpp b/base/base/ReadlineLineReader.cpp deleted file mode 100644 index de444a0b1d9..00000000000 --- a/base/base/ReadlineLineReader.cpp +++ /dev/null @@ -1,187 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - -#include - -namespace -{ - -/// Trim ending whitespace inplace -void trim(String & s) -{ - s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); -} - -} - -static const LineReader::Suggest * suggest; - -/// Points to current word to suggest. -static LineReader::Suggest::Words::const_iterator pos; -/// Points after the last possible match. -static LineReader::Suggest::Words::const_iterator end; - -/// Set iterators to the matched range of words if any. -static void findRange(const char * prefix, size_t prefix_length) -{ - std::string prefix_str(prefix); - if (auto completions = suggest->getCompletions(prefix_str, prefix_length)) - std::tie(pos, end) = *completions; -} - -/// Iterates through matched range. -static char * nextMatch() -{ - if (pos >= end) - return nullptr; - - /// readline will free memory by itself. - char * word = strdup(pos->c_str()); - ++pos; - return word; -} - -static char * generate(const char * text, int state) -{ - if (!suggest->ready) - return nullptr; - if (state == 0) - findRange(text, strlen(text)); - - /// Do not append whitespace after word. For unknown reason, rl_completion_append_character = '\0' does not work. - rl_completion_suppress_append = 1; - - return nextMatch(); -}; - -ReadlineLineReader::ReadlineLineReader( - const Suggest & suggest_, const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_) - : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)) -{ - suggest = &suggest_; - - if (!history_file_path.empty()) - { - int res = read_history(history_file_path.c_str()); - if (res) - std::cerr << "Cannot read history from file " + history_file_path + ": "+ errnoToString(errno) << std::endl; - } - - /// Added '.' to the default list. Because it is used to separate database and table. - rl_basic_word_break_characters = word_break_characters; - - /// Not append whitespace after single suggestion. Because whitespace after function name is meaningless. 
- rl_completion_append_character = '\0'; - - rl_completion_entry_function = generate; - - /// Install Ctrl+C signal handler that will be used in interactive mode. - - if (rl_initialize()) - throw std::runtime_error("Cannot initialize readline"); - - auto clear_prompt_or_exit = [](int) - { - /// This is signal safe. - ssize_t res = write(STDOUT_FILENO, "\n", 1); - - /// Allow to quit client while query is in progress by pressing Ctrl+C twice. - /// (First press to Ctrl+C will try to cancel query by InterruptListener). - if (res == 1 && rl_line_buffer[0] && !RL_ISSTATE(RL_STATE_DONE)) - { - rl_replace_line("", 0); - if (rl_forced_update_display()) - _exit(0); - } - else - { - /// A little dirty, but we struggle to find better way to correctly - /// force readline to exit after returning from the signal handler. - _exit(0); - } - }; - - if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR) - throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + errnoToString(errno)); - - rl_variable_bind("completion-ignore-case", "on"); - // TODO: it doesn't work - // history_write_timestamps = 1; -} - -ReadlineLineReader::~ReadlineLineReader() -{ -} - -LineReader::InputStatus ReadlineLineReader::readOneLine(const String & prompt) -{ - input.clear(); - - const char* cinput = readline(prompt.c_str()); - if (cinput == nullptr) - return (errno != EAGAIN) ? ABORT : RESET_LINE; - input = cinput; - - trim(input); - return INPUT_LINE; -} - -void ReadlineLineReader::addToHistory(const String & line) -{ - add_history(line.c_str()); - - // Flush changes to the disk - // NOTE readline builds a buffer of all the lines to write, and write them in one syscall. - // Thus there is no need to lock the history file here. - write_history(history_file_path.c_str()); -} - -#if RL_VERSION_MAJOR >= 7 - -#define BRACK_PASTE_PREF "\033[200~" -#define BRACK_PASTE_SUFF "\033[201~" - -#define BRACK_PASTE_LAST '~' -#define BRACK_PASTE_SLEN 6 - -/// This handler bypasses some unused macro/event checkings and remove trailing newlines before insertion. -static int clickhouse_rl_bracketed_paste_begin(int /* count */, int /* key */) -{ - std::string buf; - buf.reserve(128); - - RL_SETSTATE(RL_STATE_MOREINPUT); - SCOPE_EXIT(RL_UNSETSTATE(RL_STATE_MOREINPUT)); - int c; - while ((c = rl_read_key()) >= 0) - { - if (c == '\r') - c = '\n'; - buf.push_back(c); - if (buf.size() >= BRACK_PASTE_SLEN && c == BRACK_PASTE_LAST && buf.substr(buf.size() - BRACK_PASTE_SLEN) == BRACK_PASTE_SUFF) - { - buf.resize(buf.size() - BRACK_PASTE_SLEN); - break; - } - } - trim(buf); - return static_cast(rl_insert_text(buf.c_str())) == buf.size() ? 0 : 1; -} - -#endif - -void ReadlineLineReader::enableBracketedPaste() -{ -#if RL_VERSION_MAJOR >= 7 - rl_variable_bind("enable-bracketed-paste", "on"); - - /// Use our bracketed paste handler to get better user experience. See comments above. 
- rl_bind_keyseq(BRACK_PASTE_PREF, clickhouse_rl_bracketed_paste_begin); -#endif -}; diff --git a/base/base/ReadlineLineReader.h b/base/base/ReadlineLineReader.h deleted file mode 100644 index 95bd23b4634..00000000000 --- a/base/base/ReadlineLineReader.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include "LineReader.h" - -#include -#include - -class ReadlineLineReader : public LineReader -{ -public: - ReadlineLineReader(const Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_); - ~ReadlineLineReader() override; - - void enableBracketedPaste() override; - -private: - InputStatus readOneLine(const String & prompt) override; - void addToHistory(const String & line) override; -}; diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 3c2ac1f8891..5d99da99c8c 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -22,7 +22,14 @@ namespace /// Trim ending whitespace inplace void trim(String & s) { - s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); + s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end()); +} + +/// Check if string ends with given character after skipping whitespaces. +bool ends_with(const std::string_view & s, const std::string_view & p) +{ + auto ss = std::string_view(s.data(), s.rend() - std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); })); + return ss.ends_with(p); } std::string getEditor() @@ -189,8 +196,28 @@ ReplxxLineReader::ReplxxLineReader( rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); }); rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); }); + auto commit_action = [this](char32_t code) + { + std::string_view str = rx.get_state().text(); + + /// Always commit line when we see extender at the end. It will start a new prompt. + for (const auto * extender : extenders) + if (ends_with(str, extender)) + return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); + + /// If we see an delimiter at the end, commit right away. + for (const auto * delimiter : delimiters) + if (ends_with(str, delimiter)) + return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); + + /// If we allow multiline and there is already something in the input, start a newline. + if (multiline && !input.empty()) + return rx.invoke(Replxx::ACTION::NEW_LINE, code); + return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); + }; /// bind C-j to ENTER action. - rx.bind_key(Replxx::KEY::control('J'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }); + rx.bind_key(Replxx::KEY::control('J'), commit_action); + rx.bind_key(Replxx::KEY::ENTER, commit_action); /// By default COMPLETE_NEXT/COMPLETE_PREV was binded to C-p/C-n, re-bind /// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index f13110d7179..8e7c061088a 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54457) +SET(VERSION_REVISION 54458) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 12) +SET(VERSION_MINOR 13) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 503a418dedf0011e9040c3a1b6913e0b5488be4c) -SET(VERSION_DESCRIBE v21.12.1.1-prestable) -SET(VERSION_STRING 21.12.1.1) +SET(VERSION_GITHASH 4cc45c1e15912ee300bca7cc8b8da2b888a70e2a) +SET(VERSION_DESCRIBE v21.13.1.1-prestable) +SET(VERSION_STRING 21.13.1.1) # end of autochange diff --git a/cmake/tools.cmake b/cmake/tools.cmake index eb3624f3b3b..69a37304f58 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -91,6 +91,9 @@ endif () if (LINKER_NAME) if (COMPILER_CLANG AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.0.0)) find_program (LLD_PATH NAMES ${LINKER_NAME}) + if (NOT LLD_PATH) + message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") + endif () set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}") else () diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index fd52ce4a4f3..fb11879fb21 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -1,6 +1,6 @@ if (SANITIZE OR NOT ( ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE OR ARCH_RISCV64)) OR - (OS_DARWIN AND (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" OR CMAKE_BUILD_TYPE STREQUAL "Debug")) + (OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) )) if (ENABLE_JEMALLOC) message (${RECONFIGURE_MESSAGE_LEVEL} diff --git a/contrib/libpqxx b/contrib/libpqxx index 357608d11b7..63e20f9485b 160000 --- a/contrib/libpqxx +++ b/contrib/libpqxx @@ -1 +1 @@ -Subproject commit 357608d11b7a1961c3fb7db2ef9a5dbb2e87da77 +Subproject commit 63e20f9485b8cbeabf99008123248fc9f033e766 diff --git a/contrib/poco b/contrib/poco index 258b9ba6cd2..520a90e02e3 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 258b9ba6cd245ff88e9346f75c43464c403f329d +Subproject commit 520a90e02e3e5cb90afeae1846d161dbc508a6f1 diff --git a/contrib/replxx-cmake/CMakeLists.txt b/contrib/replxx-cmake/CMakeLists.txt index 07f24bae25d..222a38095cb 100644 --- a/contrib/replxx-cmake/CMakeLists.txt +++ b/contrib/replxx-cmake/CMakeLists.txt @@ -8,7 +8,7 @@ if (NOT ENABLE_REPLXX) add_library(replxx INTERFACE) target_compile_definitions(replxx INTERFACE USE_REPLXX=0) - message (STATUS "Not using replxx (Beware! Runtime fallback to readline is possible!)") + message (STATUS "Not using replxx") return() endif() diff --git a/contrib/unixodbc-cmake/linux_x86_64/private/config.h b/contrib/unixodbc-cmake/linux_x86_64/private/config.h index d80a4da4665..59cee9e8565 100644 --- a/contrib/unixodbc-cmake/linux_x86_64/private/config.h +++ b/contrib/unixodbc-cmake/linux_x86_64/private/config.h @@ -202,10 +202,10 @@ #define HAVE_READDIR 1 /* Add readline support */ -#define HAVE_READLINE 1 +/* #undef HAVE_READLINE */ /* Define to 1 if you have the header file. 
*/ -#define HAVE_READLINE_HISTORY_H 1 +/* #undef HAVE_READLINE_HISTORY_H */ /* Use the scandir lib */ /* #undef HAVE_SCANDIR */ diff --git a/debian/changelog b/debian/changelog index a2709485e44..3c1be00d664 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.12.1.1) unstable; urgency=low +clickhouse (21.13.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Tue, 02 Nov 2021 00:56:42 +0300 + -- clickhouse-release Thu, 09 Dec 2021 00:32:58 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index e84cb601c0f..9ce06939a85 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -5,7 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.12.1.* +ARG version=21.13.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 6a6d0e7212c..28e84d359b3 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -52,7 +52,6 @@ RUN apt-get update \ llvm-${LLVM_VERSION} \ llvm-${LLVM_VERSION}-dev \ libicu-dev \ - libreadline-dev \ moreutils \ ninja-build \ pigz \ diff --git a/docker/packager/other/fuzzer.sh b/docker/packager/other/fuzzer.sh index baaf3c4bedf..431352f1126 100755 --- a/docker/packager/other/fuzzer.sh +++ b/docker/packager/other/fuzzer.sh @@ -31,5 +31,6 @@ do mv "$FUZZER_PATH" /output/fuzzers done + tar -zcvf /output/fuzzers.tar.gz /output/fuzzers rm -rf /output/fuzzers diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 96e7e73af33..bfdf65cd56c 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -5,7 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.12.1.* +ARG version=21.13.1.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 76967da9f9a..c24c013646f 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.12.1.* +ARG version=21.13.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 351b4a3c541..764fa9a0f76 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# shellcheck disable=SC2086,SC2001,SC2046 +# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031 set -eux set -o pipefail @@ -35,7 +35,7 @@ function clone fi git diff --name-only master HEAD | tee ci-changed-files.txt else - if [ -v COMMIT_SHA ]; then + if [ -v SHA_TO_TEST ]; then git fetch --depth 2 origin "$SHA_TO_TEST" git checkout "$SHA_TO_TEST" echo "Checked out nominal SHA $SHA_TO_TEST for master" @@ -165,7 +165,7 @@ thread apply all backtrace continue " > script.gdb - gdb -batch -command script.gdb -p $server_pid & + sudo gdb -batch -command script.gdb -p $server_pid & # Check connectivity after we attach gdb, because it might cause the server # to freeze and the fuzzer will fail. 
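The hunks above attach gdb to the server under `sudo` to collect stack traces; a standalone sketch of that watchdog follows. PID discovery and file names here are assumptions, not taken from the scripts:

```bash
# Attach to the running server and dump backtraces of all threads, as the CI scripts do.
server_pid=$(pgrep -f clickhouse-server | head -n1)
cat > script.gdb <<'EOF'
thread apply all backtrace
continue
EOF
sudo gdb -batch -command script.gdb -p "$server_pid" > gdb.log 2>&1 &
```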
@@ -189,6 +189,7 @@ continue --receive_data_timeout_ms=10000 \ --stacktrace \ --query-fuzzer-runs=1000 \ + --testmode \ --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ > >(tail -n 100000 > fuzzer.log) \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index add4dad0132..89c2b19236e 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -7,7 +7,6 @@ RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get -y install \ tzdata \ python3 \ - libreadline-dev \ libicu-dev \ bsdutils \ gdb \ diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 5695be70b9a..e86f17dae70 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -21,7 +21,6 @@ RUN apt-get update \ cgroupfs-mount \ python3-pip \ tzdata \ - libreadline-dev \ libicu-dev \ bsdutils \ curl \ @@ -76,7 +75,7 @@ RUN python3 -m pip install \ minio \ protobuf \ psycopg2-binary==2.8.6 \ - pymongo \ + pymongo==3.11.0 \ pytest \ pytest-timeout \ pytest-xdist \ diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml index e794966bd08..0bdd054420a 100644 --- a/docker/test/integration/runner/compose/docker_compose_mongo.yml +++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml @@ -1,7 +1,7 @@ version: '2.3' services: mongo1: - image: mongo:3.6 + image: mongo:5.0 restart: always environment: MONGO_INITDB_ROOT_USERNAME: root @@ -9,3 +9,9 @@ services: ports: - ${MONGO_EXTERNAL_PORT}:${MONGO_INTERNAL_PORT} command: --profile=2 --verbose + + mongo2: + image: mongo:5.0 + restart: always + ports: + - "27018:27017" diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index b6a06be2ac7..401656c9d09 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -61,7 +61,7 @@ function configure cp -rv right/config left ||: # Start a temporary server to rename the tables - while killall clickhouse-server; do echo . ; sleep 1 ; done + while pkill clickhouse-serv; do echo . ; sleep 1 ; done echo all killed set -m # Spawn temporary in its own process groups @@ -88,7 +88,7 @@ function configure clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: - while killall clickhouse-server; do echo . ; sleep 1 ; done + while pkill clickhouse-serv; do echo . ; sleep 1 ; done echo all killed # Make copies of the original db for both servers. Use hardlinks instead @@ -106,7 +106,7 @@ function configure function restart { - while killall clickhouse-server; do echo . ; sleep 1 ; done + while pkill clickhouse-serv; do echo . ; sleep 1 ; done echo all killed # Change the jemalloc settings here. @@ -261,16 +261,24 @@ function run_tests # Use awk because bash doesn't support floating point arithmetic. profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }") - TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n") - # The grep is to filter out set -x output and keep only time output. - # The '2>&1 >/dev/null' redirects stderr to stdout, and discards stdout. 
- { \ - time "$script_dir/perf.py" --host localhost localhost --port $LEFT_SERVER_PORT $RIGHT_SERVER_PORT \ - --runs "$CHPC_RUNS" --max-queries "$CHPC_MAX_QUERIES" \ - --profile-seconds "$profile_seconds" \ - -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; \ - } 2>&1 >/dev/null | tee >(grep -v ^+ >> "wall-clock-times.tsv") \ - || echo "Test $test_name failed with error code $?" >> "$test_name-err.log" + ( + set +x + argv=( + --host localhost localhost + --port "$LEFT_SERVER_PORT" "$RIGHT_SERVER_PORT" + --runs "$CHPC_RUNS" + --max-queries "$CHPC_MAX_QUERIES" + --profile-seconds "$profile_seconds" + + "$test" + ) + TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n") + # one more subshell to suppress trace output for "set +x" + ( + time "$script_dir/perf.py" "${argv[@]}" > "$test_name-raw.tsv" 2> "$test_name-err.log" + ) 2>>wall-clock-times.tsv >/dev/null \ + || echo "Test $test_name failed with error code $?" >> "$test_name-err.log" + ) 2>/dev/null profile_seconds_left=$(awk -F' ' \ 'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \ @@ -278,8 +286,6 @@ function run_tests current_test=$((current_test + 1)) done - unset TIMEFORMAT - wait } @@ -291,7 +297,7 @@ function get_profiles_watchdog for pid in $(pgrep -f clickhouse) do - gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" & + sudo gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" & done wait @@ -1409,7 +1415,7 @@ case "$stage" in while env kill -- -$watchdog_pid ; do sleep 1; done # Stop the servers to free memory for the subsequent query analysis. - while killall clickhouse; do echo . ; sleep 1 ; done + while pkill clickhouse-serv; do echo . ; sleep 1 ; done echo Servers stopped. ;& "analyze_queries") diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 301c5cc7d73..e4366852232 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -354,11 +354,9 @@ for query_index in queries_to_run: print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') if elapsed > args.max_query_seconds: - # Stop processing pathologically slow queries, to avoid timing out - # the entire test task. This shouldn't really happen, so we don't - # need much handling for this case and can just exit. + # Do not stop processing pathologically slow queries, + # since this may hide errors in other queries. print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr) - exit(2) # Be careful with the counter, after this line it's the next iteration # already. 
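The rewritten timing code in compare.sh above boils down to the following pattern: measure a command with `time`, append a tab-separated record to a TSV, and keep `set -x` traces out of the measurements. This is a reduced sketch with placeholder names, not the actual test invocation:

```bash
test_name="example_test"
(
    set +x
    TIMEFORMAT=$(printf "%s\t%%3R\t%%3U\t%%3S\n" "$test_name")
    # One more subshell so the trace output of `set +x` itself does not reach the TSV.
    ( time sleep 0.1 ) 2>>wall-clock-times.tsv >/dev/null
) 2>/dev/null
cat wall-clock-times.tsv   # example_test<TAB>real<TAB>user<TAB>sys
```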
diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 680392df43e..8202a07f017 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -61,6 +61,7 @@ chmod 777 -R /var/lib/clickhouse clickhouse-client --query "SHOW DATABASES" clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" + service clickhouse-server restart # Wait for server to start accepting connections @@ -109,15 +110,25 @@ function run_tests() fi set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \ + clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \ + --skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \ "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + + clickhouse-test --timeout 1200 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \ + 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt + set -e } export -f run_tests timeout "$MAX_RUN_TIME" bash -c run_tests ||: -./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +echo "Files in current directory" +ls -la ./ +echo "Files in root directory" +ls -la / + +/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 7de8c061673..05d26924b15 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -49,7 +49,6 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 ENV MAX_RUN_TIME=0 - # Download Minio-related binaries RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \ && chmod +x ./minio \ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 93f64fdec66..d6d9f189e89 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -96,6 +96,13 @@ function run_tests() ADDITIONAL_OPTIONS+=('8') fi + if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then + ADDITIONAL_OPTIONS+=('--run-by-hash-num') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM") + ADDITIONAL_OPTIONS+=('--run-by-hash-total') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL") + fi + set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ @@ -108,7 +115,12 @@ export -f run_tests timeout "$MAX_RUN_TIME" bash -c run_tests ||: -./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +echo "Files in current directory" +ls -la ./ +echo "Files in root directory" +ls -la / + +/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv clickhouse-client -q "system flush logs" ||: diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 673e4c11570..2ed4050d514 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -1,6 +1,7 @@ #!/bin/bash # shellcheck disable=SC2094 # shellcheck disable=SC2086 +# shellcheck disable=SC2024 set -x @@ -55,9 +56,41 @@ function configure() echo "1" \ > 
/etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml + local total_mem + total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB + total_mem=$(( total_mem*1024 )) # bytes # Set maximum memory usage as half of total memory (less chance of OOM). - echo "0.5" \ - > /etc/clickhouse-server/config.d/max_server_memory_usage_to_ram_ratio.xml + # + # But not via max_server_memory_usage but via max_memory_usage_for_user, + # so that we can override this setting and execute service queries, like: + # - hung check + # - show/drop database + # - ... + # + # So max_memory_usage_for_user will be a soft limit, and + # max_server_memory_usage will be hard limit, and queries that should be + # executed regardless memory limits will use max_memory_usage_for_user=0, + # instead of relying on max_untracked_memory + local max_server_mem + max_server_mem=$((total_mem*75/100)) # 75% + echo "Setting max_server_memory_usage=$max_server_mem" + cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml < + ${max_server_mem} + +EOL + local max_users_mem + max_users_mem=$((total_mem*50/100)) # 50% + echo "Setting max_memory_usage_for_user=$max_users_mem" + cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < + + + ${max_users_mem} + + + +EOL } function stop() @@ -110,7 +143,7 @@ quit # FIXME Hung check may work incorrectly because of attached gdb # 1. False positives are possible # 2. We cannot attach another gdb to get stacktraces if some queries hung - gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & + sudo gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & } configure diff --git a/docker/test/stress/stress b/docker/test/stress/stress index acb45b05636..c89c5ff5e27 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -75,6 +75,9 @@ def call_with_retry(query, timeout=30, retry_count=5): else: break +def make_query_command(query): + return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi --max_memory_usage_for_user=0""" + def prepare_for_hung_check(drop_databases): # FIXME this function should not exist, but... @@ -88,40 +91,41 @@ def prepare_for_hung_check(drop_databases): logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") - # Some tests set too low memory limit for default user and forget to reset in back. - # It may cause SYSTEM queries to fail, let's disable memory limit. - call_with_retry("clickhouse client --max_memory_usage_for_user=0 -q 'SELECT 1 FORMAT Null'") + call_with_retry(make_query_command('SELECT 1 FORMAT Null')) # Some tests execute SYSTEM STOP MERGES or similar queries. # It may cause some ALTERs to hang. # Possibly we should fix tests and forbid to use such queries without specifying table. 
- call_with_retry("clickhouse client -q 'SYSTEM START MERGES'") - call_with_retry("clickhouse client -q 'SYSTEM START DISTRIBUTED SENDS'") - call_with_retry("clickhouse client -q 'SYSTEM START TTL MERGES'") - call_with_retry("clickhouse client -q 'SYSTEM START MOVES'") - call_with_retry("clickhouse client -q 'SYSTEM START FETCHES'") - call_with_retry("clickhouse client -q 'SYSTEM START REPLICATED SENDS'") - call_with_retry("clickhouse client -q 'SYSTEM START REPLICATION QUEUES'") + call_with_retry(make_query_command('SYSTEM START MERGES')) + call_with_retry(make_query_command('SYSTEM START DISTRIBUTED SENDS')) + call_with_retry(make_query_command('SYSTEM START TTL MERGES')) + call_with_retry(make_query_command('SYSTEM START MOVES')) + call_with_retry(make_query_command('SYSTEM START FETCHES')) + call_with_retry(make_query_command('SYSTEM START REPLICATED SENDS')) + call_with_retry(make_query_command('SYSTEM START REPLICATION QUEUES')) + call_with_retry(make_query_command('SYSTEM DROP MARK CACHE')) # Issue #21004, live views are experimental, so let's just suppress it - call_with_retry("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """) + call_with_retry(make_query_command("KILL QUERY WHERE upper(query) LIKE 'WATCH %'")) # Kill other queries which known to be slow # It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds - call_with_retry("""clickhouse client -q "KILL QUERY WHERE query LIKE 'insert into tableB select %'" """) + call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'")) # Long query from 00084_external_agregation - call_with_retry("""clickhouse client -q "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" """) + call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'")) if drop_databases: for i in range(5): try: # Here we try to drop all databases in async mode. If some queries really hung, than drop will hung too. # Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds). - databases = check_output('clickhouse client -q "SHOW DATABASES"', shell=True, timeout=30).decode('utf-8').strip().split() + # + # Also specify max_untracked_memory to allow 1GiB of memory to overcommit. 
+ databases = check_output(make_query_command('SHOW DATABASES'), shell=True, timeout=30).decode('utf-8').strip().split() for db in databases: if db == "system": continue - command = f'clickhouse client -q "DROP DATABASE {db}"' + command = make_query_command(f'DROP DATABASE {db}') # we don't wait for drop Popen(command, shell=True) break @@ -133,9 +137,15 @@ def prepare_for_hung_check(drop_databases): # Wait for last queries to finish if any, not longer than 300 seconds - call("""clickhouse client -q "select sleepEachRow(( - select maxOrDefault(300 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 300 - ) / 300) from numbers(300) format Null" """, shell=True, stderr=STDOUT, timeout=330) + call(make_query_command(""" + select sleepEachRow(( + select maxOrDefault(300 - elapsed) + 1 + from system.processes + where query not like '%from system.processes%' and elapsed < 300 + ) / 300) + from numbers(300) + format Null + """), shell=True, stderr=STDOUT, timeout=330) # Even if all clickhouse-test processes are finished, there are probably some sh scripts, # which still run some new queries. Let's ignore them. @@ -188,7 +198,24 @@ if __name__ == "__main__": if args.hung_check: have_long_running_queries = prepare_for_hung_check(args.drop_databases) logging.info("Checking if some queries hung") - cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1") + cmd = ' '.join([args.test_cmd, + # Do not track memory allocations up to 1Gi, + # this will allow to ignore server memory limit (max_server_memory_usage) for this query. + # + # NOTE: memory_profiler_step should be also adjusted, because: + # + # untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step) + # + # NOTE: that if there will be queries with GROUP BY, this trick + # will not work due to CurrentMemoryTracker::check() from + # Aggregator code. + # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY. + "--client-option", "max_untracked_memory=1Gi", + "--client-option", "max_memory_usage_for_user=0", + "--client-option", "memory_profiler_step=1Gi", + "--hung-check", + "00001_select_1" + ]) res = call(cmd, shell=True, stderr=STDOUT) hung_check_status = "No queries hung\tOK\n" if res != 0 and have_long_running_queries: diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index 8ea3cd46973..d15f237587b 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -21,7 +21,6 @@ RUN apt-get update \ cgroupfs-mount \ python3-pip \ tzdata \ - libreadline-dev \ libicu-dev \ bsdutils \ curl \ diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index d08de080e6b..4b7473f76ad 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -262,7 +262,7 @@ In the example below, the index can’t be used. SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' ``` -To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force_primary_key](../../../operations/settings/settings.md). 
+To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force_primary_key](../../../operations/settings/settings.md#force-primary-key). The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date. diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md new file mode 100644 index 00000000000..b30715082ec --- /dev/null +++ b/docs/en/interfaces/grpc.md @@ -0,0 +1,99 @@ +--- +toc_priority: 19 +toc_title: gRPC Interface +--- + +# gRPC Interface {#grpc-interface} + +## Introduction {#grpc-interface-introduction} + +ClickHouse supports [gRPC](https://grpc.io/) interface. It is an open source remote procedure call system that uses HTTP/2 and [Protocol Buffers](https://en.wikipedia.org/wiki/Protocol_Buffers). The implementation of gRPC in ClickHouse supports: + +- SSL; +- authentication; +- sessions; +- compression; +- parallel queries through the same channel; +- cancellation of queries; +- getting progress and logs; +- external tables. + +The specification of the interface is described in [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). + +## gRPC Configuration {#grpc-interface-configuration} + +To use the gRPC interface set `grpc_port` in the main [server configuration](../operations/configuration-files.md). Other configuration options see in the following example: + +```xml +9100 + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + deflate + + + medium + + + -1 + -1 + + + false + +``` + +## Built-in Client {#grpc-client} + +You can write a client in any of the programming languages supported by gRPC using the provided [specification](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). +Or you can use a built-in Python client. It is placed in [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) in the repository. The built-in client requires [grpcio and grpcio-tools](https://grpc.io/docs/languages/python/quickstart) Python modules. + +The client supports the following arguments: + +- `--help` – Shows a help message and exits. +- `--host HOST, -h HOST` – A server name. Default value: `localhost`. You can use IPv4 or IPv6 addresses also. +- `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). Default value: `9100`. +- `--user USER_NAME, -u USER_NAME` – A user name. Default value: `default`. +- `--password PASSWORD` – A password. Default value: empty string. +- `--query QUERY, -q QUERY` – A query to process when using non-interactive mode. +- `--database DATABASE, -d DATABASE` – A default database. If not specified, the current database set in the server settings is used (`default` by default). +- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). 
Default value for interactive mode: `PrettyCompact`. +- `--debug` – Enables showing debug information. + +To run the client in an interactive mode call it without `--query` argument. + +In a batch mode query data can be passed via `stdin`. + +**Client Usage Example** + +In the following example a table is created and loaded with data from a CSV file. Then the content of the table is queried. + +``` bash +./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;" +echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt +cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" + +./clickhouse-grpc-client.py --format PrettyCompact -q "SELECT * FROM grpc_example_table;" +``` + +Result: + +``` text +┌─id─┬─text──────────────────┐ +│ 0 │ Input data for │ +│ 1 │ gRPC protocol example │ +└────┴───────────────────────┘ +``` diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index 10f15ae47d6..7b73cec22a0 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -6,10 +6,11 @@ toc_title: Introduction # Interfaces {#interfaces} -ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security): +ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security): - [HTTP](http.md), which is documented and easy to use directly. - [Native TCP](../interfaces/tcp.md), which has less overhead. +- [gRPC](grpc.md). In most cases it is recommended to use appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following: @@ -24,4 +25,3 @@ There are also a wide range of third-party libraries for working with ClickHouse - [Integrations](../interfaces/third-party/integrations.md) - [Visual interfaces](../interfaces/third-party/gui.md) -[Original article](https://clickhouse.com/docs/en/interfaces/) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index b1c53b61b12..eb4673be18a 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -105,7 +105,7 @@ ClickHouse Keeper is bundled into the ClickHouse server package, just add config clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon ``` -## Four Latter Word Commands +## Four Letter Word Commands ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 0fd1e54955c..af75d130ed3 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -356,3 +356,24 @@ Possible values: - 1 — Parts are detached. Default value: `0`. + +## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds} + +Sets the interval in seconds for ClickHouse to execute the cleanup of old temporary directories. + +Possible values: + +- Any positive integer. + +Default value: `60` seconds. 
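Since these cleanup intervals are now MergeTree-level settings (see the improvement entry earlier in this changelog), they can presumably also be set per table; the table name and values below are illustrative only:

```bash
# Hypothetical example: override the cleanup intervals for a single table.
clickhouse-client -q "
    CREATE TABLE cleanup_demo (x UInt64)
    ENGINE = MergeTree ORDER BY x
    SETTINGS merge_tree_clear_old_temporary_directories_interval_seconds = 120,
             merge_tree_clear_old_parts_interval_seconds = 5"
```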
+ +## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds} + +Sets the interval in seconds for ClickHouse to execute the cleanup of old parts, WALs, and mutations. + +Possible values: + +- Any positive integer. + +Default value: `1` second. + diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index cea0a47781f..30d129d9b29 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -885,26 +885,6 @@ Possible values: Default value: 2013265920. -## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds} - -Sets the interval in seconds for ClickHouse to execute the cleanup of old temporary directories. - -Possible values: - -- Any positive integer. - -Default value: `60` seconds. - -## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds} - -Sets the interval in seconds for ClickHouse to execute the cleanup of old parts, WALs, and mutations. - -Possible values: - -- Any positive integer. - -Default value: `1` second. - ## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io} The minimum data volume required for using direct I/O access to the storage disk. @@ -992,9 +972,16 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' Setting up query threads logging. -Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. +Query threads are logged in the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has an effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. -Example: +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `1`. + +**Example** ``` text log_query_threads=1 ``` @@ -4057,6 +4044,41 @@ Possible values: Default value: `0`. +## alter_partition_verbose_result {#alter-partition-verbose-result} + +Enables or disables the display of information about the parts to which the manipulation operations with partitions and parts have been successfully applied. +Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) and to [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition). + +Possible values: + +- 0 — disable verbosity. +- 1 — enable verbosity. + +Default value: `0`.
+ +**Example** + +```sql +CREATE TABLE test(a Int64, d Date, s String) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY a; +INSERT INTO test VALUES(1, '2021-01-01', ''); +INSERT INTO test VALUES(1, '2021-01-01', ''); +ALTER TABLE test DETACH PARTITION ID '202101'; + +ALTER TABLE test ATTACH PARTITION ID '202101' SETTINGS alter_partition_verbose_result = 1; + +┌─command_type─────┬─partition_id─┬─part_name────┬─old_part_name─┐ +│ ATTACH PARTITION │ 202101 │ 202101_7_7_0 │ 202101_5_5_0 │ +│ ATTACH PARTITION │ 202101 │ 202101_8_8_0 │ 202101_6_6_0 │ +└──────────────────┴──────────────┴──────────────┴───────────────┘ + +ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1; + +┌─command_type─┬─partition_id─┬─part_name────┬─backup_name─┬─backup_path───────────────────┬─part_backup_path────────────────────────────────────────────┐ +│ FREEZE ALL │ 202101 │ 202101_7_7_0 │ 8 │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_7_7_0 │ +│ FREEZE ALL │ 202101 │ 202101_8_8_0 │ 8 │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_8_8_0 │ +└──────────────┴──────────────┴──────────────┴─────────────┴───────────────────────────────┴─────────────────────────────────────────────────────────────┘ +``` + ## format_capn_proto_enum_comparising_mode {#format-capn-proto-enum-comparising-mode} Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfaces/formats.md#capnproto) `Enum` data type from schema. diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md index 25d7f6522f5..6a6bbef45e2 100644 --- a/docs/en/operations/system-tables/query_views_log.md +++ b/docs/en/operations/system-tables/query_views_log.md @@ -4,8 +4,8 @@ Contains information about the dependent views executed when running a query, fo To start logging: -1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section. -2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1. +1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section. +2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 6866c4db491..477d3b52965 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -34,7 +34,7 @@ Use `perf top` to watch the time spent in the kernel for memory management. Permanent huge pages also do not need to be allocated. !!! warning "Attention" - If your system has less than 16 GB of RAM you may experience various memory exceptions because default settings does not match this amount of RAM. Recommended amount of RAM is 32 GB or more. You can use ClickHouse in system with small amount of RAM, even with 2 GB of RAM, but it requires an additional tuning and able to process small ingestion rate. 
+ If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. ## Storage Subsystem {#storage-subsystem} diff --git a/docs/en/operations/utilities/odbc-bridge.md b/docs/en/operations/utilities/odbc-bridge.md index 70b413c9c1f..e5967085c49 100644 --- a/docs/en/operations/utilities/odbc-bridge.md +++ b/docs/en/operations/utilities/odbc-bridge.md @@ -26,7 +26,7 @@ Query is send in post body. Response is returned in RowBinary format. ```bash $ clickhouse-odbc-bridge --http-port 9018 --daemon -$ curl -d "query=SELECT PageID, ImpID, AdType FROM Keys ORDER BY PageID, ImpID" --data-urlencode "connection_string=DSN=ClickHouse;DATABASE=stat" --data-urlencode "columns=columns format version: 1 +$ curl -d "query=SELECT PageID, ImpID, AdType FROM Keys ORDER BY PageID, ImpID" --data-urlencode "connection_string=DSN=ClickHouse;DATABASE=stat" --data-urlencode "sample_block=columns format version: 1 3 columns: \`PageID\` String \`ImpID\` String diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md new file mode 100644 index 00000000000..47c696129c7 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -0,0 +1,64 @@ +--- +toc_priority: 311 +toc_title: sparkbar +--- + +# sparkbar {#sparkbar} + +The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`. + + +If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end. + +**Syntax** + +``` sql +sparkbar(width[, min_x, max_x])(x, y) +``` + +**Parameters** + +- `width` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md). +- `min_x` — The interval start. Optional parameter. +- `max_x` — The interval end. Optional parameter. + +**Arguments** + +- `x` — The field with values. +- `y` — The field with the frequency of values. + +**Returned value** + +- The frequency histogram. 
+ +**Example** + +Query: + +``` sql +CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192; + +INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11'); + +SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data; + +SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data; +``` + +Result: + +``` text + +┌─sparkbar(9)(event_date, cnt)─┐ +│ │ +│ ▁▅▄▃██▅ ▁ │ +│ │ +└──────────────────────────────┘ + +┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ +│ │ +│▁▄▄▂▅▇█▁ │ +│ │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md new file mode 100644 index 00000000000..2ea44a6e585 --- /dev/null +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -0,0 +1,112 @@ +--- +toc_priority: 68 +toc_title: Time Window +--- + +# Time Window Functions {#time-window-functions} + +Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: + +## tumble {#time-window-functions-tumble} + +A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). + +``` sql +tumble(time_attr, interval [, timezone]) +``` + +**Arguments** +- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. + +Type: `Tuple(DateTime, DateTime)` + +**Example** + +Query: + +``` sql +SELECT tumble(now(), toIntervalDay('1')) +``` + +Result: + +``` text +┌─tumble(now(), toIntervalDay('1'))─────────────┐ +│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +└───────────────────────────────────────────────┘ +``` + +## hop {#time-window-functions-hop} + +A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. + +``` sql +hop(time_attr, hop_interval, window_interval [, timezone]) +``` + +**Arguments** + +- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The inclusive lower and exclusive upper bound of the corresponding hopping window. 
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. + +Type: `Tuple(DateTime, DateTime)` + +**Example** + +Query: + +``` sql +SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +``` + +Result: + +``` text +┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ +│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ +└───────────────────────────────────────────────────────────┘ +``` + +## tumbleStart {#time-window-functions-tumblestart} + +Returns the inclusive lower bound of the corresponding tumbling window. + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +## tumbleEnd {#time-window-functions-tumbleend} + +Returns the exclusive upper bound of the corresponding tumbling window. + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +## hopStart {#time-window-functions-hopstart} + +Returns the inclusive lower bound of the corresponding hopping window. + +``` sql +hopStart(time_attr, hop_interval, window_interval [, timezone]); +``` + +## hopEnd {#time-window-functions-hopend} + +Returns the exclusive upper bound of the corresponding hopping window. + +``` sql +hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index b72b75d6de6..8d06e8ea1cc 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -175,6 +175,7 @@ in which the `Strings` represents the named fields of the tuple and `T` are the ``` sql tupleToNameValuePairs(tuple) +``` **Arguments** @@ -196,7 +197,7 @@ CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64) ENGINE = INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100)); SELECT tupleToNameValuePairs(col) FROM tupletest; -``` +``` Result: diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index ec34c57a4cd..8d7d7b48c05 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -5,7 +5,7 @@ toc_title: VIEW # CREATE VIEW {#create-view} -Creates a new view. Views can be [normal](#normal), [materialized](#materialized) and [live](#live-view) (the latter is an experimental feature). +Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). ## Normal View {#normal} @@ -243,3 +243,119 @@ Most common uses of live view tables include: **See Also** - [ALTER LIVE VIEW](../alter/view.md#alter-live-view) + +## Window View [Experimental] {#window-view} + +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. + Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. + +``` sql +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY time_window_function +``` + +Window view can aggregate data by time window and output the results when the window is ready to fire. 
It stores the partial aggregation results in an inner (or specified) table to reduce latency and can push the processing result to a specified table or push notifications using the `WATCH` query. + +Creating a window view is similar to creating a `MATERIALIZED VIEW`. Window view needs an inner storage engine to store intermediate data. The inner storage will use `AggregatingMergeTree` as the default engine. + +### Time Window Functions {#window-view-timewindowfunctions} + +[Time window functions](../../functions/time-window-functions.md) are used to get the lower and upper window bound of records. The window view needs to be used with a time window function. + +### Time Attributes {#window-view-timeattributes} + +Window view supports **processing time** and **event time** processing. + +**Processing time** allows window view to produce results based on the local machine's time and is used by default. It is the most straightforward notion of time but does not provide determinism. The processing time attribute can be defined by setting the `time_attr` of the time window function to a table column or using the function `now()`. The following query creates a window view with processing time. + +``` sql +CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id +``` + +**Event time** is the time that each individual event occurred on its producing device. This time is typically embedded within the records when it is generated. Event time processing allows for consistent results even in case of out-of-order or late events. Window view supports event time processing by using the `WATERMARK` syntax. + +Window view provides three watermark strategies: + +* `STRICTLY_ASCENDING`: Emits a watermark of the maximum observed timestamp so far. Rows with a timestamp smaller than the maximum observed timestamp are not considered late. +* `ASCENDING`: Emits a watermark of the maximum observed timestamp so far minus 1. Rows with a timestamp equal to or smaller than the maximum observed timestamp are not considered late. +* `BOUNDED`: WATERMARK=INTERVAL. Emits a watermark equal to the maximum observed timestamp minus the specified delay. + +The following queries are examples of creating a window view with `WATERMARK`: + +``` sql +CREATE WINDOW VIEW wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +CREATE WINDOW VIEW wv WATERMARK=ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +CREATE WINDOW VIEW wv WATERMARK=INTERVAL '3' SECOND AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +``` + +By default, the window is fired when the watermark arrives, and elements that arrive after the watermark are dropped. Window view supports late event processing by setting `ALLOWED_LATENESS=INTERVAL`. An example of lateness handling is: + +``` sql +CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM test.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid; +``` + +Note that elements emitted by a late firing should be treated as updated results of a previous computation. Instead of firing at the end of the window, the window view fires immediately when a late event arrives. Thus, it results in multiple outputs for the same window. Users need to take these duplicated results into account or deduplicate them.
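+ +One possible way to handle these duplicated outputs, sketched below under the assumption that the `test.dst` target table from the example above keeps every firing as a separate row and that a later firing never reports a smaller count, is to collapse them when reading: + +``` sql +-- A sketch: keep only the largest count reported for each window end. +SELECT w_end, max(count) AS final_count FROM test.dst GROUP BY w_end ORDER BY w_end; +```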
+ +### Monitoring New Windows {#window-view-monitoring} + +Window view supports the `WATCH` query to constantly append the processing results to the console or use `TO` syntax to output the results to a table. + +``` sql +WATCH [db.]name [LIMIT n] +``` + +`WATCH` query acts similar as in `LIVE VIEW`. A `LIMIT` can be specified to set the number of updates to receive before terminating the query. + +### Settings {#window-view-settings} + +- `window_view_clean_interval`: The clean interval of window view in seconds to free outdated data. The system will retain the windows that have not been fully triggered according to the system time or `WATERMARK` configuration, and the other data will be deleted. +- `window_view_heartbeat_interval`: The heartbeat interval in seconds to indicate the watch query is alive. + +### Example {#window-view-example} + +Suppose we need to count the number of click logs per 10 seconds in a log table called `data`, and its table structure is: + +``` sql +CREATE TABLE data ( `id` UInt64, `timestamp` DateTime) ENGINE = Memory; +``` + +First, we create a window view with tumble window of 10 seconds interval: + +``` sql +CREATE WINDOW VIEW wv as select count(id), tumbleStart(w_id) as window_start from data group by tumble(timestamp, INTERVAL '10' SECOND) as w_id +``` + +Then, we use the `WATCH` query to get the results. + +``` sql +WATCH wv +``` + +When logs are inserted into table `data`, + +``` sql +INSERT INTO data VALUES(1,now()) +``` + +The `WATCH` query should print the results as follows: + +``` text +┌─count(id)─┬────────window_start─┐ +│ 1 │ 2020-01-14 16:56:40 │ +└───────────┴─────────────────────┘ +``` + +Alternatively, we can attach the output to another table using `TO` syntax. + +``` sql +CREATE WINDOW VIEW wv TO dst AS SELECT count(id), tumbleStart(w_id) as window_start FROM data GROUP BY tumble(timestamp, INTERVAL '10' SECOND) as w_id +``` + +Additional examples can be found among stateful tests of ClickHouse (they are named `*window_view*` there). + +### Window View Usage {#window-view-usage} + +The window view is useful in the following scenarios: + +* **Monitoring**: Aggregate and calculate the metrics logs by time, and output the results to a target table. The dashboard can use the target table as a source table. +* **Analyzing**: Automatically aggregate and preprocess data in the time window. This can be useful when analyzing a large number of logs. The preprocessing eliminates repeated calculations in multiple queries and reduces query latency. diff --git a/docs/ru/interfaces/grpc.md b/docs/ru/interfaces/grpc.md new file mode 100644 index 00000000000..89032c9372c --- /dev/null +++ b/docs/ru/interfaces/grpc.md @@ -0,0 +1,99 @@ +--- +toc_priority: 18 +toc_title: gRPC интерфейс +--- + +# Интерфейс gRPC {#grpc-interface} + +## Введение {#grpc-interface-introduction} + +ClickHouse поддерживает интерфейс [gRPC](https://grpc.io/). Это система удаленного вызова процедур с открытым исходным кодом, которая использует HTTP/2 и [Protocol Buffers](https://ru.wikipedia.org/wiki/Protocol_Buffers). В реализации gRPC в ClickHouse поддерживаются: + +- SSL; +- аутентификация; +- сессии; +- сжатие; +- параллельные запросы, выполняемые через один канал; +- отмена запросов; +- получение прогресса операций и логов; +- внешние таблицы. + +Спецификация интерфейса содержится в [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). 
+ +## Конфигурация gRPC {#grpc-interface-configuration} + +Чтобы сделать доступным интерфейс gRPC, нужно задать порт с помощью настройки `grpc_port` в [конфигурации сервера](../operations/configuration-files.md). Другие настройки приведены в примере: + +```xml +9100 + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + deflate + + + medium + + + -1 + -1 + + + false + +``` + +## Встроенный клиент {#grpc-client} + +Можно написать клиент на любом языке программирования, который поддерживается gRPC, с использованием [спецификации](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). +Также можно воспользоваться встроенным Python клиентом. Он расположен в [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) в репозитории. Для работы встроенного клиента требуются Python модули [grpcio и grpcio-tools](https://grpc.io/docs/languages/python/quickstart). + +Клиент поддерживает аргументы: + +- `--help` – вывести справку и завершить работу. +- `--host HOST, -h HOST` – имя сервера. Значение по умолчанию: `localhost`. Можно задать адрес IPv4 или IPv6. +- `--port PORT` – номер порта. Этот порт должен быть задан в конфигурации сервера ClickHouse настройкой `grpc_port`. Значение по умолчанию: `9100`. +- `--user USER_NAME, -u USER_NAME` – имя пользователя. Значение по умолчанию: `default`. +- `--password PASSWORD` – пароль. Значение по умолчанию: пустая строка. +- `--query QUERY, -q QUERY` – запрос, который выполнится, когда используется неинтерактивный режим работы. +- `--database DATABASE, -d DATABASE` – база данных по умолчанию. Если не указана, то будет использована база данных, заданная в настройках сервера (по умолчанию `default`). +- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – [формат](formats.md) вывода результата. Значение по умолчанию для интерактивного режима: `PrettyCompact`. +- `--debug` – вывод отладочной информации. + +Чтобы запустить клиент в интерактивном режиме, не указывайте аргумент `--query`. + +В неинтерактивном режиме данные запроса можно передать через `stdin`. + +**Пример использования клиента** + +В примере создается таблица, и в нее загружаются данные из CSV файла. Затем выводится содержимое таблицы. + +``` bash +./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;" +echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt +cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" + +./clickhouse-grpc-client.py --format PrettyCompact -q "SELECT * FROM grpc_example_table;" +``` + +Результат: + +``` text +┌─id─┬─text──────────────────┐ +│ 0 │ Input data for │ +│ 1 │ gRPC protocol example │ +└────┴───────────────────────┘ +``` diff --git a/docs/ru/interfaces/index.md b/docs/ru/interfaces/index.md index 12e8853823e..b23a402e0b7 100644 --- a/docs/ru/interfaces/index.md +++ b/docs/ru/interfaces/index.md @@ -6,12 +6,13 @@ toc_title: "Введение" # Интерфейсы {#interfaces} -ClickHouse предоставляет два сетевых интерфейса (оба могут быть дополнительно обернуты в TLS для дополнительной безопасности): +ClickHouse предоставляет три сетевых интерфейса (они могут быть обернуты в TLS для дополнительной безопасности): - [HTTP](http.md), который задокументирован и прост для использования напрямую; -- [Native TCP](tcp.md), который имеет меньше накладных расходов. 
+- [Native TCP](tcp.md), который имеет меньше накладных расходов; +- [gRPC](grpc.md). -В большинстве случаев рекомендуется использовать подходящий инструмент или библиотеку, а не напрямую взаимодействовать с ClickHouse по сути. Официально поддерживаемые Яндексом: +В большинстве случаев рекомендуется использовать подходящий инструмент или библиотеку, а не напрямую взаимодействовать с ClickHouse. Официально поддерживаемые Яндексом: - [Консольный клиент](cli.md); - [JDBC-драйвер](jdbc.md); diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index c194c70ebbc..1b0c7fc5897 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -999,14 +999,14 @@ ClickHouse проверяет условия для `min_part_size` и `min_part Настройки логирования информации о зависимых представлениях (materialized, live и т.п.) в запросах принятых с настройкой [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views). -Запросы сохраняются в таблицу system.query_views_log. Вы можете изменить название этой таблицы в параметре `table` (см. ниже). +Запросы логируются в таблице [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). При настройке логирования используются следующие параметры: - `database` – имя базы данных. -- `table` – имя таблицы куда будут записываться использованные представления. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `table` – имя системной таблицы, где будут логироваться запросы. +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`. +- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`. - `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index 31cc229c6aa..e30539498b3 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -355,3 +355,23 @@ Eсли суммарное число активных кусков во все - 1 — куски данных открепляются. Значение по умолчанию: `0`. + +## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds} + +Задает интервал в секундах для удаления старых временных каталогов на сервере ClickHouse. + +Возможные значения: + +- Положительное целое число. 
+ +Значение по умолчанию: `60` секунд. + +## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds} + +Задает интервал в секундах для удаления старых кусков данных, журналов предзаписи (WAL) и мутаций на сервере ClickHouse. + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: `1` секунда. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index be4137731c0..1b4da512c9f 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -807,26 +807,6 @@ ClickHouse может парсить только базовый формат `Y Значение по умолчанию: 2013265920. -## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds} - -Задает интервал в секундах для удаления старых временных каталогов на сервере ClickHouse. - -Возможные значения: - -- Положительное целое число. - -Значение по умолчанию: `60` секунд. - -## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds} - -Задает интервал в секундах для удаления старых кусков данных, журналов предзаписи (WAL) и мутаций на сервере ClickHouse . - -Возможные значения: - -- Положительное целое число. - -Значение по умолчанию: `1` секунда. - ## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io} Минимальный объём данных, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. @@ -912,11 +892,18 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' ## log_query_threads {#settings-log-query-threads} -Установка логирования информации о потоках выполнения запроса. +Управляет логированием информации о потоках выполнения запросов. -Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). +Информация о потоках выполнения запросов сохраняется в системной таблице [system.query_thread_log](../../operations/system-tables/query_thread_log.md). Работает только в том случае, если включена настройка [log_queries](#settings-log-queries). Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). -Пример: +Возможные значения: + +- 0 — отключено. +- 1 — включено. + +Значение по умолчанию: `1`. + +**Пример** ``` text log_query_threads=1 @@ -3808,6 +3795,40 @@ SELECT * FROM positional_arguments ORDER BY 2,3; Значение по умолчанию: `0`. +## alter_partition_verbose_result {#alter-partition-verbose-result} + +Включает или отключает вывод информации о кусках, к которым были успешно применены операции манипуляции с партициями и кусками. Применимо к [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) и к [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition) + +Возможные значения: + +- 0 — отображение отключено. +- 1 — отображение включено. + +Значение по умолчанию: `0`. 
+ +**Пример** + +```sql +CREATE TABLE test(a Int64, d Date, s String) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY a; +INSERT INTO test VALUES(1, '2021-01-01', ''); +INSERT INTO test VALUES(1, '2021-01-01', ''); +ALTER TABLE test DETACH PARTITION ID '202101'; + +ALTER TABLE test ATTACH PARTITION ID '202101' SETTINGS alter_partition_verbose_result = 1; + +┌─command_type─────┬─partition_id─┬─part_name────┬─old_part_name─┐ +│ ATTACH PARTITION │ 202101 │ 202101_7_7_0 │ 202101_5_5_0 │ +│ ATTACH PARTITION │ 202101 │ 202101_8_8_0 │ 202101_6_6_0 │ +└──────────────────┴──────────────┴──────────────┴───────────────┘ + +ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1; + +┌─command_type─┬─partition_id─┬─part_name────┬─backup_name─┬─backup_path───────────────────┬─part_backup_path────────────────────────────────────────────┐ +│ FREEZE ALL │ 202101 │ 202101_7_7_0 │ 8 │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_7_7_0 │ +│ FREEZE ALL │ 202101 │ 202101_8_8_0 │ 8 │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_8_8_0 │ +└──────────────┴──────────────┴──────────────┴─────────────┴───────────────────────────────┴─────────────────────────────────────────────────────────────┘ +``` + ## format_capn_proto_enum_comparising_mode {#format-capn-proto-enum-comparising-mode} Определяет, как сопоставить тип данных ClickHouse `Enum` и тип данных `Enum` формата [CapnProto](../../interfaces/formats.md#capnproto) из схемы. diff --git a/docs/ru/operations/system-tables/query_log.md b/docs/ru/operations/system-tables/query_log.md index 644cee853cc..aa4d01a4d47 100644 --- a/docs/ru/operations/system-tables/query_log.md +++ b/docs/ru/operations/system-tables/query_log.md @@ -55,6 +55,7 @@ ClickHouse не удаляет данные из таблица автомати - `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — тип запроса. - `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена баз данных, присутствующих в запросе. - `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена таблиц, присутствующих в запросе. +- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена представлений (материализованные или live), которые представленны в запросе. - `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена столбцов, присутствующих в запросе. - `projections` ([String](../../sql-reference/data-types/string.md)) — имена проекций, использованных при выполнении запроса. - `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения. diff --git a/docs/ru/operations/system-tables/query_thread_log.md b/docs/ru/operations/system-tables/query_thread_log.md index 00538c9c9ae..c23d2828520 100644 --- a/docs/ru/operations/system-tables/query_thread_log.md +++ b/docs/ru/operations/system-tables/query_thread_log.md @@ -112,5 +112,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr **Смотрите также** -- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. 
- +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. +- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — описание системной таблицы `query_views_log`, которая содержит информацию о всех представлениях, участвующих в выполненных запросах. diff --git a/docs/ru/operations/system-tables/query_views_log.md b/docs/ru/operations/system-tables/query_views_log.md deleted file mode 120000 index f606e4108ca..00000000000 --- a/docs/ru/operations/system-tables/query_views_log.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/system-tables/query_views_log.md \ No newline at end of file diff --git a/docs/ru/operations/system-tables/query_views_log.md b/docs/ru/operations/system-tables/query_views_log.md new file mode 100644 index 00000000000..8b1a8d387a6 --- /dev/null +++ b/docs/ru/operations/system-tables/query_views_log.md @@ -0,0 +1,84 @@ +# system.query_views_log {#system_tables-query_views_log} + +Содержит информацию о зависимых представлениях, выполняемых при выполнении запроса, например, тип представления или время выполнения. + +Чтобы начать ведение журнала: + +1. Настройте параметры в разделе [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log). +2. Включите настройку [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views). + +Период сброса данных из буфера в памяти задается в параметре `flush_interval_milliseconds` в разделе настроек сервера [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log ). Для принудительного сброса используйте запрос [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs). + +ClickHouse не удаляет данные из таблицы автоматически. Подробнее смотрите раздел [Системные таблицы](../../operations/system-tables/index.md#system-tables-introduction). + +Чтобы уменьшить количество запросов, регистрируемых в таблице `query_views_log`, вы можете включить настройку [log_queries_probability](../../operations/settings/settings.md#log-queries-probability). + +Столбцы: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата, когда произошло последнее событие с представлением. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления с точностью до микросекунд. +- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — продолжительность выполнения представления (сумма его этапов) в миллисекундах. +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор начального запроса (при распределённом выполнении запроса). +- `view_name` ([String](../../sql-reference/data-types/string.md)) — имя представления. +- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID представления. +- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип представления. Возможные значения: + - `'Default' = 1` — [обычные представления](../../sql-reference/statements/create/view.md#normal). Не должно появляться в этом журнале. 
+ - `'Materialized' = 2` — [материализованные представления](../../sql-reference/statements/create/view.md#materialized). + - `'Live' = 3` — [live представления](../../sql-reference/statements/create/view.md#live-view). +- `view_query` ([String](../../sql-reference/data-types/string.md)) — запрос, выполняемый представлением. +- `view_target` ([String](../../sql-reference/data-types/string.md)) — имя целевой таблицы представления. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных строк. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных байт. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных байт. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между объемом выделенной и освобожденной памяти в контексте этого представления. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — события профиля, которые измеряют различные показатели. Их описание можно найти в таблице [system.events](../../operations/system-tables/events.md#system_tables-events). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — статус представления. Возможные значения: + - `'QueryStart' = 1` — успешное начало выполнения представления. Не должно отображаться. + - `'QueryFinish' = 2` — успешное завершение выполнения представления. + - `'ExceptionBeforeStart' = 3` — исключение до начала выполнения представления. + - `'ExceptionWhileProcessing' = 4` — исключение во время выполнения представления. +- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения. +- `exception` ([String](../../sql-reference/data-types/string.md)) — сообщение исключения. +- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [трассировка стека](https://ru.wikipedia.org/wiki/Трассировка_стека). Пустая строка, если запрос был успешно выполнен. + +**Пример** + +Запрос: + +``` sql +SELECT * FROM system.query_views_log LIMIT 1 \G; +``` + +Результат: + +``` text +Row 1: +────── +event_date: 2021-06-22 +event_time: 2021-06-22 13:23:07 +event_time_microseconds: 2021-06-22 13:23:07.738221 +view_duration_ms: 0 +initial_query_id: c3a1ac02-9cad-479b-af54-9e9c0a7afd70 +view_name: default.matview_inner +view_uuid: 00000000-0000-0000-0000-000000000000 +view_type: Materialized +view_query: SELECT * FROM default.table_b +view_target: default.`.inner.matview_inner` +read_rows: 4 +read_bytes: 64 +written_rows: 2 +written_bytes: 32 +peak_memory_usage: 4196188 +ProfileEvents: {'FileOpen':2,'WriteBufferFromFileDescriptorWrite':2,'WriteBufferFromFileDescriptorWriteBytes':187,'IOBufferAllocs':3,'IOBufferAllocBytes':3145773,'FunctionExecute':3,'DiskWriteElapsedMicroseconds':13,'InsertedRows':2,'InsertedBytes':16,'SelectedRows':4,'SelectedBytes':48,'ContextLock':16,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':698,'SoftPageFaults':4,'OSReadChars':463} +status: QueryFinish +exception_code: 0 +exception: +stack_trace: +``` + +**См. также** + +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. 
+- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — описание системной таблицы `query_thread_log`, которая содержит информацию о каждом потоке выполнения запроса. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md new file mode 100644 index 00000000000..b66d710744e --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md @@ -0,0 +1,66 @@ +--- +toc_priority: 311 +toc_title: sparkbar +--- + +# sparkbar {#sparkbar} + +Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. + +Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`. + + +**Синтаксис** + +``` sql +sparkbar(width[, min_x, max_x])(x, y) +``` + +**Параметры** + +- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md). + +- `min_x` — Начало интервала. Необязательный параметр. +- `max_x` — Конец интервала. Необязательный параметр. + +**Аргументы** + +- `x` — Поле со значениями. +- `y` — Поле с частотой повторения значений. + + +**Возвращаемые значения** + +- Гистограмма частот. + +**Пример** + +Запрос: + +``` sql +CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192; + +INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11'); + +SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data; + +SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data; +``` + +Результат: + +``` text + +┌─sparkbar(9)(event_date, cnt)─┐ +│ │ +│ ▁▅▄▃██▅ ▁ │ +│ │ +└──────────────────────────────┘ + +┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ +│ │ +│▁▄▄▂▅▇█▁ │ +│ │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 5bbd760bfb6..4969bf0f2eb 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -357,7 +357,7 @@ Result: ## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} -То же, что и `multiMatchAny`, но возвращает 1 если любой pattern соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция также находится в экспериментальном режиме и может быть очень медленной. За подробностями обращайтесь к [документации hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching). +То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. 
Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с non-fuzzy вариантами. ## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} diff --git a/docs/zh/sql-reference/functions/time-window-functions.md b/docs/zh/sql-reference/functions/time-window-functions.md new file mode 100644 index 00000000000..ab28a47ad55 --- /dev/null +++ b/docs/zh/sql-reference/functions/time-window-functions.md @@ -0,0 +1,112 @@ +--- +toc_priority: 68 +toc_title: 时间窗口 +--- + +# 时间窗口函数 {#time-window-han-shu} + +时间窗口函数用于获取窗口的起始(包含边界)和结束时间(不包含边界)。系统支持的时间窗口函数如下: + +## tumble {#time-window-functions-tumble} + +tumble窗口是连续的、不重叠的固定大小(`interval`)时间窗口。 + +``` sql +tumble(time_attr, interval [, timezone]) +``` + +**参数** +- `time_attr` - [DateTime](../../sql-reference/data-types/datetime.md)类型的时间数据。 +- `interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的窗口大小。 +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) 类型的时区(可选参数). + +**返回值** + +- tumble窗口的开始(包含边界)和结束时间(不包含边界) + +类型: `Tuple(DateTime, DateTime)` + +**示例** + +查询: + +``` sql +SELECT tumble(now(), toIntervalDay('1')) +``` + +结果: + +``` text +┌─tumble(now(), toIntervalDay('1'))─────────────┐ +│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +└───────────────────────────────────────────────┘ +``` + +## hop {#time-window-functions-hop} + +hop窗口是一个固定大小(`window_interval`)的时间窗口,并按照一个固定的滑动间隔(`hop_interval`)滑动。当滑动间隔小于窗口大小时,滑动窗口间存在重叠,此时一个数据可能存在于多个窗口。 + +``` sql +hop(time_attr, hop_interval, window_interval [, timezone]) +``` + +**参数** + +- `time_attr` - [DateTime](../../sql-reference/data-types/datetime.md)类型的时间数据。 +- `hop_interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的滑动间隔,需要大于0。 +- `window_interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的窗口大小,需要大于0。 +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) 类型的时区(可选参数)。 + +**返回值** + +- hop窗口的开始(包含边界)和结束时间(不包含边界)。由于一个数据可能存在于多个窗口,脱离window view单独调用该函数时只返回第一个窗口数据。 + +类型: `Tuple(DateTime, DateTime)` + +**示例** + +查询: + +``` sql +SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +``` + +结果: + +``` text +┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ +│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ +└───────────────────────────────────────────────────────────┘ +``` + +## tumbleStart {#time-window-functions-tumblestart} + +返回tumble窗口的开始时间(包含边界)。 + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +## tumbleEnd {#time-window-functions-tumbleend} + +返回tumble窗口的结束时间(不包含边界)。 + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +## hopStart {#time-window-functions-hopstart} + +返回hop窗口的开始时间(包含边界)。 + +``` sql +hopStart(time_attr, hop_interval, window_interval [, timezone]); +``` + +## hopEnd {#time-window-functions-hopend} + +返回hop窗口的结束时间(不包含边界)。 + +``` sql +hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index da69860f068..506f1717b03 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -5,7 +5,7 @@ toc_title: VIEW # CREATE VIEW 
{#create-view} -创建一个新视图。 有两种类型的视图:普通视图和物化视图。 +创建一个新视图。 有两种类型的视图:普通视图,物化视图,Live视图和Window视图。 ## Normal {#normal} @@ -241,3 +241,120 @@ Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table defa - 使用定期刷新从系统表中查看指标。 [原始文章](https://clickhouse.com/docs/en/sql-reference/statements/create/view/) + +## Window View [Experimental] {#window-view} + +!!! important "重要" + 这是一项试验性功能,可能会在未来版本中以向后不兼容的方式进行更改。 + 通过[allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view)启用window view以及`WATCH`语句。输入命令 + `set allow_experimental_window_view = 1`。 + +``` sql +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY time_window_function +``` + +Window view可以通过时间窗口聚合数据,并在满足窗口触发条件时自动触发对应窗口计算。其通过将计算状态保存降低处理延迟,支持将处理结果输出至目标表或通过`WATCH`语句输出至终端。 + +创建window view的方式和创建物化视图类似。Window view使用默认为`AggregatingMergeTree`的内部存储引擎存储计算中间状态。 + +### 时间窗口函数 {#window-view-shi-jian-chuang-kou-han-shu} + +[时间窗口函数](../../functions/time-window-functions.md)用于获取窗口的起始和结束时间。Window view需要和时间窗口函数配合使用。 + +### 时间属性 {#window-view-shi-jian-shu-xing} + +Window view 支持**处理时间**和**事件时间**两种时间类型。 + +**处理时间**为默认时间类型,该模式下window view使用本地机器时间计算窗口数据。“处理时间”时间类型计算简单,但具有不确定性。该模式下时间可以为时间窗口函数的第一个参数`time_attr`,或通过函数`now()`使用当前机器时间。下面的例子展示了使用“处理时间”创建window view的例子。 + +``` sql +CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id +``` + +**事件时间** 是事件真实发生的时间,该时间往往在事件发生时便嵌入数据记录。事件时间处理提供较高的确定性,可以处理乱序数据以及迟到数据。Window view通过水位线(`WATERMARK`)启用事件时间处理。 + +Window view提供如下三种水位线策略: + +* `STRICTLY_ASCENDING`: 提交观测到的最大时间作为水位线,小于最大观测时间的数据不算迟到。 +* `ASCENDING`: 提交观测到的最大时间减1作为水位线。小于或等于最大观测时间的数据不算迟到。 +* `BOUNDED`: WATERMARK=INTERVAL. 
提交最大观测时间减去固定间隔(`INTERVAL`)做为水位线。 + +以下为使用`WATERMARK`创建window view的示例: + +``` sql +CREATE WINDOW VIEW wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +CREATE WINDOW VIEW wv WATERMARK=ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +CREATE WINDOW VIEW wv WATERMARK=INTERVAL '3' SECOND AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +``` + +通常,窗口会在水位线到达时触发,水位线到达之后的数据会被丢弃。Window view可以通过设置`ALLOWED_LATENESS=INTERVAL`来开启迟到消息处理。示例如下: + +``` sql +CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM test.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid; +``` + +需要注意的是,迟到消息需要更新之前的处理结果。与在窗口结束时触发不同,迟到消息到达时window view会立即触发计算。因此,会导致同一个窗口输出多次计算结果。用户需要注意这种情况,并消除重复结果。 + +### 新窗口监控 {#window-view-xin-chuang-kou-jian-kong} + +Window view可以通过`WATCH`语句将处理结果推送至终端,或通过`TO`语句将结果推送至数据表。 + +``` sql +WATCH [db.]name [LIMIT n] +``` + +`WATCH`语句和`LIVE VIEW`中的类似。支持设置`LIMIT`参数,输出消息数目达到`LIMIT`限制时结束查询。 + +### 设置 {#window-view-she-zhi} + +- `window_view_clean_interval`: window view清除过期数据间隔(单位为秒)。系统会定期清除过期数据,尚未触发的窗口数据不会被清除。 +- `window_view_heartbeat_interval`: 用于判断watch查询活跃的心跳时间间隔。 + +### 示例 {#window-view-shi-li} + +假设我们需要每10秒统计一次`data`表中的点击日志,且`data`表的结构如下: + +``` sql +CREATE TABLE data ( `id` UInt64, `timestamp` DateTime) ENGINE = Memory; +``` + +首先,使用10秒大小的tumble函数创建window view。 + +``` sql +CREATE WINDOW VIEW wv as select count(id), tumbleStart(w_id) as window_start from data group by tumble(timestamp, INTERVAL '10' SECOND) as w_id +``` + +随后,我们使用`WATCH`语句获取计算结果。 + +``` sql +WATCH wv +``` + +当日志插入表`data`时, + +``` sql +INSERT INTO data VALUES(1,now()) +``` + +`WATCH`语句会输出如下结果: + +``` text +┌─count(id)─┬────────window_start─┐ +│ 1 │ 2020-01-14 16:56:40 │ +└───────────┴─────────────────────┘ +``` + +或者,我们可以通过`TO`关键字将处理结果输出至另一张表。 + +``` sql +CREATE WINDOW VIEW wv TO dst AS SELECT count(id), tumbleStart(w_id) as window_start FROM data GROUP BY tumble(timestamp, INTERVAL '10' SECOND) as w_id +``` + +ClickHouse测试中提供了更多的示例(以`*window_view*`命名)。 + +### Window View 使用场景 {#window-view-shi-yong-chang-jing} + +Window view 在以下场景有用: + +* **监控**: 以时间维度聚合及处理数据,并将处理结果输出至目标表。用户可通过目标表获取并操作计算结果。 +* **分析**: 以时间维度进行数据分析. 
当数据源非常庞大时,window view可以减少重复全表查询的计算量。 diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 43d9f974648..4806a7fe46e 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -473,3 +473,7 @@ if (ENABLE_TESTS AND USE_GTEST) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) add_dependencies(clickhouse-bundle clickhouse-tests) endif() + +if (ENABLE_FUZZING) + add_compile_definitions(FUZZING_MODE=1) +endif () diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 383b9bb5e52..e01677aaac6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -20,9 +20,7 @@ #include #include -#if !defined(ARCADIA_BUILD) -# include -#endif +#include #include #include #include @@ -705,6 +703,12 @@ bool Client::processWithFuzzing(const String & full_query) throw; } + if (!orig_ast) + { + // Can't continue after a parsing error + return true; + } + // `USE db` should not be executed // since this will break every query after `DROP db` if (orig_ast->as()) @@ -712,12 +716,6 @@ bool Client::processWithFuzzing(const String & full_query) return true; } - if (!orig_ast) - { - // Can't continue after a parsing error - return true; - } - // Don't repeat: // - INSERT -- Because the tables may grow too big. // - CREATE -- Because first we run the unmodified query, it will succeed, diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 4dadef911d7..afd6a36ea15 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -22,10 +22,8 @@ #include #include -#if !defined(ARCADIA_BUILD) -# include "config_core.h" -# include "Common/config_version.h" -#endif +#include "config_core.h" +#include "Common/config_version.h" #if USE_SSL # include diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index 530128c2041..da466f725b3 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -17,3 +17,9 @@ clickhouse_program_add(local) if(NOT CLICKHOUSE_ONE_SHARED) target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib) endif() + +if (ENABLE_FUZZING) + add_compile_definitions(FUZZING_MODE=1) + set (WITH_COVERAGE ON) + target_link_libraries(clickhouse-local-lib PRIVATE ${LIB_FUZZING_ENGINE}) +endif () diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 14095aa8dd0..33615080df4 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -41,6 +41,10 @@ #include #include +#if defined(FUZZING_MODE) + #include +#endif + namespace fs = std::filesystem; @@ -384,12 +388,6 @@ void LocalServer::setupUsers() } -String LocalServer::getQueryTextPrefix() -{ - return getInitialCreateTableQuery(); -} - - void LocalServer::connect() { connection_parameters = ConnectionParameters(config()); @@ -407,10 +405,25 @@ try std::cout << std::fixed << std::setprecision(3); std::cerr << std::fixed << std::setprecision(3); +#if defined(FUZZING_MODE) + static bool first_time = true; + if (first_time) + { + + if (queries_files.empty() && !config().has("query")) + { + std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode." << "\033[0m" << std::endl; + std::cerr << "\033[31m" << "You have to provide a query with --query or --queries-file option." << "\033[0m" << std::endl; + std::cerr << "\033[31m" << "The query have to use function getFuzzerData() inside." 
<< "\033[0m" << std::endl; + exit(1); + } + + is_interactive = false; +#else is_interactive = stdin_is_a_tty && (config().hasOption("interactive") || (!config().has("query") && !config().has("table-structure") && queries_files.empty())); - +#endif if (!is_interactive) { /// We will terminate process on error @@ -439,6 +452,15 @@ try connect(); +#ifdef FUZZING_MODE + first_time = false; + } +#endif + + String initial_query = getInitialCreateTableQuery(); + if (!initial_query.empty()) + processQueryText(initial_query); + if (is_interactive && !delayed_interactive) { runInteractive(); @@ -451,7 +473,9 @@ try runInteractive(); } +#ifndef FUZZING_MODE cleanup(); +#endif return Application::EXIT_OK; } catch (const DB::Exception & e) @@ -653,7 +677,7 @@ void LocalServer::processConfig() } -static std::string getHelpHeader() +[[ maybe_unused ]] static std::string getHelpHeader() { return "usage: clickhouse-local [initial table definition] [--query ]\n" @@ -669,7 +693,7 @@ static std::string getHelpHeader() } -static std::string getHelpFooter() +[[ maybe_unused ]] static std::string getHelpFooter() { return "Example printing memory used by each Unix user:\n" @@ -680,11 +704,23 @@ static std::string getHelpFooter() } -void LocalServer::printHelpMessage(const OptionsDescription & options_description) +void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & options_description) { +#if defined(FUZZING_MODE) + std::cout << + "usage: clickhouse -- \n" + "Note: It is important not to use only one letter keys with single dash for \n" + "for clickhouse-local arguments. It may work incorrectly.\n" + + "ClickHouse is build with coverage guided fuzzer (libfuzzer) inside it.\n" + "You have to provide a query which contains getFuzzerData function.\n" + "This will take the data from fuzzing engine, pass it to getFuzzerData function and execute a query.\n" + "Each time the data will be different, and it will last until some segfault or sanitizer assertion is found. \n"; +#else std::cout << getHelpHeader() << "\n"; std::cout << options_description.main_description.value() << "\n"; std::cout << getHelpFooter() << "\n"; +#endif } @@ -781,3 +817,51 @@ int mainEntryClickHouseLocal(int argc, char ** argv) return code ? code : 1; } } + +#if defined(FUZZING_MODE) + +std::optional fuzz_app; + +extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) +{ + int & argc = *pargc; + char ** argv = *pargv; + + /// As a user you can add flags to clickhouse binary in fuzzing mode as follows + /// clickhouse -- + + /// Calculate the position of delimiter "--" that separates arguments + /// of clickhouse-local and libfuzzer + int pos_delim = argc; + for (int i = 0; i < argc; ++i) + { + if (strcmp(argv[i], "--") == 0) + { + pos_delim = i; + break; + } + } + + /// Initialize clickhouse-local app + fuzz_app.emplace(); + fuzz_app->init(pos_delim, argv); + + /// We will leave clickhouse-local specific arguments as is, because libfuzzer will ignore + /// all keys starting with -- + return 0; +} + + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +try +{ + auto input = String(reinterpret_cast(data), size); + DB::FunctionGetFuzzerData::update(input); + fuzz_app->run(); + return 0; +} +catch (...) 
+{ + return 1; +} +#endif diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index ce0df06c86a..06e3746eacd 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -37,7 +37,6 @@ protected: void processError(const String & query) const override; String getName() const override { return "local"; } - String getQueryTextPrefix() override; void printHelpMessage(const OptionsDescription & options_description) override; void addOptions(OptionsDescription & options_description) override; diff --git a/programs/main.cpp b/programs/main.cpp index a71131c59ec..2cdda075ca7 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -13,9 +13,7 @@ #include #include /// pair -#if !defined(ARCADIA_BUILD) -# include "config_tools.h" -#endif +#include "config_tools.h" #include #include @@ -88,6 +86,7 @@ namespace using MainFunc = int (*)(int, char**); +#if !defined(FUZZING_MODE) /// Add an item here to register new application std::pair clickhouse_applications[] = @@ -141,7 +140,6 @@ std::pair clickhouse_applications[] = {"hash-binary", mainEntryClickHouseHashBinary}, }; - int printHelp(int, char **) { std::cerr << "Use one of the following commands:" << std::endl; @@ -150,7 +148,6 @@ int printHelp(int, char **) return -1; } - bool isClickhouseApp(const std::string & app_suffix, std::vector & argv) { /// Use app if the first arg 'app' is passed (the arg should be quietly removed) @@ -170,6 +167,7 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector & argv) std::string app_name = "clickhouse-" + app_suffix; return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); } +#endif enum class InstructionFail @@ -342,9 +340,13 @@ struct Checker /// /// extern bool inside_main; /// class C { C() { assert(inside_main); } }; +#ifndef FUZZING_MODE bool inside_main = false; +#else +bool inside_main = true; +#endif - +#if !defined(FUZZING_MODE) int main(int argc_, char ** argv_) { inside_main = true; @@ -375,3 +377,4 @@ int main(int argc_, char ** argv_) return main_func(static_cast(argv.size()), argv.data()); } +#endif diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index bd99b047e6b..14075f9fbf2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -82,10 +82,8 @@ #include #include -#if !defined(ARCADIA_BUILD) -# include "config_core.h" -# include "Common/config_version.h" -#endif +#include "config_core.h" +#include "Common/config_version.h" #if defined(OS_LINUX) # include @@ -96,7 +94,7 @@ #endif #if USE_SSL -# if USE_INTERNAL_SSL_LIBRARY && !defined(ARCADIA_BUILD) +# if USE_INTERNAL_SSL_LIBRARY # include # endif # include diff --git a/programs/server/config.xml b/programs/server/config.xml index 37f36aa5215..9a2a6d7729f 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -650,6 +650,38 @@ + + + false + + 127.0.0.1 + 9000 + + + 127.0.0.2 + 9000 + + + 127.0.0.3 + 9000 + + + + diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index f0c6004bd77..78dd3c7022a 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ namespace ErrorCodes extern const int QUOTA_EXPIRED; } + struct EnabledQuota::Impl { [[noreturn]] static void throwQuotaExceed( @@ -35,54 +37,6 @@ struct EnabledQuota::Impl } - /// Returns the end of the current interval. 
If the passed `current_time` is greater than that end, - /// the function automatically recalculates the interval's end by adding the interval's duration - /// one or more times until the interval's end is greater than `current_time`. - /// If that recalculation occurs the function also resets amounts of resources used and sets the variable - /// `counters_were_reset`. - static std::chrono::system_clock::time_point getEndOfInterval( - const Interval & interval, std::chrono::system_clock::time_point current_time, bool & counters_were_reset) - { - auto & end_of_interval = interval.end_of_interval; - auto end_loaded = end_of_interval.load(); - auto end = std::chrono::system_clock::time_point{end_loaded}; - if (current_time < end) - { - counters_were_reset = false; - return end; - } - - bool need_reset_counters = false; - - do - { - /// Calculate the end of the next interval: - /// | X | - /// end current_time next_end = end + duration * n - /// where n is an integer number, n >= 1. - const auto duration = interval.duration; - UInt64 n = static_cast((current_time - end + duration) / duration); - end = end + duration * n; - if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) - { - /// We reset counters only if the interval's end has been calculated before. - /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. - need_reset_counters = (end_loaded.count() != 0); - break; - } - end = std::chrono::system_clock::time_point{end_loaded}; - } - while (current_time >= end); - - if (need_reset_counters) - { - boost::range::fill(interval.used, 0); - counters_were_reset = true; - } - return end; - } - - static void used( const String & user_name, const Intervals & intervals, @@ -91,24 +45,22 @@ struct EnabledQuota::Impl std::chrono::system_clock::time_point current_time, bool check_exceeded) { + auto quota_type_i = static_cast(quota_type); for (const auto & interval : intervals.intervals) { - auto quota_type_i = static_cast(quota_type); QuotaValue used = (interval.used[quota_type_i] += value); QuotaValue max = interval.max[quota_type_i]; if (!max) continue; + if (used > max) { bool counters_were_reset = false; - auto end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset); + auto end_of_interval = interval.getEndOfInterval(current_time, counters_were_reset); if (counters_were_reset) - { used = (interval.used[quota_type_i] += value); - if ((used > max) && check_exceeded) - throwQuotaExceed(user_name, intervals.quota_name, quota_type, used, max, interval.duration, end_of_interval); - } - else if (check_exceeded) + + if (check_exceeded && (used > max)) throwQuotaExceed(user_name, intervals.quota_name, quota_type, used, max, interval.duration, end_of_interval); } } @@ -127,10 +79,11 @@ struct EnabledQuota::Impl QuotaValue max = interval.max[quota_type_i]; if (!max) continue; + if (used > max) { bool counters_were_reset = false; - std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset); + auto end_of_interval = interval.getEndOfInterval(current_time, counters_were_reset); if (!counters_were_reset) throwQuotaExceed(user_name, intervals.quota_name, quota_type, used, max, interval.duration, end_of_interval); } @@ -145,17 +98,32 @@ struct EnabledQuota::Impl for (auto quota_type : collections::range(QuotaType::MAX)) checkExceeded(user_name, intervals, quota_type, current_time); } + + static std::chrono::system_clock::duration 
randomDuration(std::chrono::seconds max) + { + auto count = std::chrono::duration_cast(max).count(); + std::uniform_int_distribution distribution{0, count - 1}; + return std::chrono::system_clock::duration(distribution(thread_local_rng)); + } }; -EnabledQuota::Interval::Interval() +EnabledQuota::Interval::Interval(std::chrono::seconds duration_, bool randomize_interval_, std::chrono::system_clock::time_point current_time_) + : duration(duration_) , randomize_interval(randomize_interval_) { + std::chrono::system_clock::time_point initial_end{}; + if (randomize_interval_) + initial_end += Impl::randomDuration(duration_); + end_of_interval = initial_end.time_since_epoch(); + for (auto quota_type : collections::range(QuotaType::MAX)) { auto quota_type_i = static_cast(quota_type); used[quota_type_i].store(0); max[quota_type_i] = 0; } + + getEndOfInterval(current_time_); /// Force updating the end of the interval for the first time. } @@ -177,6 +145,55 @@ EnabledQuota::Interval & EnabledQuota::Interval::operator =(const Interval & src } +/// Returns the end of the current interval. If the passed `current_time` is greater than that end, +/// the function automatically recalculates the interval's end by adding the interval's duration +/// one or more times until the interval's end is greater than `current_time`. +/// If that recalculation occurs the function also resets amounts of resources used and sets the variable +/// `counters_were_reset`. +std::chrono::system_clock::time_point EnabledQuota::Interval::getEndOfInterval(std::chrono::system_clock::time_point current_time) const +{ + bool counters_were_reset; + return getEndOfInterval(current_time, counters_were_reset); +} + +std::chrono::system_clock::time_point EnabledQuota::Interval::getEndOfInterval(std::chrono::system_clock::time_point current_time, bool & counters_were_reset) const +{ + auto end_loaded = end_of_interval.load(); + auto end = std::chrono::system_clock::time_point{end_loaded}; + if (current_time < end) + { + counters_were_reset = false; + return end; + } + + bool need_reset_counters = false; + + do + { + /// Calculate the end of the next interval: + /// | X | + /// end current_time next_end = end + duration * n + /// where n is an integer number, n >= 1. 
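For illustration only, the arithmetic described in the comment above (and implemented by the patch lines that follow) can be sketched as a minimal, self-contained function; the names here are hypothetical and not part of ClickHouse.

```cpp
#include <cassert>
#include <chrono>

/// Advance the interval end by a whole number of durations so that it becomes
/// strictly greater than current_time: next_end = end + duration * n, with n >= 1.
std::chrono::system_clock::time_point advanceIntervalEnd(
    std::chrono::system_clock::time_point end,
    std::chrono::seconds duration,
    std::chrono::system_clock::time_point current_time)
{
    if (current_time < end)
        return end;                                            /// still inside the current interval
    auto n = (current_time - end + duration) / duration;       /// integer division, n >= 1
    return end + duration * n;
}

int main()
{
    using namespace std::chrono;
    const auto epoch = system_clock::time_point{};
    auto end = epoch + seconds(10);
    auto now = epoch + seconds(35);                            /// 25 s past the old end, duration 10 s
    auto next = advanceIntervalEnd(end, seconds(10), now);
    assert(next == epoch + seconds(40));                       /// 10 + 10 * 3
    assert(next > now);
}
```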
+ UInt64 n = static_cast((current_time - end + duration) / duration); + end = end + duration * n; + if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) + { + need_reset_counters = true; + break; + } + end = std::chrono::system_clock::time_point{end_loaded}; + } + while (current_time >= end); + + if (need_reset_counters) + { + boost::range::fill(used, 0); + counters_were_reset = true; + } + return end; +} + + std::optional EnabledQuota::Intervals::getUsage(std::chrono::system_clock::time_point current_time) const { if (!quota_id) @@ -192,8 +209,7 @@ std::optional EnabledQuota::Intervals::getUsage(std::chrono::system_ auto & out = usage.intervals.back(); out.duration = in.duration; out.randomize_interval = in.randomize_interval; - bool counters_were_reset = false; - out.end_of_interval = Impl::getEndOfInterval(in, current_time, counters_were_reset); + out.end_of_interval = in.getEndOfInterval(current_time); for (auto quota_type : collections::range(QuotaType::MAX)) { auto quota_type_i = static_cast(quota_type); diff --git a/src/Access/EnabledQuota.h b/src/Access/EnabledQuota.h index 097afe861d2..88362c9193f 100644 --- a/src/Access/EnabledQuota.h +++ b/src/Access/EnabledQuota.h @@ -73,9 +73,13 @@ private: bool randomize_interval = false; mutable std::atomic end_of_interval; - Interval(); + Interval(std::chrono::seconds duration_, bool randomize_interval_, std::chrono::system_clock::time_point current_time_); + Interval(const Interval & src) { *this = src; } Interval & operator =(const Interval & src); + + std::chrono::system_clock::time_point getEndOfInterval(std::chrono::system_clock::time_point current_time) const; + std::chrono::system_clock::time_point getEndOfInterval(std::chrono::system_clock::time_point current_time, bool & counters_were_reset) const; }; struct Intervals diff --git a/src/Access/QuotaCache.cpp b/src/Access/QuotaCache.cpp index 566c2409205..43ab4268b0c 100644 --- a/src/Access/QuotaCache.cpp +++ b/src/Access/QuotaCache.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -22,17 +21,6 @@ namespace ErrorCodes } -namespace -{ - std::chrono::system_clock::duration randomDuration(std::chrono::seconds max) - { - auto count = std::chrono::duration_cast(max).count(); - std::uniform_int_distribution distribution{0, count - 1}; - return std::chrono::system_clock::duration(distribution(thread_local_rng)); - } -} - - void QuotaCache::QuotaInfo::setQuota(const QuotaPtr & quota_, const UUID & quota_id_) { quota = quota_; @@ -94,18 +82,21 @@ boost::shared_ptr QuotaCache::QuotaInfo::getOrBui auto it = key_to_intervals.find(key); if (it != key_to_intervals.end()) return it->second; - return rebuildIntervals(key); + return rebuildIntervals(key, std::chrono::system_clock::now()); } void QuotaCache::QuotaInfo::rebuildAllIntervals() { + if (key_to_intervals.empty()) + return; + auto current_time = std::chrono::system_clock::now(); for (const String & key : key_to_intervals | boost::adaptors::map_keys) - rebuildIntervals(key); + rebuildIntervals(key, current_time); } -boost::shared_ptr QuotaCache::QuotaInfo::rebuildIntervals(const String & key) +boost::shared_ptr QuotaCache::QuotaInfo::rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time) { auto new_intervals = boost::make_shared(); new_intervals->quota_name = quota->getName(); @@ -115,14 +106,8 @@ boost::shared_ptr QuotaCache::QuotaInfo::rebuildI intervals.reserve(quota->all_limits.size()); for (const auto & limits : quota->all_limits) { - 
intervals.emplace_back(); + intervals.emplace_back(limits.duration, limits.randomize_interval, current_time); auto & interval = intervals.back(); - interval.duration = limits.duration; - std::chrono::system_clock::time_point end_of_interval{}; - interval.randomize_interval = limits.randomize_interval; - if (limits.randomize_interval) - end_of_interval += randomDuration(limits.duration); - interval.end_of_interval = end_of_interval.time_since_epoch(); for (auto quota_type : collections::range(QuotaType::MAX)) { auto quota_type_i = static_cast(quota_type); diff --git a/src/Access/QuotaCache.h b/src/Access/QuotaCache.h index 77682230370..7298acad415 100644 --- a/src/Access/QuotaCache.h +++ b/src/Access/QuotaCache.h @@ -43,7 +43,7 @@ private: String calculateKey(const EnabledQuota & enabled_quota) const; boost::shared_ptr getOrBuildIntervals(const String & key); - boost::shared_ptr rebuildIntervals(const String & key); + boost::shared_ptr rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time); void rebuildAllIntervals(); QuotaPtr quota; diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index e2a9220f113..eb061337753 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -20,6 +20,7 @@ namespace DB { + struct Settings; template constexpr bool DecimalOrExtendedInt = @@ -42,39 +43,19 @@ struct AvgFraction /// Invoked only is either Numerator or Denominator are Decimal. Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale [[maybe_unused]]) const { - if constexpr (is_decimal && is_decimal) - { - // According to the docs, num(S1) / denom(S2) would have scale S1 - - if constexpr (std::is_same_v && std::is_same_v) - ///Special case as Decimal256 / Decimal128 = compile error (as Decimal128 is not parametrized by a wide - ///int), but an __int128 instead - return DecimalUtils::convertTo( - numerator / (denominator.template convertTo()), num_scale); - else - return DecimalUtils::convertTo(numerator / denominator, num_scale); - } - - /// Numerator is always casted to Float64 to divide correctly if the denominator is not Float64. - Float64 num_converted; - + Float64 numerator_float; if constexpr (is_decimal) - num_converted = DecimalUtils::convertTo(numerator, num_scale); + numerator_float = DecimalUtils::convertTo(numerator, num_scale); else - num_converted = static_cast(numerator); /// all other types, including extended integral. - - std::conditional_t, - Float64, Denominator> denom_converted; + numerator_float = numerator; + Float64 denominator_float; if constexpr (is_decimal) - denom_converted = DecimalUtils::convertTo(denominator, denom_scale); - else if constexpr (DecimalOrExtendedInt) - /// no way to divide Float64 and extended integral type without an explicit cast. - denom_converted = static_cast(denominator); + denominator_float = DecimalUtils::convertTo(denominator, denom_scale); else - denom_converted = denominator; /// can divide on float, no cast required. 
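As context for the simplified `divideIfAnyDecimal` in the hunk above: a decimal is stored as a scaled integer, so converting each side to `Float64` before dividing boils down to `raw / 10^scale`. A tiny hypothetical sketch of that idea (not the real `DecimalUtils` API):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

/// A decimal value is stored as a scaled integer: 1.25 with scale 2 is kept as 125.
/// Converting both sides to double before dividing is simply raw / 10^scale.
double decimalToFloat(int64_t raw, uint32_t scale)
{
    return static_cast<double>(raw) / std::pow(10.0, scale);
}

int main()
{
    int64_t numerator_raw = 125;    /// 1.25 with scale 2
    int64_t denominator_raw = 50;   /// 5.0  with scale 1
    std::printf("%.3f\n", decimalToFloat(numerator_raw, 2) / decimalToFloat(denominator_raw, 1));   /// prints 0.250
}
```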
+ denominator_float = denominator; - return num_converted / denom_converted; + return numerator_float / denominator_float; } Float64 NO_SANITIZE_UNDEFINED divide() const @@ -237,9 +218,9 @@ public: using ColVecType = ColumnVectorOrDecimal; - void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final { - this->data(place).numerator += static_cast(*columns[0]).getData()[row_num]; + increment(place, static_cast(*columns[0]).getData()[row_num]); ++this->data(place).denominator; } @@ -259,7 +240,7 @@ public: sum_data.addMany(column.getData().data(), batch_size); this->data(place).denominator += batch_size; } - this->data(place).numerator += sum_data.sum; + increment(place, sum_data.sum); } void addBatchSinglePlaceNotNull( @@ -289,7 +270,7 @@ public: sum_data.addManyNotNull(column.getData().data(), null_map, batch_size); this->data(place).denominator += batch_size - countBytesInFilter(null_map, batch_size); } - this->data(place).numerator += sum_data.sum; + increment(place, sum_data.sum); } String getName() const override { return "avg"; } @@ -317,5 +298,10 @@ public: #endif +private: + void NO_SANITIZE_UNDEFINED increment(AggregateDataPtr __restrict place, Numerator inc) const + { + this->data(place).numerator += inc; + } }; } diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp index b7fdb3460e3..ab6fdc8fd7e 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp @@ -82,17 +82,17 @@ createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & a const bool left_decimal = isDecimal(data_type); const bool right_decimal = isDecimal(data_type_weight); + /// We multiply value by weight, so actual scale of numerator is + if (left_decimal && right_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, - getDecimalScale(*data_type), getDecimalScale(*data_type_weight))); + getDecimalScale(*data_type) + getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight))); else if (left_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, getDecimalScale(*data_type))); else if (right_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, - // numerator is not decimal, so its scale is 0 - 0, getDecimalScale(*data_type_weight))); + getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight))); else ptr.reset(create(*data_type, *data_type_weight, argument_types)); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5f4ebaaa895..5e4a16cfda7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -517,6 +517,8 @@ if (USE_BZIP2) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR}) endif() +dbms_target_link_libraries(PUBLIC consistent-hashing) + include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") if (ENABLE_TESTS AND USE_GTEST) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 98f5eb5abeb..58bc239f003 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1414,18 +1414,12 @@ void ClientBase::runInteractive() highlight_callback = highlight; ReplxxLineReader lr(*suggest, history_file, config().has("multiline"), query_extenders, query_delimiters, highlight_callback); - -#elif defined(USE_READLINE) && 
USE_READLINE - ReadlineLineReader lr(*suggest, history_file, config().has("multiline"), query_extenders, query_delimiters); #else LineReader lr(history_file, config().has("multiline"), query_extenders, query_delimiters); #endif - /// Enable bracketed-paste-mode only when multiquery is enabled and multiline is - /// disabled, so that we are able to paste and execute multiline queries in a whole - /// instead of erroring out, while be less intrusive. - if (config().has("multiquery") && !config().has("multiline")) - lr.enableBracketedPaste(); + /// Enable bracketed-paste-mode so that we are able to paste multiline queries as a whole. + lr.enableBracketedPaste(); do { @@ -1497,17 +1491,14 @@ void ClientBase::runNonInteractive() { auto process_multi_query_from_file = [&](const String & file) { - auto text = getQueryTextPrefix(); String queries_from_file; ReadBufferFromFile in(file); readStringUntilEOF(queries_from_file, in); - text += queries_from_file; - return executeMultiQuery(text); + return executeMultiQuery(queries_from_file); }; - /// Read all queries into `text`. for (const auto & queries_file : queries_files) { for (const auto & interleave_file : interleave_queries_files) @@ -1522,9 +1513,6 @@ void ClientBase::runNonInteractive() } String text; - if (is_multiquery) - text = getQueryTextPrefix(); - if (config().has("query")) { text += config().getRawString("query"); /// Poco configuration should not process substitutions in form of ${...} inside query. diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index bad1395e699..4c5d29b390b 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -78,9 +78,6 @@ protected: String & query_to_execute, ASTPtr & parsed_query, const String & all_queries_text, std::optional & current_exception); - /// For non-interactive multi-query mode get queries text prefix. - virtual String getQueryTextPrefix() { return ""; } - static void clearTerminal(); void showClientVersion(); @@ -100,9 +97,10 @@ protected: const std::vector & external_tables_arguments) = 0; virtual void processConfig() = 0; -private: +protected: bool processQueryText(const String & text); +private: void receiveResult(ASTPtr parsed_query); bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled); void receiveLogs(ASTPtr parsed_query); diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index ca10160fa88..505a6514812 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -603,6 +603,14 @@ void Connection::sendReadTaskResponse(const String & response) out->next(); } + +void Connection::sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) +{ + writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out); + response.serialize(*out); + out->next(); +} + void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String & name) { /// NOTE 'Throttler' is not used in this method (could use, but it's not important right now). 
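The new `sendMergeTreeReadTaskResponse` above follows the usual client packet framing: a variable-length packet id followed by the serialized payload. For orientation, a generic LEB128-style varint writer is sketched below; whether it matches ClickHouse's `writeVarUInt` byte-for-byte is an assumption, so treat it only as an illustration of the framing idea.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

/// Generic LEB128-style varint: 7 payload bits per byte, high bit set while more bytes follow.
std::vector<uint8_t> encodeVarUInt(uint64_t x)
{
    std::vector<uint8_t> out;
    do
    {
        uint8_t byte = x & 0x7F;
        x >>= 7;
        if (x != 0)
            byte |= 0x80;
        out.push_back(byte);
    } while (x != 0);
    return out;
}

int main()
{
    for (uint8_t b : encodeVarUInt(300))   /// 300 -> AC 02
        std::printf("%02X ", b);
    std::printf("\n");
}
```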
@@ -872,6 +880,10 @@ Packet Connection::receivePacket() case Protocol::Server::ReadTaskRequest: return res; + case Protocol::Server::MergeTreeReadTaskRequest: + res.request = receivePartitionReadRequest(); + return res; + case Protocol::Server::ProfileEvents: res.block = receiveProfileEvents(); return res; @@ -1023,6 +1035,13 @@ ProfileInfo Connection::receiveProfileInfo() const return profile_info; } +PartitionReadRequest Connection::receivePartitionReadRequest() const +{ + PartitionReadRequest request; + request.deserialize(*in); + return request; +} + void Connection::throwUnexpectedPacket(UInt64 packet_type, const char * expected) const { diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 3b49760ba10..2ea5334bbd3 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -16,6 +16,8 @@ #include +#include + #include #include @@ -104,6 +106,8 @@ public: void sendData(const Block & block, const String & name/* = "" */, bool scalar/* = false */) override; + void sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) override; + void sendExternalTablesData(ExternalTablesData & data) override; bool poll(size_t timeout_microseconds/* = 0 */) override; @@ -255,6 +259,7 @@ private: std::vector receiveMultistringMessage(UInt64 msg_type) const; std::unique_ptr receiveException() const; Progress receiveProgress() const; + PartitionReadRequest receivePartitionReadRequest() const; ProfileInfo receiveProfileInfo() const; void initInputBuffers(); diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index c73bea53d10..791ac4c1ef1 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -132,7 +132,7 @@ void HedgedConnections::sendQuery( const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) { std::lock_guard lock(cancel_mutex); @@ -171,7 +171,9 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - if (offset_states.size() > 1) + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + + if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) { modified_settings.parallel_replicas_count = offset_states.size(); modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; diff --git a/src/Client/HedgedConnections.h b/src/Client/HedgedConnections.h index e39d9582cde..d64f7ea4286 100644 --- a/src/Client/HedgedConnections.h +++ b/src/Client/HedgedConnections.h @@ -86,7 +86,7 @@ public: const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) override; void sendReadTaskResponse(const String &) override @@ -94,6 +94,11 @@ public: throw Exception("sendReadTaskResponse in not supported with HedgedConnections", ErrorCodes::LOGICAL_ERROR); } + void sendMergeTreeReadTaskResponse(PartitionReadResponse) override + { + throw Exception("sendMergeTreeReadTaskResponse in not supported with HedgedConnections", ErrorCodes::LOGICAL_ERROR); + } + Packet receivePacket() override; Packet receivePacketUnlocked(AsyncCallback async_callback, bool is_draining) override; @@ -112,6 +117,8 @@ public: bool hasActiveConnections() const override { return active_connection_count > 0; } + void setReplicaInfo(ReplicaInfo value) override { 
replica_info = value; } + private: /// If we don't receive data from replica and there is no progress in query /// execution for receive_data_timeout, we are trying to get new @@ -199,6 +206,8 @@ private: bool sent_query = false; bool cancelled = false; + ReplicaInfo replica_info; + mutable std::mutex cancel_mutex; }; diff --git a/src/Client/IConnections.h b/src/Client/IConnections.h index 53267cbbb3e..8dbd58c9598 100644 --- a/src/Client/IConnections.h +++ b/src/Client/IConnections.h @@ -1,6 +1,9 @@ #pragma once +#include + #include +#include namespace DB { @@ -27,10 +30,11 @@ public: const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) = 0; virtual void sendReadTaskResponse(const String &) = 0; + virtual void sendMergeTreeReadTaskResponse(PartitionReadResponse response) = 0; /// Get packet from any replica. virtual Packet receivePacket() = 0; @@ -56,6 +60,17 @@ public: /// Get the replica addresses as a string. virtual std::string dumpAddresses() const = 0; + + struct ReplicaInfo + { + size_t all_replicas_count{0}; + size_t number_of_current_replica{0}; + }; + + /// This is needed in max_parallel_replicas case. + /// We create a RemoteQueryExecutor for each replica + virtual void setReplicaInfo(ReplicaInfo value) = 0; + /// Returns the number of replicas. virtual size_t size() const = 0; diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 9d6b54ef32f..b7c6ae314e2 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -12,6 +12,8 @@ #include #include +#include + #include @@ -32,10 +34,13 @@ struct Packet Progress progress; ProfileInfo profile_info; std::vector part_uuids; + PartitionReadRequest request; + PartitionReadResponse response; Packet() : type(Protocol::Server::Hello) {} }; + /// Struct which represents data we are going to send for external table. struct ExternalTableData { @@ -96,6 +101,8 @@ public: /// Send all contents of external (temporary) tables. virtual void sendExternalTablesData(ExternalTablesData & data) = 0; + virtual void sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) = 0; + /// Check, if has data to read. 
virtual bool poll(size_t timeout_microseconds) = 0; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 4f476b57c27..9eaa9ce883a 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -424,6 +424,11 @@ void LocalConnection::sendExternalTablesData(ExternalTablesData &) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); } +void LocalConnection::sendMergeTreeReadTaskResponse(const PartitionReadResponse &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); +} + ServerConnectionPtr LocalConnection::createConnection(const ConnectionParameters &, ContextPtr current_context, bool send_progress) { return std::make_unique(current_context, send_progress); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 1cc23defa6e..fbd054506e7 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -92,6 +92,8 @@ public: void sendExternalTablesData(ExternalTablesData &) override; + void sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) override; + bool poll(size_t timeout_microseconds/* = 0 */) override; bool hasReadPendingData() const override; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index a27f7709555..c3000443a9c 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -1,9 +1,10 @@ #include + +#include +#include #include #include -#include -#include "Core/Protocol.h" - +#include namespace DB { @@ -110,7 +111,7 @@ void MultiplexedConnections::sendQuery( const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) { std::lock_guard lock(cancel_mutex); @@ -131,16 +132,29 @@ void MultiplexedConnections::sendQuery( modified_settings.group_by_two_level_threshold = 0; modified_settings.group_by_two_level_threshold_bytes = 0; } + + if (settings.allow_experimental_parallel_reading_from_replicas) + { + client_info.collaborate_with_initiator = true; + client_info.count_participating_replicas = replica_info.all_replicas_count; + client_info.number_of_current_replica = replica_info.number_of_current_replica; + } } + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + size_t num_replicas = replica_states.size(); if (num_replicas > 1) { - /// Use multiple replicas for parallel query processing. - modified_settings.parallel_replicas_count = num_replicas; + if (enable_sample_offset_parallel_processing) + /// Use multiple replicas for parallel query processing. 
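To make the sample-offset branch in the next hunk concrete: when sample-offset parallel processing is enabled, every replica receives the same total replica count and its own offset, so each one reads a disjoint sample of the data. A minimal sketch with hypothetical names (not the real `Settings` struct):

```cpp
#include <cstdio>
#include <vector>

/// Hypothetical stand-in for the two settings assigned per replica.
struct PerReplicaSettings
{
    unsigned parallel_replicas_count = 0;
    unsigned parallel_replica_offset = 0;
};

int main()
{
    const unsigned num_replicas = 3;
    std::vector<PerReplicaSettings> per_replica(num_replicas);
    for (unsigned i = 0; i < num_replicas; ++i)
    {
        per_replica[i].parallel_replicas_count = num_replicas;   /// same total everywhere
        per_replica[i].parallel_replica_offset = i;              /// unique offset -> disjoint sample
        std::printf("replica %u reads sample %u of %u\n", i, i, num_replicas);
    }
}
```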
+ modified_settings.parallel_replicas_count = num_replicas; + for (size_t i = 0; i < num_replicas; ++i) { - modified_settings.parallel_replica_offset = i; + if (enable_sample_offset_parallel_processing) + modified_settings.parallel_replica_offset = i; + replica_states[i].connection->sendQuery(timeouts, query, query_id, stage, &modified_settings, &client_info, with_pending_data); } @@ -179,6 +193,16 @@ void MultiplexedConnections::sendReadTaskResponse(const String & response) current_connection->sendReadTaskResponse(response); } + +void MultiplexedConnections::sendMergeTreeReadTaskResponse(PartitionReadResponse response) +{ + std::lock_guard lock(cancel_mutex); + if (cancelled) + return; + current_connection->sendMergeTreeReadTaskResponse(response); +} + + Packet MultiplexedConnections::receivePacket() { std::lock_guard lock(cancel_mutex); @@ -234,6 +258,7 @@ Packet MultiplexedConnections::drain() switch (packet.type) { + case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: case Protocol::Server::PartUUIDs: case Protocol::Server::Data: @@ -313,6 +338,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac switch (packet.type) { + case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: case Protocol::Server::PartUUIDs: case Protocol::Server::Data: diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index 4fb7d496b0c..e76d54218c7 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -38,10 +38,11 @@ public: const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) override; void sendReadTaskResponse(const String &) override; + void sendMergeTreeReadTaskResponse(PartitionReadResponse response) override; Packet receivePacket() override; @@ -62,6 +63,7 @@ public: /// Without locking, because sendCancel() does not change the state of the replicas. bool hasActiveConnections() const override { return active_connection_count > 0; } + void setReplicaInfo(ReplicaInfo value) override { replica_info = value; } private: Packet receivePacketUnlocked(AsyncCallback async_callback, bool is_draining) override; @@ -102,6 +104,8 @@ private: bool sent_query = false; bool cancelled = false; + ReplicaInfo replica_info; + /// A mutex for the sendCancel function to execute safely /// in separate thread. mutable std::mutex cancel_mutex; diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index aa19aa2d8b0..d675d166f5f 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -28,13 +28,20 @@ namespace ErrorCodes } -static String getTypeString(const AggregateFunctionPtr & func) +static String getTypeString(const AggregateFunctionPtr & func, std::optional version = std::nullopt) { WriteBufferFromOwnString stream; - stream << "AggregateFunction(" << func->getName(); + + stream << "AggregateFunction("; + + /// If aggregate function does not support versioning its version is 0 and is not printed. 
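The comment above states the naming rule that the following patch lines implement: a non-zero version is printed as the leading argument of `AggregateFunction(...)`, otherwise it is omitted. A stripped-down, hypothetical helper for illustration (it ignores function parameters, which the real `getTypeString` also prints):

```cpp
#include <cstdio>
#include <optional>
#include <string>
#include <vector>

/// Build "AggregateFunction([version, ]name, arg1, arg2, ...)", omitting a zero/absent version.
std::string makeTypeString(const std::string & func_name,
                           const std::vector<std::string> & argument_types,
                           std::optional<size_t> version = std::nullopt)
{
    std::string s = "AggregateFunction(";
    if (version && *version)
        s += std::to_string(*version) + ", ";
    s += func_name;
    for (const auto & arg : argument_types)
        s += ", " + arg;
    return s + ")";
}

int main()
{
    std::printf("%s\n", makeTypeString("sum", {"UInt64"}).c_str());      /// AggregateFunction(sum, UInt64)
    std::printf("%s\n", makeTypeString("sum", {"UInt64"}, 1).c_str());   /// AggregateFunction(1, sum, UInt64)
}
```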
+ if (version && *version) + stream << *version << ", "; + + stream << func->getName(); + const auto & parameters = func->getParameters(); const auto & argument_types = func->getArgumentTypes(); - if (!parameters.empty()) { stream << '('; @@ -56,7 +63,7 @@ static String getTypeString(const AggregateFunctionPtr & func) ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_, std::optional version_) - : func(func_), type_string(getTypeString(func)), version(version_) + : func(func_), type_string(getTypeString(func, version_)), version(version_) { } @@ -66,10 +73,11 @@ ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & fu } -void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_) +void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, size_t version_) { func = func_; - type_string = getTypeString(func); + version = version_; + type_string = getTypeString(func, version); } @@ -403,7 +411,7 @@ void ColumnAggregateFunction::protect() MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const { - return create(func); + return create(func, version); } Field ColumnAggregateFunction::operator[](size_t n) const diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 548ad238f0d..b5efff928bb 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -103,7 +103,7 @@ private: public: ~ColumnAggregateFunction() override; - void set(const AggregateFunctionPtr & func_); + void set(const AggregateFunctionPtr & func_, size_t version_); AggregateFunctionPtr getAggregateFunction() { return func; } AggregateFunctionPtr getAggregateFunction() const { return func; } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 54785f92926..84a796d03d9 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -601,6 +601,7 @@ M(631, UNKNOWN_FILE_SIZE) \ M(632, UNEXPECTED_DATA_AFTER_PARSED_VALUE) \ M(633, QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW) \ + M(634, MONGODB_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index 3f7f340c5d1..85d4e84abca 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -163,4 +163,3 @@ protected: /** Creates a new object to put into the pool. */ virtual ObjectPtr allocObject() = 0; }; - diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 12410de6bf0..0b2cd602b38 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -25,13 +25,13 @@ namespace { #if defined(OS_LINUX) thread_local size_t write_trace_iteration = 0; +#endif /// Even after timer_delete() the signal can be delivered, /// since it does not do anything with pending signals. /// /// And so to overcome this flag is exists, /// to ignore delivered signals after timer_delete(). thread_local bool signal_handler_disarmed = true; -#endif void writeTraceInfo(TraceType trace_type, int /* sig */, siginfo_t * info, void * context) { diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index fa32d56b350..27db87809d3 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -1,6 +1,44 @@ #include #include +#if defined(OS_DARWIN) && (USE_JEMALLOC) +/// In case of OSX jemalloc register itself as a default zone allocator. 
+/// +/// Sure jemalloc will register itself, since zone_register() declared with +/// constructor attribute (since zone_register is also forbidden from +/// optimizing out), however those constructors will be called before +/// constructors for global variable initializers (__cxx_global_var_init()). +/// +/// So to make jemalloc under OSX more stable, we will call it explicitly from +/// global variable initializers so that each allocation will use it. +/// (NOTE: It is ok to call it twice, since zone_register() is a no-op if the +/// default zone is already replaced with something.) +/// +/// Refs: https://github.com/jemalloc/jemalloc/issues/708 + +extern "C" +{ + extern void zone_register(); +} + +static struct InitializeJemallocZoneAllocatorForOSX +{ + InitializeJemallocZoneAllocatorForOSX() + { + zone_register(); + /// jemalloc() initializes itself only on malloc() + /// and so if some global initializer will have free(nullptr) + /// jemalloc may trigger some internal assertion. + /// + /// To prevent this, we explicitly call malloc(free()) here. + if (void * ptr = malloc(0)) + { + free(ptr); + } + } +} initializeJemallocZoneAllocatorForOSX; +#endif + /// Replace default new/delete with memory tracking versions. /// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index d5498a1bc13..b324ba119fa 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1,6 +1,5 @@ #include -#include #include "config_core.h" #if USE_NURAFT @@ -15,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index fb18e1135a5..08c675eb421 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -64,24 +64,26 @@ namespace Protocol { enum Enum { - Hello = 0, /// Name, version, revision. - Data = 1, /// A block of data (compressed or not). - Exception = 2, /// The exception during query execution. - Progress = 3, /// Query execution progress: rows read, bytes read. - Pong = 4, /// Ping response - EndOfStream = 5, /// All packets were transmitted - ProfileInfo = 6, /// Packet with profiling info. - Totals = 7, /// A block with totals (compressed or not). - Extremes = 8, /// A block with minimums and maximums (compressed or not). - TablesStatusResponse = 9, /// A response to TablesStatus request. - Log = 10, /// System logs of the query execution - TableColumns = 11, /// Columns' description for default values calculation - PartUUIDs = 12, /// List of unique parts ids. - ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed - /// This is such an inverted logic, where server sends requests - /// And client returns back response - ProfileEvents = 14, /// Packet with profile events from server. - MAX = ProfileEvents, + Hello = 0, /// Name, version, revision. + Data = 1, /// A block of data (compressed or not). + Exception = 2, /// The exception during query execution. + Progress = 3, /// Query execution progress: rows read, bytes read. + Pong = 4, /// Ping response + EndOfStream = 5, /// All packets were transmitted + ProfileInfo = 6, /// Packet with profiling info. + Totals = 7, /// A block with totals (compressed or not). + Extremes = 8, /// A block with minimums and maximums (compressed or not). + TablesStatusResponse = 9, /// A response to TablesStatus request. 
+ Log = 10, /// System logs of the query execution + TableColumns = 11, /// Columns' description for default values calculation + PartUUIDs = 12, /// List of unique parts ids. + ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed + /// This is such an inverted logic, where server sends requests + /// And client returns back response + ProfileEvents = 14, /// Packet with profile events from server. + MergeTreeReadTaskRequest = 15, /// Request from a MergeTree replica to a coordinator + MAX = MergeTreeReadTaskRequest, + }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -106,6 +108,7 @@ namespace Protocol "PartUUIDs", "ReadTaskRequest", "ProfileEvents", + "MergeTreeReadTaskRequest", }; return packet <= MAX ? data[packet] @@ -130,20 +133,20 @@ namespace Protocol { enum Enum { - Hello = 0, /// Name, version, revision, default DB - Query = 1, /// Query id, query settings, stage up to which the query must be executed, - /// whether the compression must be used, - /// query text (without data for INSERTs). - Data = 2, /// A block of data (compressed or not). - Cancel = 3, /// Cancel the query execution. - Ping = 4, /// Check that connection to the server is alive. - TablesStatusRequest = 5, /// Check status of tables on the server. - KeepAlive = 6, /// Keep the connection alive - Scalar = 7, /// A block of data (compressed or not). - IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing - ReadTaskResponse = 9, /// TODO: - - MAX = ReadTaskResponse, + Hello = 0, /// Name, version, revision, default DB + Query = 1, /// Query id, query settings, stage up to which the query must be executed, + /// whether the compression must be used, + /// query text (without data for INSERTs). + Data = 2, /// A block of data (compressed or not). + Cancel = 3, /// Cancel the query execution. + Ping = 4, /// Check that connection to the server is alive. + TablesStatusRequest = 5, /// Check status of tables on the server. + KeepAlive = 6, /// Keep the connection alive + Scalar = 7, /// A block of data (compressed or not). + IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing + ReadTaskResponse = 9, /// A filename to read from s3 (used in s3Cluster) + MergeTreeReadTaskResponse = 10, /// Coordinator's decision with a modified set of mark ranges allowed to read + MAX = MergeTreeReadTaskResponse, }; inline const char * toString(UInt64 packet) @@ -159,6 +162,7 @@ namespace Protocol "Scalar", "IgnoredPartUUIDs", "ReadTaskResponse", + "MergeTreeReadTaskResponse" }; return packet <= MAX ? data[packet] diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index ac0fba384b8..36820788b91 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -31,6 +31,9 @@ #define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1 +#define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 1 +#define DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS 54453 + /// Minimum revision supporting interserver secret. #define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441 @@ -48,6 +51,7 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
-#define DBMS_TCP_PROTOCOL_VERSION 54452 + +#define DBMS_TCP_PROTOCOL_VERSION 54453 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bcbe45dd002..47b01655c26 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -75,6 +75,7 @@ class IColumn; M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ + M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \ @@ -125,6 +126,8 @@ class IColumn; M(UInt64, parallel_replicas_count, 0, "", 0) \ M(UInt64, parallel_replica_offset, 0, "", 0) \ \ + M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ + \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \ \ M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \ diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index a5bf047f092..c65a30b80ac 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -85,7 +85,7 @@ String DataTypeAggregateFunction::getNameImpl(bool with_version) const MutableColumnPtr DataTypeAggregateFunction::createColumn() const { - return ColumnAggregateFunction::create(function, version); + return ColumnAggregateFunction::create(function, getVersion()); } @@ -139,17 +139,20 @@ static DataTypePtr create(const ASTPtr & arguments) if (!arguments || arguments->children.empty()) throw Exception("Data type AggregateFunction requires parameters: " - "name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + "version(optionally), name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ASTPtr data_type_ast = arguments->children[0]; size_t argument_types_start_idx = 1; /* If aggregate function definition doesn't have version, it will have in AST children args [ASTFunction, types...] - in case * it is parametric, or [ASTIdentifier, types...] - otherwise. If aggregate function has version in AST, then it will be: - * [ASTLitearl, ASTFunction (or ASTIdentifier), types...]. + * [ASTLiteral, ASTFunction (or ASTIdentifier), types...]. 
*/ if (auto * version_ast = arguments->children[0]->as()) { + if (arguments->children.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Data type AggregateFunction has version, but it requires at least one more parameter - name of aggregate function"); version = version_ast->value.safeGet(); data_type_ast = arguments->children[1]; argument_types_start_idx = 2; diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 84610557d21..7fa3a394be8 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -91,5 +91,6 @@ void registerDataTypeString(DataTypeFactory & factory) factory.registerAlias("NCHAR LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("BINARY LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("BINARY VARYING", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("VARBINARY", "String", DataTypeFactory::CaseInsensitive); } } diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 4f804a0ca50..10ef35b7e7c 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -45,6 +45,15 @@ std::pair splitName(const std::string & name) return {name.substr(0, idx), name.substr(idx + 1)}; } +std::pair splitName(const std::string_view & name) +{ + auto idx = name.find_first_of('.'); + if (idx == std::string::npos || idx == 0 || idx + 1 == name.size()) + return {name, {}}; + + return {name.substr(0, idx), name.substr(idx + 1)}; +} + std::string extractTableName(const std::string & nested_name) { @@ -211,6 +220,7 @@ void validateArraySizes(const Block & block) } } + std::unordered_set getAllTableNames(const Block & block) { std::unordered_set nested_table_names; diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index d16e309fc81..9ed48920ce2 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -12,6 +12,7 @@ namespace Nested std::string concatenateName(const std::string & nested_table_name, const std::string & nested_field_name); std::pair splitName(const std::string & name); + std::pair splitName(const std::string_view & name); /// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot. 
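The new `string_view` overload of `splitName` added above follows the rule spelled out in this header: split at the first dot, unless the dot is missing, leading, or trailing. A self-contained restatement for clarity (the function name is illustrative):

```cpp
#include <cassert>
#include <string_view>
#include <utility>

/// Split "table.column" at the first dot; degenerate names are returned unsplit.
std::pair<std::string_view, std::string_view> splitAtFirstDot(std::string_view name)
{
    auto idx = name.find('.');
    if (idx == std::string_view::npos || idx == 0 || idx + 1 == name.size())
        return {name, {}};
    return {name.substr(0, idx), name.substr(idx + 1)};
}

int main()
{
    assert(splitAtFirstDot("nested.key").first == "nested");
    assert(splitAtFirstDot("nested.key").second == "key");
    assert(splitAtFirstDot("plain").second.empty());
    assert(splitAtFirstDot(".leading").first == ".leading");    /// leading dot: no split
    assert(splitAtFirstDot("trailing.").first == "trailing.");  /// trailing dot: no split
}
```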
std::string extractTableName(const std::string & nested_name); diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 47b4bed9b31..442df47a773 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -79,7 +79,7 @@ void SerializationAggregateFunction::deserializeBinaryBulk(IColumn & column, Rea ColumnAggregateFunction::Container & vec = real_column.getData(); Arena & arena = real_column.createOrGetArena(); - real_column.set(function); + real_column.set(function, version); vec.reserve(vec.size() + limit); size_t size_of_state = function->sizeOfData(); diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 5e2b31ebb9d..261d0ff3c5d 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -394,12 +394,65 @@ template ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested) { - return safeDeserialize(column, *nested, - [&istr] + if (istr.eof() || (*istr.position() != 'N' && *istr.position() != 'n')) + { + /// This is not null, surely. + return safeDeserialize(column, *nested, + [] { return false; }, + [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextQuoted(nested_column, istr, settings); }); + } + + /// Check if we have enough data in buffer to check if it's a null. + if (istr.available() >= 4) + { + auto check_for_null = [&istr]() { - return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); - }, - [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextQuoted(nested_column, istr, settings); }); + auto * pos = istr.position(); + if (checkStringCaseInsensitive("NULL", istr)) + return true; + istr.position() = pos; + return false; + }; + auto deserialize_nested = [&nested, &settings, &istr] (IColumn & nested_column) + { + nested->deserializeTextQuoted(nested_column, istr, settings); + }; + return safeDeserialize(column, *nested, check_for_null, deserialize_nested); + } + + /// We don't have enough data in buffer to check if it's a NULL + /// and we cannot check it just by one symbol (otherwise we won't be able + /// to differentiate for example NULL and NaN for float) + /// Use PeekableReadBuffer to make a checkpoint before checking + /// null and rollback if the check was failed. + PeekableReadBuffer buf(istr, true); + auto check_for_null = [&buf]() + { + buf.setCheckpoint(); + SCOPE_EXIT(buf.dropCheckpoint()); + if (checkStringCaseInsensitive("NULL", buf)) + return true; + + buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column) + { + nested->deserializeTextQuoted(nested_column, buf, settings); + /// Check that we don't have any unread data in PeekableReadBuffer own memory. + if (likely(!buf.hasUnreadData())) + return; + + /// We have some unread data in PeekableReadBuffer own memory. + /// It can happen only if there is an unquoted string instead of a number. 
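The `PeekableReadBuffer` path in the hunk below relies on a checkpoint/rollback pattern: try to consume the token `NULL` case-insensitively, and restore the read position on failure so the nested deserializer sees untouched input (this is what distinguishes `NULL` from, say, `NaN`). A buffer-free sketch of that pattern, with hypothetical names:

```cpp
#include <cassert>
#include <cctype>
#include <string_view>

/// Try to consume "NULL" (case-insensitively) at input[pos]; on failure roll pos back.
bool tryConsumeNullPrefix(std::string_view input, size_t & pos)
{
    constexpr std::string_view token = "NULL";
    const size_t checkpoint = pos;            /// remember where we started
    for (char expected : token)
    {
        if (pos >= input.size() || std::toupper(static_cast<unsigned char>(input[pos])) != expected)
        {
            pos = checkpoint;                 /// rollback: leave the input untouched
            return false;
        }
        ++pos;
    }
    return true;
}

int main()
{
    size_t pos = 0;
    assert(tryConsumeNullPrefix("null, 1", pos) && pos == 4);
    pos = 0;
    assert(!tryConsumeNullPrefix("nan", pos) && pos == 0);    /// NaN must not be mistaken for NULL
}
```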
+ throw DB::ParsingException( + ErrorCodes::CANNOT_READ_ALL_DATA, + "Error while parsing Nullable: got an unquoted string {} instead of a number", + String(buf.position(), std::min(10ul, buf.available()))); + }; + + return safeDeserialize(column, *nested, check_for_null, deserialize_nested); } diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 09d3752b180..532691f7978 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -11,7 +11,7 @@ namespace DB { -TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const ASTPtr & ast) +TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast) { assert(global_context == global_context->getGlobalContext()); TableLoadingDependenciesVisitor::Data data; @@ -20,6 +20,7 @@ TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const data.global_context = global_context; TableLoadingDependenciesVisitor visitor{data}; visitor.visit(ast); + data.dependencies.erase(table); return data.dependencies; } @@ -132,7 +133,10 @@ void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & func } if (qualified_name.database.empty()) + { + /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. qualified_name.database = data.default_database; + } data.dependencies.emplace(std::move(qualified_name)); } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index b5ca976f665..ae7f7aa94d9 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -12,7 +12,7 @@ class ASTStorage; using TableNamesSet = std::unordered_set; -TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const ASTPtr & ast); +TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast); /// Visits ASTCreateQuery and extracts names of table (or dictionary) dependencies /// from column default expressions (joinGet, dictGet, etc) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index c898d5ee943..cb0c1cdae95 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -43,6 +43,7 @@ DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, c , db_uuid(uuid) { assert(db_uuid != UUIDHelpers::Nil); + fs::create_directories(fs::path(getContext()->getPath()) / "metadata"); fs::create_directories(path_to_table_symlinks); tryCreateMetadataSymlink(); } diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 4b3e06e318e..1ff84b53eee 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -269,6 +269,7 @@ StoragePtr DatabaseLazy::loadTable(const String & table_name) const } void DatabaseLazy::clearExpiredTables() const +try { std::lock_guard lock(mutex); auto time_now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); @@ -303,6 +304,10 @@ void DatabaseLazy::clearExpiredTables() const cache_expiration_queue.splice(cache_expiration_queue.begin(), busy_tables, busy_tables.begin(), busy_tables.end()); } +catch (...) 
+{ + tryLogCurrentException(log, __PRETTY_FUNCTION__); +} DatabaseLazyIterator::DatabaseLazyIterator(const DatabaseLazy & database_, Strings && table_names_) diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 50e56885743..3309d25b1c2 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -121,7 +121,7 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot alter: There is no metadata of table {}", table_id.getNameForLogs()); applyMetadataChangesToCreateQuery(it->second, metadata); - TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), it->second); + TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index b7a0aff24d6..b5557d9a08d 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -181,8 +181,8 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables return; } - TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext(), ast); QualifiedTableName qualified_name{database_name, create_query->getTable()}; + TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext(), qualified_name, ast); std::lock_guard lock{metadata.mutex}; metadata.parsed_tables[qualified_name] = ParsedTableMetadata{full_path.string(), ast}; @@ -297,7 +297,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta out.close(); } - TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), ast); + TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, local_context); diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index b8c380b7be1..4ce719279f3 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -133,10 +133,14 @@ void TablesLoader::removeUnresolvableDependencies(bool remove_loaded) /// Table exists and it's already loaded if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context)) return remove_loaded; - /// It's XML dictionary. It was loaded before tables and DDL dictionaries. + /// It's an XML dictionary. if (dependency_name.database == metadata.default_database && global_context->getExternalDictionariesLoader().has(dependency_name.table)) - return remove_loaded; + { + LOG_WARNING(log, "Tables {} depend on XML dictionary {}, but XML dictionaries are loaded independently. " + "Consider converting it to a DDL dictionary.", fmt::join(info.dependent_database_objects, ", "), dependency_name); + return true; + } /// Some tables depends on table "dependency_name", but there is no such table in DatabaseCatalog and we don't have its metadata.
/// We will ignore it and try to load dependent tables without "dependency_name" diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index edca02b83ad..1ddcdd96454 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -230,74 +230,64 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) std::string settings_config_prefix = config_prefix + ".clickhouse"; - std::unique_ptr configuration; + std::string host = config.getString(settings_config_prefix + ".host", "localhost"); + std::string user = config.getString(settings_config_prefix + ".user", "default"); + std::string password = config.getString(settings_config_prefix + ".password", ""); + std::string db = config.getString(settings_config_prefix + ".db", default_database); + std::string table = config.getString(settings_config_prefix + ".table", ""); + UInt16 port = static_cast(config.getUInt(settings_config_prefix + ".port", default_port)); + auto named_collection = created_from_ddl ? getExternalDataSourceConfiguration(config, settings_config_prefix, global_context) : std::nullopt; + if (named_collection) { - std::string host = named_collection->host; - UInt16 port = named_collection->port; - configuration = std::make_unique( - ClickHouseDictionarySource::Configuration{ - .host = host, - .user = named_collection->username, - .password = named_collection->password, - .db = named_collection->database, - .table = named_collection->table, - .query = config.getString(settings_config_prefix + ".query", ""), - .where = config.getString(settings_config_prefix + ".where", ""), - .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""), - .update_field = config.getString(settings_config_prefix + ".update_field", ""), - .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), - .port = port, - .is_local = isLocalAddress({host, port}, default_port), - .secure = config.getBool(settings_config_prefix + ".secure", false) - }); - } - else - { - std::string host = config.getString(settings_config_prefix + ".host", "localhost"); - UInt16 port = static_cast(config.getUInt(settings_config_prefix + ".port", default_port)); - configuration = std::make_unique( - ClickHouseDictionarySource::Configuration{ - .host = host, - .user = config.getString(settings_config_prefix + ".user", "default"), - .password = config.getString(settings_config_prefix + ".password", ""), - .db = config.getString(settings_config_prefix + ".db", default_database), - .table = config.getString(settings_config_prefix + ".table", ""), - .query = config.getString(settings_config_prefix + ".query", ""), - .where = config.getString(settings_config_prefix + ".where", ""), - .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""), - .update_field = config.getString(settings_config_prefix + ".update_field", ""), - .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), - .port = port, - .is_local = isLocalAddress({host, port}, default_port), - .secure = config.getBool(settings_config_prefix + ".secure", false) - }); + host = named_collection->host; + user = named_collection->username; + password = named_collection->password; + db = named_collection->database; + table = named_collection->table; + port = named_collection->port; } + ClickHouseDictionarySource::Configuration configuration{ + .host = host, + .user = user, + .password = password, + .db = db, + 
.table = table, + .query = config.getString(settings_config_prefix + ".query", ""), + .where = config.getString(settings_config_prefix + ".where", ""), + .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""), + .update_field = config.getString(settings_config_prefix + ".update_field", ""), + .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), + .port = port, + .is_local = isLocalAddress({host, port}, default_port), + .secure = config.getBool(settings_config_prefix + ".secure", false)}; + ContextMutablePtr context; - if (configuration->is_local) + if (configuration.is_local) { /// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication). Session session(global_context, ClientInfo::Interface::LOCAL); - session.authenticate(configuration->user, configuration->password, {}); + session.authenticate(configuration.user, configuration.password, {}); context = session.makeQueryContext(); } else { context = Context::createCopy(global_context); } + context->applySettingsChanges(readSettingsFromDictionaryConfig(config, config_prefix)); String dictionary_name = config.getString(".dictionary.name", ""); String dictionary_database = config.getString(".dictionary.database", ""); - if (dictionary_name == configuration->table && dictionary_database == configuration->db) + if (dictionary_name == configuration.table && dictionary_database == configuration.db) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ClickHouseDictionarySource table cannot be dictionary table"); - return std::make_unique(dict_struct, *configuration, sample_block, context); + return std::make_unique(dict_struct, configuration, sample_block, context); }; factory.registerSource("clickhouse", create_table_source); diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index 9ddaaeb573a..f513c7b2f61 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static constexpr std::string_view CONDITION_PLACEHOLDER_TO_REPLACE_VALUE = "{condition}"; ExternalQueryBuilder::ExternalQueryBuilder( const DictionaryStructure & dict_struct_, @@ -215,7 +216,7 @@ std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_ { writeString(query, out); - auto condition_position = query.find("{condition}"); + auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -230,7 +231,7 @@ std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_ const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } @@ -300,7 +301,7 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector { writeString(query, out); - auto condition_position = query.find("{condition}"); + auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -315,7 +316,7 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - 
query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } @@ -391,7 +392,7 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery( { writeString(query, out); - auto condition_position = query.find("{condition}"); + auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -406,7 +407,7 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery( const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 42d6a0c0c03..7dc955eb8f7 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -78,12 +78,14 @@ RangeHashedDictionary::RangeHashedDictionary( const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, - bool require_nonempty_) + bool require_nonempty_, + BlockPtr update_field_loaded_block_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr{std::move(source_ptr_)} , dict_lifetime(dict_lifetime_) , require_nonempty(require_nonempty_) + , update_field_loaded_block(std::move(update_field_loaded_block_)) { createAttributes(); loadData(); @@ -295,7 +297,6 @@ void RangeHashedDictionary::createAttributes() for (const auto & attribute : dict_struct.attributes) { - attribute_index_by_name.emplace(attribute.name, attributes.size()); attributes.push_back(createAttribute(attribute)); if (attribute.hierarchical) @@ -307,68 +308,21 @@ void RangeHashedDictionary::createAttributes() template void RangeHashedDictionary::loadData() { - QueryPipeline pipeline(source_ptr->loadAll()); - - PullingPipelineExecutor executor(pipeline); - Block block; - while (executor.pull(block)) + if (!source_ptr->hasUpdateField()) { - size_t skip_keys_size_offset = dict_struct.getKeysSize(); + QueryPipeline pipeline(source_ptr->loadAll()); - Columns key_columns; - key_columns.reserve(skip_keys_size_offset); - - /// Split into keys columns and attribute columns - for (size_t i = 0; i < skip_keys_size_offset; ++i) - key_columns.emplace_back(block.safeGetByPosition(i).column); - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - - element_count += keys_size; - - // Support old behaviour, where invalid date means 'open range'. 
- const bool is_date = isDate(dict_struct.range_min->type); - - const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column); - const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column); - - skip_keys_size_offset += 2; - - for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) { - const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column; - auto & attribute = attributes[attribute_index]; - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - - RangeStorageType lower_bound; - RangeStorageType upper_bound; - - if (is_date) - { - lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0); - upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1); - } - else - { - lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE); - upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE); - } - - if constexpr (std::is_same_v) - key = copyKeyInArena(key); - - setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]); - keys_extractor.rollbackCurrentKey(); - } - - keys_extractor.reset(); + blockToAttributes(block); } } + else + { + updateData(); + } if (require_nonempty && 0 == element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, @@ -401,6 +355,9 @@ void RangeHashedDictionary::calculateBytesAllocated() if constexpr (dictionary_key_type == DictionaryKeyType::Complex) bytes_allocated += complex_key_arena.size(); + + if (update_field_loaded_block) + bytes_allocated += update_field_loaded_block->allocatedBytes(); } template @@ -497,6 +454,106 @@ void RangeHashedDictionary::getItemsImpl( found_count.fetch_add(keys_found, std::memory_order_relaxed); } +template +void RangeHashedDictionary::updateData() +{ + if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) + { + QueryPipeline pipeline(source_ptr->loadUpdatedAll()); + + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) + { + /// We are using this to keep saved data if input stream consists of multiple blocks + if (!update_field_loaded_block) + update_field_loaded_block = std::make_shared(block.cloneEmpty()); + + for (size_t attribute_index = 0; attribute_index < block.columns(); ++attribute_index) + { + const IColumn & update_column = *block.getByPosition(attribute_index).column.get(); + MutableColumnPtr saved_column = update_field_loaded_block->getByPosition(attribute_index).column->assumeMutable(); + saved_column->insertRangeFrom(update_column, 0, update_column.size()); + } + } + } + else + { + static constexpr size_t range_columns_size = 2; + + auto pipe = source_ptr->loadUpdatedAll(); + mergeBlockWithPipe( + dict_struct.getKeysSize() + range_columns_size, + *update_field_loaded_block, + std::move(pipe)); + } + + if (update_field_loaded_block) + { + blockToAttributes(*update_field_loaded_block.get()); + } +} + +template +void RangeHashedDictionary::blockToAttributes(const Block & block [[maybe_unused]]) +{ + size_t skip_keys_size_offset = dict_struct.getKeysSize(); + + Columns key_columns; + key_columns.reserve(skip_keys_size_offset); + + /// Split into 
keys columns and attribute columns + for (size_t i = 0; i < skip_keys_size_offset; ++i) + key_columns.emplace_back(block.safeGetByPosition(i).column); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + element_count += keys_size; + + // Support old behaviour, where invalid date means 'open range'. + const bool is_date = isDate(dict_struct.range_min->type); + + const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column); + const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column); + + skip_keys_size_offset += 2; + + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + { + const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column; + auto & attribute = attributes[attribute_index]; + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + + RangeStorageType lower_bound; + RangeStorageType upper_bound; + + if (is_date) + { + lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0); + upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1); + } + else + { + lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE); + upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE); + } + + if constexpr (std::is_same_v) + key = copyKeyInArena(key); + + setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]); + keys_extractor.rollbackCurrentKey(); + } + + keys_extractor.reset(); + } +} + template template void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value) diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 1ccd9708d79..1605e2bab81 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -39,7 +39,8 @@ public: const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, - bool require_nonempty_); + bool require_nonempty_, + BlockPtr update_field_loaded_block_ = nullptr); std::string getTypeName() const override { return "RangeHashed"; } @@ -63,7 +64,7 @@ public: std::shared_ptr clone() const override { - return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty); + return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, update_field_loaded_block); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -156,6 +157,10 @@ private: ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; + void updateData(); + + void blockToAttributes(const Block & block); + template static void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value); @@ -185,8 +190,8 @@ private: const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; const bool require_nonempty; + BlockPtr update_field_loaded_block; - std::map attribute_index_by_name; std::vector attributes; Arena complex_key_arena; diff 
--git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 5264e6413e7..c116a62a977 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -97,7 +97,7 @@ std::unique_ptr DiskHDFS::writeFile(const String & path /// Single O_WRONLY in libhdfs adds O_TRUNC auto hdfs_buffer = std::make_unique(hdfs_path, - config, buf_size, + config, settings->replication, buf_size, mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); return std::make_unique>(std::move(hdfs_buffer), @@ -142,12 +142,13 @@ bool DiskHDFS::checkUniqueId(const String & hdfs_uri) const namespace { -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings) { return std::make_unique( config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".thread_pool_size", 16), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), + settings.hdfs_replication); } } @@ -173,7 +174,7 @@ void registerDiskHDFS(DiskFactory & factory) return std::make_shared( name, uri, - getSettings(config, config_prefix), + getSettings(config, config_prefix, context_->getSettingsRef()), metadata_disk, config); }; diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 881d6e2937c..47150f1cfd8 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -14,14 +14,17 @@ struct DiskHDFSSettings size_t min_bytes_for_seek; int thread_pool_size; int objects_chunk_size_to_delete; + int replication; DiskHDFSSettings( int min_bytes_for_seek_, int thread_pool_size_, - int objects_chunk_size_to_delete_) + int objects_chunk_size_to_delete_, + int replication_) : min_bytes_for_seek(min_bytes_for_seek_) , thread_pool_size(thread_pool_size_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) {} + , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) + , replication(replication_) {} }; diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index c9b6532e76c..1b0cc17cb41 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -1,9 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif - #include #include #include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 5bc7d4e4819..f15de4a2d7f 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -1,9 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif - #include #include #include diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 2c2662a6a67..d956d9e6bfb 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -69,10 +69,7 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca readEscapedString(tmp, buf); break; case FormatSettings::EscapingRule::Quoted: - /// FIXME: it skips only strings, not numbers, arrays or tuples. - /// we should read until delimiter and skip all data between - /// single quotes. 
- readQuotedString(tmp, buf); + readQuotedFieldIntoString(tmp, buf); break; case FormatSettings::EscapingRule::CSV: readCSVString(tmp, buf, format_settings.csv); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 456e2016f4e..75b096de425 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -304,6 +304,26 @@ OutputFormatPtr FormatFactory::getOutputFormat( return format; } +String FormatFactory::getContentType( + const String & name, + ContextPtr context, + const std::optional & _format_settings) const +{ + const auto & output_getter = getCreators(name).output_creator; + if (!output_getter) + throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT, "Format {} is not suitable for output (with processors)", name); + + auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); + + Block empty_block; + RowOutputFormatParams empty_params; + WriteBufferFromOwnString empty_buffer; + auto format = output_getter(empty_buffer, empty_block, empty_params, format_settings); + + return format->getContentType(); +} + + void FormatFactory::registerInputFormat(const String & name, InputCreator input_creator) { auto & target = dict[name].input_creator; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 77ecd2c167f..ea285c47996 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -131,6 +131,11 @@ public: const Block & sample, ContextPtr context, WriteCallback callback = {}, + const std::optional & _format_settings = std::nullopt) const; + + String getContentType( + const String & name, + ContextPtr context, const std::optional & format_settings = std::nullopt) const; void registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine); diff --git a/src/Formats/ProtobufSchemas.cpp b/src/Formats/ProtobufSchemas.cpp index a6e63b1c256..9f25f830e37 100644 --- a/src/Formats/ProtobufSchemas.cpp +++ b/src/Formats/ProtobufSchemas.cpp @@ -71,6 +71,7 @@ ProtobufSchemas::~ProtobufSchemas() = default; const google::protobuf::Descriptor * ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info) { + std::lock_guard lock(mutex); auto it = importers.find(info.schemaDirectory()); if (it == importers.end()) it = importers.emplace(info.schemaDirectory(), std::make_unique(info.schemaDirectory())).first; diff --git a/src/Formats/ProtobufSchemas.h b/src/Formats/ProtobufSchemas.h index f911cb2ce4b..0a2eeea9893 100644 --- a/src/Formats/ProtobufSchemas.h +++ b/src/Formats/ProtobufSchemas.h @@ -4,6 +4,7 @@ #if USE_PROTOBUF #include +#include #include #include #include @@ -39,6 +40,7 @@ public: private: class ImporterWithSourceTree; std::unordered_map> importers; + std::mutex mutex; }; } diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index 93a3096492a..07189d0edfc 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -28,6 +28,8 @@ # include # include # include +# include +# include # include # include # include @@ -139,6 +141,15 @@ namespace } + WriteBuffer & writeIndent(WriteBuffer & out, size_t size) { return out << String(size * 4, ' '); } + + + [[noreturn]] void wrongNumberOfColumns(size_t number_of_columns, const String & expected) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of columns: expected {}, specified {}", expected, number_of_columns); + } + + struct ProtobufReaderOrWriter { ProtobufReaderOrWriter(ProtobufReader & reader_) : 
reader(&reader_) {} // NOLINT(google-explicit-constructor) @@ -152,8 +163,12 @@ namespace class ProtobufSerializerSingleValue : public ProtobufSerializer { protected: - ProtobufSerializerSingleValue(const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : field_descriptor(field_descriptor_) + ProtobufSerializerSingleValue( + const std::string_view & column_name_, + const FieldDescriptor & field_descriptor_, + const ProtobufReaderOrWriter & reader_or_writer_) + : column_name(column_name_) + , field_descriptor(field_descriptor_) , field_typeid(field_descriptor_.type()) , field_tag(field_descriptor.number()) , reader(reader_or_writer_.reader) @@ -164,13 +179,15 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; } void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]->getPtr(); } @@ -259,14 +276,28 @@ namespace return result; } + [[noreturn]] void incompatibleColumnType(const std::string_view & column_type) const + { + throw Exception( + ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD, + "The column {} ({}) cannot be serialized to the field {} ({}) because their types are not compatible", + quoteString(column_name), + column_type, + quoteString(field_descriptor.full_name()), + field_descriptor.type_name()); + } + [[noreturn]] void cannotConvertValue(const std::string_view & src_value, const std::string_view & src_type_name, const std::string_view & dest_type_name) const { throw Exception( - "Could not convert value '" + String{src_value} + "' from type " + String{src_type_name} + " to type " + String{dest_type_name} + - " while " + (reader ? "reading" : "writing") + " field " + field_descriptor.name(), + "Could not convert value '" + String{src_value} + "' from type " + String{src_type_name} + " to type " + + String{dest_type_name} + " while " + (reader ? "reading" : "writing") + " field " + + quoteString(field_descriptor.name()) + " " + (reader ?
"for inserting into" : "extracted from") + " column " + + quoteString(column_name), ErrorCodes::PROTOBUF_BAD_CAST); } + const String column_name; const FieldDescriptor & field_descriptor; const FieldTypeId field_typeid; const int field_tag; @@ -289,8 +320,8 @@ namespace public: using ColumnType = ColumnVector; - ProtobufSerializerNumber(const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + ProtobufSerializerNumber(const std::string_view & column_name_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } @@ -319,6 +350,13 @@ namespace column_vector.insertValue(getDefaultNumber()); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerNumber<" << TypeName << ">: column " << quoteString(column_name) + << " -> field " << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() + << ")\n"; + } + private: void setFunctions() { @@ -469,7 +507,7 @@ namespace case FieldTypeId::TYPE_ENUM: { if (std::is_floating_point_v) - failedToSetFunctions(); + incompatibleColumnType(TypeName); write_function = [this](NumberType value) { @@ -484,18 +522,10 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType(TypeName); } } - [[noreturn]] void failedToSetFunctions() const - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(TypeName), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - NumberType getDefaultNumber() { if (!default_number) @@ -529,10 +559,11 @@ namespace using ColumnType = std::conditional_t; ProtobufSerializerString( + const std::string_view & column_name_, const std::shared_ptr & fixed_string_data_type_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , fixed_string_data_type(fixed_string_data_type_) , n(fixed_string_data_type->getN()) { @@ -542,8 +573,10 @@ namespace } ProtobufSerializerString( - const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + const std::string_view & column_name_, + const google::protobuf::FieldDescriptor & field_descriptor_, + const ProtobufReaderOrWriter & reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { static_assert(!is_fixed_string, "This constructor for String only"); setFunctions(); @@ -649,6 +682,13 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerString<" << (is_fixed_string ? "fixed" : "") << ">: column " + << quoteString(column_name) << " -> field " << quoteString(field_descriptor.full_name()) << " (" + << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -799,18 +839,10 @@ namespace } default: - failedToSetFunctions(); + this->incompatibleColumnType(is_fixed_string ? 
"FixedString" : "String"); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(is_fixed_string ? "FixedString" : "String"), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - const PaddedPODArray & getDefaultString() { if (!default_string) @@ -890,16 +922,24 @@ namespace using BaseClass = ProtobufSerializerNumber; ProtobufSerializerEnum( + const std::string_view & column_name_, const std::shared_ptr & enum_data_type_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : BaseClass(field_descriptor_, reader_or_writer_), enum_data_type(enum_data_type_) + : BaseClass(column_name_, field_descriptor_, reader_or_writer_), enum_data_type(enum_data_type_) { assert(enum_data_type); setFunctions(); prepareEnumMapping(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerEnum<" << TypeName << ">: column " << quoteString(this->column_name) + << " -> field " << quoteString(this->field_descriptor.full_name()) << " (" + << this->field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -964,18 +1004,10 @@ namespace } default: - failedToSetFunctions(); + this->incompatibleColumnType(enum_data_type->getName()); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(this->field_descriptor.full_name()) + " has an incompatible type " + this->field_descriptor.type_name() - + " for serialization of the data type " + quoteString(enum_data_type->getName()), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - void checkEnumDataTypeValue(NumberType value) { enum_data_type->findByValue(value); /// Throws an exception if the value isn't defined in the DataTypeEnum. 
@@ -1089,10 +1121,11 @@ namespace using ColumnType = ColumnDecimal; ProtobufSerializerDecimal( + const std::string_view & column_name_, const DataTypeDecimalBase & decimal_data_type_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , precision(decimal_data_type_.getPrecision()) , scale(decimal_data_type_.getScale()) { @@ -1123,6 +1156,13 @@ namespace column_decimal.insertValue(getDefaultDecimal()); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDecimal<" << TypeName << ">: column " << quoteString(column_name) + << " -> field " << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() + << ")\n"; + } + private: void setFunctions() { @@ -1227,7 +1267,7 @@ namespace case FieldTypeId::TYPE_BOOL: { if (std::is_same_v) - failedToSetFunctions(); + incompatibleColumnType(TypeName); else { write_function = [this](const DecimalType & decimal) @@ -1281,18 +1321,10 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType(TypeName); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(TypeName), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - DecimalType getDefaultDecimal() { if (!default_decimal) @@ -1349,13 +1381,20 @@ namespace { public: ProtobufSerializerDate( + const std::string_view & column_name_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerNumber(field_descriptor_, reader_or_writer_) + : ProtobufSerializerNumber(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDate: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -1395,7 +1434,7 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType("Date"); } } @@ -1412,14 +1451,6 @@ namespace readDateText(date, buf); return date; } - - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type 'Date'", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } }; @@ -1428,15 +1459,22 @@ namespace { public: ProtobufSerializerDateTime( + const std::string_view & column_name_, const DataTypeDateTime & type, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerNumber(field_descriptor_, reader_or_writer_), + : ProtobufSerializerNumber(column_name_, field_descriptor_, reader_or_writer_), date_lut(type.getTimeZone()) { setFunctions(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDateTime: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + protected: const 
DateLUTImpl & date_lut; @@ -1478,7 +1516,7 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType("DateTime"); } } @@ -1497,14 +1535,6 @@ namespace tm = 0; return tm; } - - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type 'DateTime'", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } }; @@ -1513,9 +1543,10 @@ namespace { public: ProtobufSerializerUUID( + const std::string_view & column_name_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } @@ -1544,16 +1575,17 @@ namespace column_vector.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerUUID: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES)) - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type UUID", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } + incompatibleColumnType("UUID"); write_function = [this](UUID value) { @@ -1591,20 +1623,16 @@ namespace { public: ProtobufSerializerAggregateFunction( + const std::string_view & column_name_, const std::shared_ptr & aggregate_function_data_type_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , aggregate_function_data_type(aggregate_function_data_type_) , aggregate_function(aggregate_function_data_type->getFunction()) { if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES)) - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(aggregate_function_data_type->getName()), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } + incompatibleColumnType(aggregate_function_data_type->getName()); } void writeRow(size_t row_num) override @@ -1642,6 +1670,12 @@ namespace column_af.getData().push_back(data); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerAggregateFunction: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void dataToString(ConstAggregateDataPtr data, String & str) const { @@ -1684,7 +1718,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_nullable = assert_cast(*column); ColumnPtr nested_column = 
column_nullable.getNestedColumnPtr(); @@ -1693,7 +1728,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1744,6 +1780,12 @@ namespace column_nullable.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerNullable ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; ColumnPtr column; @@ -1761,7 +1803,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); const auto & column_map = assert_cast(*columns[0]); ColumnPtr nested_column = column_map.getNestedColumnPtr(); nested_serializer->setColumns(&nested_column, 1); @@ -1769,7 +1812,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1778,6 +1822,12 @@ namespace void readRow(size_t row_num) override { nested_serializer->readRow(row_num); } void insertDefaults(size_t row_num) override { nested_serializer->insertDefaults(row_num); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerMap ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; }; @@ -1794,7 +1844,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_lc = assert_cast(*column); ColumnPtr nested_column = column_lc.getDictionary().getNestedColumn(); @@ -1804,7 +1855,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1862,6 +1914,12 @@ namespace column_lc.insertFromFullColumn(*default_value_column, 0); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerLowCardinality ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; ColumnPtr column; @@ -1882,7 +1940,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_array = assert_cast(*column); ColumnPtr data_column = column_array.getDataPtr(); @@ -1891,7 +1950,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1944,6 +2004,12 @@ namespace column_array.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const 
override + { + writeIndent(out, indent) << "ProtobufSerializerArray ->\n"; + element_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr element_serializer; ColumnPtr column; @@ -1955,10 +2021,12 @@ namespace { public: ProtobufSerializerTupleAsArray( + const std::string_view & column_name_, const std::shared_ptr & tuple_data_type_, const FieldDescriptor & field_descriptor_, std::vector> element_serializers_) - : tuple_data_type(tuple_data_type_) + : column_name(column_name_) + , tuple_data_type(tuple_data_type_) , tuple_size(tuple_data_type->getElements().size()) , field_descriptor(field_descriptor_) , element_serializers(std::move(element_serializers_)) @@ -1969,7 +2037,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_tuple = assert_cast(*column); for (size_t i : collections::range(tuple_size)) @@ -1982,7 +2051,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -2006,9 +2076,12 @@ namespace if (current_element_index >= tuple_size) { throw Exception( - "Too many (" + std::to_string(current_element_index) + ") elements was read from the field " - + field_descriptor.full_name() + " to fit in the data type " + tuple_data_type->getName(), - ErrorCodes::PROTOBUF_BAD_CAST); + ErrorCodes::PROTOBUF_BAD_CAST, + "Column {}: More than {} elements were read from the field {} to fit in the data type {}", + quoteString(column_name), + tuple_size, + quoteString(field_descriptor.full_name()), + tuple_data_type->getName()); } element_serializers[current_element_index]->readRow(row_num); @@ -2040,7 +2113,17 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerTupleAsArray: column " << quoteString(column_name) << " (" + << tuple_data_type->getName() << ") -> field " << quoteString(field_descriptor.full_name()) << " (" + << field_descriptor.type_name() << ") ->\n"; + for (const auto & element_serializer : element_serializers) + element_serializer->describeTree(out, indent + 1); + } + private: + const String column_name; const std::shared_ptr tuple_data_type; const size_t tuple_size; const FieldDescriptor & field_descriptor; @@ -2062,12 +2145,14 @@ namespace }; ProtobufSerializerMessage( - std::vector field_descs_, + std::vector && field_descs_, const FieldDescriptor * parent_field_descriptor_, bool with_length_delimiter_, + std::unique_ptr missing_columns_filler_, const ProtobufReaderOrWriter & reader_or_writer_) : parent_field_descriptor(parent_field_descriptor_) , with_length_delimiter(with_length_delimiter_) + , missing_columns_filler(std::move(missing_columns_filler_)) , should_skip_if_empty(parent_field_descriptor ?
shouldSkipZeroOrEmpty(*parent_field_descriptor) : false) , reader(reader_or_writer_.reader) , writer(reader_or_writer_.writer) @@ -2085,14 +2170,18 @@ namespace void setColumns(const ColumnPtr * columns_, size_t num_columns_) override { - columns.assign(columns_, columns_ + num_columns_); + if (!num_columns_) + wrongNumberOfColumns(num_columns_, ">0"); std::vector field_columns; for (const FieldInfo & info : field_infos) { field_columns.clear(); + field_columns.reserve(info.column_indices.size()); for (size_t column_index : info.column_indices) { + if (column_index >= num_columns_) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong column index {}, expected column indices <{}", column_index, num_columns_); field_columns.emplace_back(columns_[column_index]); } info.field_serializer->setColumns(field_columns.data(), field_columns.size()); @@ -2100,15 +2189,17 @@ namespace if (reader) { - missing_column_indices.resize(num_columns_); - for (size_t column_index : collections::range(num_columns_)) - missing_column_indices[column_index] = column_index; + mutable_columns.resize(num_columns_); + for (size_t i : collections::range(num_columns_)) + mutable_columns[i] = columns_[i]->assumeMutable(); + + std::vector column_is_missing; + column_is_missing.resize(num_columns_, true); for (const FieldInfo & info : field_infos) - { - for (size_t column_index : info.column_indices) - missing_column_indices[column_index] = static_cast(-1); - } - boost::range::remove_erase(missing_column_indices, static_cast(-1)); + for (size_t i : info.column_indices) + column_is_missing[i] = false; + + has_missing_columns = (std::find(column_is_missing.begin(), column_is_missing.end(), true) != column_is_missing.end()); } } @@ -2157,7 +2248,7 @@ namespace { last_field_index = 0; last_field_tag = field_infos[0].field_tag; - size_t old_size = columns.empty() ? 0 : columns[0]->size(); + size_t old_size = mutable_columns.empty() ? 0 : mutable_columns[0]->size(); try { @@ -2182,10 +2273,10 @@ namespace } catch (...) 
{ - for (auto & column : columns) + for (auto & column : mutable_columns) { if (column->size() > old_size) - column->assumeMutableRef().popBack(column->size() - old_size); + column->popBack(column->size() - old_size); } throw; } @@ -2195,6 +2286,7 @@ namespace reader->endNestedMessage(); else reader->endMessage(false); + addDefaultsToMissingColumns(row_num); } @@ -2205,6 +2297,32 @@ namespace addDefaultsToMissingColumns(row_num); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + size_t num_columns = 0; + for (const auto & field_info : field_infos) + num_columns += field_info.column_indices.size(); + + writeIndent(out, indent) << "ProtobufSerializerMessage: " << num_columns << " columns ->"; + if (parent_field_descriptor) + out << " field " << quoteString(parent_field_descriptor->full_name()) << " (" << parent_field_descriptor->type_name() << ")"; + + for (size_t i = 0; i != field_infos.size(); ++i) + { + out << "\n"; + const auto & field_info = field_infos[i]; + writeIndent(out, indent + 1) << "Columns #"; + for (size_t j = 0; j != field_info.column_indices.size(); ++j) + { + if (j) + out << ", "; + out << field_info.column_indices[j]; + } + out << " ->\n"; + field_info.field_serializer->describeTree(out, indent + 2); + } + } + private: size_t findFieldIndexByFieldTag(int field_tag) { @@ -2229,19 +2347,14 @@ namespace void addDefaultsToMissingColumns(size_t row_num) { - for (size_t column_idx : missing_column_indices) - { - auto & column = columns[column_idx]; - size_t old_size = column->size(); - if (row_num >= old_size) - column->assumeMutableRef().insertDefault(); - } + if (has_missing_columns) + missing_columns_filler->addDefaults(mutable_columns, row_num); } struct FieldInfo { FieldInfo( - std::vector column_indices_, + std::vector && column_indices_, const FieldDescriptor & field_descriptor_, std::unique_ptr field_serializer_) : column_indices(std::move(column_indices_)) @@ -2261,13 +2374,14 @@ namespace const FieldDescriptor * const parent_field_descriptor; const bool with_length_delimiter; + const std::unique_ptr missing_columns_filler; const bool should_skip_if_empty; ProtobufReader * const reader; ProtobufWriter * const writer; std::vector field_infos; std::unordered_map field_index_by_field_tag; - Columns columns; - std::vector missing_column_indices; + MutableColumns mutable_columns; + bool has_missing_columns = false; int last_field_tag = 0; size_t last_field_index = static_cast(-1); }; @@ -2277,14 +2391,15 @@ namespace class ProtobufSerializerTupleAsNestedMessage : public ProtobufSerializer { public: - explicit ProtobufSerializerTupleAsNestedMessage(std::unique_ptr nested_message_serializer_) - : nested_message_serializer(std::move(nested_message_serializer_)) + explicit ProtobufSerializerTupleAsNestedMessage(std::unique_ptr message_serializer_) + : message_serializer(std::move(message_serializer_)) { } void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); const auto & column_tuple = assert_cast(*columns[0]); size_t tuple_size = column_tuple.tupleSize(); assert(tuple_size); @@ -2292,22 +2407,29 @@ namespace element_columns.reserve(tuple_size); for (size_t i : collections::range(tuple_size)) element_columns.emplace_back(column_tuple.getColumnPtr(i)); - nested_message_serializer->setColumns(element_columns.data(), element_columns.size()); + message_serializer->setColumns(element_columns.data(), element_columns.size()); } 
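// Illustrative sketch, not part of the patch: ProtobufSerializerMessage::readRow() above
// remembers the columns' previous size and, if deserialization throws part-way through a row,
// pops every column back to that size before rethrowing. A simplified standalone model of the
// same rollback idea, with std::vector standing in for IColumn (an assumption for brevity):

#include <cstddef>
#include <functional>
#include <vector>

template <typename T>
void readRowWithRollback(std::vector<std::vector<T>> & columns, const std::function<void()> & read_one_row)
{
    const std::size_t old_size = columns.empty() ? 0 : columns[0].size();
    try
    {
        read_one_row(); /// may append values to some of the columns before throwing
    }
    catch (...)
    {
        for (auto & column : columns)
            if (column.size() > old_size)
                column.resize(old_size); /// analogue of IColumn::popBack()
        throw;
    }
}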
void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } - void writeRow(size_t row_num) override { nested_message_serializer->writeRow(row_num); } - void readRow(size_t row_num) override { nested_message_serializer->readRow(row_num); } - void insertDefaults(size_t row_num) override { nested_message_serializer->insertDefaults(row_num); } + void writeRow(size_t row_num) override { message_serializer->writeRow(row_num); } + void readRow(size_t row_num) override { message_serializer->readRow(row_num); } + void insertDefaults(size_t row_num) override { message_serializer->insertDefaults(row_num); } + + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerTupleAsNestedMessage ->\n"; + message_serializer->describeTree(out, indent + 1); + } private: - const std::unique_ptr nested_message_serializer; + const std::unique_ptr message_serializer; }; @@ -2317,14 +2439,23 @@ namespace { public: explicit ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages( - std::unique_ptr nested_message_serializer_) - : nested_message_serializer(std::move(nested_message_serializer_)) + const std::vector & column_names_, + const FieldDescriptor * parent_field_descriptor_, + std::unique_ptr message_serializer_, + const std::function & get_root_desc_function_) + : parent_field_descriptor(parent_field_descriptor_) + , message_serializer(std::move(message_serializer_)) + , get_root_desc_function(get_root_desc_function_) { + column_names.reserve(column_names_.size()); + for (const auto & column_name : column_names_) + column_names.emplace_back(column_name); } void setColumns(const ColumnPtr * columns, size_t num_columns) override { - assert(num_columns); + if (!num_columns) + wrongNumberOfColumns(num_columns, ">0"); data_columns.clear(); data_columns.reserve(num_columns); offset_columns.clear(); @@ -2334,13 +2465,28 @@ namespace { const auto & column_array = assert_cast(*columns[i]); data_columns.emplace_back(column_array.getDataPtr()); - offset_columns.emplace_back(column_array.getOffsetsPtr()); + + auto offset_column = column_array.getOffsetsPtr(); + if (std::binary_search(offset_columns.begin(), offset_columns.end(), offset_column)) + continue; + + /// Keep `offset_columns` sorted. + offset_columns.insert(std::upper_bound(offset_columns.begin(), offset_columns.end(), offset_column), offset_column); + + /// All the columns listed in `offset_columns` should have equal offsets. 
+ if (i >= 1) + { + const auto & column_array0 = assert_cast(*columns[0]); + if (!column_array0.hasEqualOffsets(column_array)) + { + throw Exception(ErrorCodes::PROTOBUF_BAD_CAST, + "Column #{} {} and column #{} {} are supposed to have equal offsets according to the following serialization tree:\n{}", + 0, quoteString(column_names[0]), i, quoteString(column_names[i]), get_root_desc_function(0)); + } + } } - std::sort(offset_columns.begin(), offset_columns.end()); - offset_columns.erase(std::unique(offset_columns.begin(), offset_columns.end()), offset_columns.end()); - - nested_message_serializer->setColumns(data_columns.data(), data_columns.size()); + message_serializer->setColumns(data_columns.data(), data_columns.size()); } void setColumns(const MutableColumnPtr * columns, size_t num_columns) override @@ -2357,14 +2503,8 @@ namespace const auto & offset_column0 = assert_cast(*offset_columns[0]); size_t start_offset = offset_column0.getElement(row_num - 1); size_t end_offset = offset_column0.getElement(row_num); - for (size_t i : collections::range(1, offset_columns.size())) - { - const auto & offset_column = assert_cast(*offset_columns[i]); - if (offset_column.getElement(row_num) != end_offset) - throw Exception("Components of FlattenedNested have different sizes", ErrorCodes::PROTOBUF_BAD_CAST); - } for (size_t i : collections::range(start_offset, end_offset)) - nested_message_serializer->writeRow(i); + message_serializer->writeRow(i); } void readRow(size_t row_num) override @@ -2377,7 +2517,7 @@ namespace try { - nested_message_serializer->readRow(old_data_size); + message_serializer->readRow(old_data_size); size_t data_size = data_columns[0]->size(); if (data_size != old_data_size + 1) throw Exception("Unexpected number of elements of ColumnArray has been read", ErrorCodes::LOGICAL_ERROR); @@ -2432,8 +2572,26 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages: columns "; + for (size_t i = 0; i != column_names.size(); ++i) + { + if (i) + out << ", "; + out << "#" << i << " " << quoteString(column_names[i]); + } + out << " ->"; + if (parent_field_descriptor) + out << " field " << quoteString(parent_field_descriptor->full_name()) << " (" << parent_field_descriptor->type_name() << ") ->\n"; + message_serializer->describeTree(out, indent + 1); + } + private: - const std::unique_ptr nested_message_serializer; + Strings column_names; + const FieldDescriptor * parent_field_descriptor; + const std::unique_ptr message_serializer; + const std::function get_root_desc_function; Columns data_columns; Columns offset_columns; }; @@ -2445,24 +2603,34 @@ namespace public: explicit ProtobufSerializerBuilder(const ProtobufReaderOrWriter & reader_or_writer_) : reader_or_writer(reader_or_writer_) {} - std::unique_ptr buildMessageSerializer( + std::unique_ptr buildMessageSerializer( const Strings & column_names, const DataTypes & data_types, std::vector & missing_column_indices, const MessageDescriptor & message_descriptor, bool with_length_delimiter) { + root_serializer_ptr = std::make_shared(); + get_root_desc_function = [root_serializer_ptr = root_serializer_ptr](size_t indent) -> String + { + WriteBufferFromOwnString buf; + (*root_serializer_ptr)->describeTree(buf, indent); + return buf.str(); + }; + std::vector used_column_indices; - auto serializer = buildMessageSerializerImpl( + auto message_serializer = buildMessageSerializerImpl( /* num_columns = */ column_names.size(), 
column_names.data(), data_types.data(), - used_column_indices, message_descriptor, with_length_delimiter, - /* parent_field_descriptor = */ nullptr); + /* parent_field_descriptor = */ nullptr, + used_column_indices, + /* columns_are_reordered_outside = */ false, + /* check_nested_while_filling_missing_columns = */ true); - if (!serializer) + if (!message_serializer) { throw Exception( "Not found matches between the names of the columns {" + boost::algorithm::join(column_names, ", ") @@ -2473,10 +2641,18 @@ namespace missing_column_indices.clear(); missing_column_indices.reserve(column_names.size() - used_column_indices.size()); - boost::range::set_difference(collections::range(column_names.size()), used_column_indices, + auto used_column_indices_sorted = std::move(used_column_indices); + std::sort(used_column_indices_sorted.begin(), used_column_indices_sorted.end()); + boost::range::set_difference(collections::range(column_names.size()), used_column_indices_sorted, std::back_inserter(missing_column_indices)); - return serializer; + *root_serializer_ptr = message_serializer.get(); + +#if 0 + LOG_INFO(&Poco::Logger::get("ProtobufSerializer"), "Serialization tree:\n{}", get_root_desc_function(0)); +#endif + + return message_serializer; } private: @@ -2621,24 +2797,66 @@ namespace } /// Builds a serializer for a protobuf message (root or nested). - template + /// + /// Some of the passed columns might be skipped, the function sets `used_column_indices` to + /// the list of those columns which match any fields in the protobuf message. + /// + /// Normally `columns_are_reordered_outside` should be false - if it's false it means that + /// the used column indices will be passed to ProtobufSerializerMessage, which will write/read + /// only those columns and set the rest of columns by default. + /// Set `columns_are_reordered_outside` to true if you're going to reorder columns + /// according to `used_column_indices` returned and pass to + /// ProtobufSerializerMessage::setColumns() only the columns which are actually used. 
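After the root message serializer is built, the columns that matched no protobuf field become missing_column_indices: the used indices, which are no longer kept sorted during matching, are sorted once and a set difference against 0..N-1 yields the complement. A small illustration of that step using plain standard algorithms instead of the boost ranges in the patch (the index values are made up):

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    const size_t num_columns = 6;
    std::vector<size_t> used = {4, 0, 3};          // filled in field-matching order

    std::vector<size_t> all(num_columns);
    std::iota(all.begin(), all.end(), 0);          // 0, 1, 2, 3, 4, 5

    std::sort(used.begin(), used.end());           // set_difference needs sorted input

    std::vector<size_t> missing;
    std::set_difference(all.begin(), all.end(), used.begin(), used.end(),
                        std::back_inserter(missing));

    for (size_t i : missing)
        std::cout << i << ' ';                     // prints: 1 2 5
}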
std::unique_ptr buildMessageSerializerImpl( size_t num_columns, - const StringOrStringViewT * column_names, + const String * column_names, const DataTypePtr * data_types, - std::vector & used_column_indices, const MessageDescriptor & message_descriptor, bool with_length_delimiter, - const FieldDescriptor * parent_field_descriptor) + const FieldDescriptor * parent_field_descriptor, + std::vector & used_column_indices, + bool columns_are_reordered_outside, + bool check_nested_while_filling_missing_columns) + { + std::vector column_names_sv; + column_names_sv.reserve(num_columns); + for (size_t i = 0; i != num_columns; ++i) + column_names_sv.emplace_back(column_names[i]); + + return buildMessageSerializerImpl( + num_columns, + column_names_sv.data(), + data_types, + message_descriptor, + with_length_delimiter, + parent_field_descriptor, + used_column_indices, + columns_are_reordered_outside, + check_nested_while_filling_missing_columns); + } + + std::unique_ptr buildMessageSerializerImpl( + size_t num_columns, + const std::string_view * column_names, + const DataTypePtr * data_types, + const MessageDescriptor & message_descriptor, + bool with_length_delimiter, + const FieldDescriptor * parent_field_descriptor, + std::vector & used_column_indices, + bool columns_are_reordered_outside, + bool check_nested_while_filling_missing_columns) { std::vector field_descs; boost::container::flat_map field_descriptors_in_use; used_column_indices.clear(); used_column_indices.reserve(num_columns); + boost::container::flat_set used_column_indices_sorted; + used_column_indices_sorted.reserve(num_columns); + size_t sequential_column_index = 0; auto add_field_serializer = [&](const std::string_view & column_name_, - std::vector column_indices_, + std::vector && column_indices_, const FieldDescriptor & field_descriptor_, std::unique_ptr field_serializer_) { @@ -2652,12 +2870,17 @@ namespace ErrorCodes::MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD); } - for (size_t column_index : column_indices_) + used_column_indices.insert(used_column_indices.end(), column_indices_.begin(), column_indices_.end()); + used_column_indices_sorted.insert(column_indices_.begin(), column_indices_.end()); + + auto column_indices_to_pass_to_message_serializer = std::move(column_indices_); + if (columns_are_reordered_outside) { - /// Keep `used_column_indices` sorted. - used_column_indices.insert(boost::range::upper_bound(used_column_indices, column_index), column_index); + for (auto & index : column_indices_to_pass_to_message_serializer) + index = sequential_column_index++; } - field_descs.push_back({std::move(column_indices_), &field_descriptor_, std::move(field_serializer_)}); + + field_descs.push_back({std::move(column_indices_to_pass_to_message_serializer), &field_descriptor_, std::move(field_serializer_)}); field_descriptors_in_use.emplace(&field_descriptor_, column_name_); }; @@ -2666,7 +2889,7 @@ namespace /// We're going through all the passed columns. 
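When columns_are_reordered_outside is true, the indices appended to used_column_indices still refer to the caller's original column order, but the indices stored per field are rewritten to consecutive positions, because the caller is expected to pass only the used columns, in exactly that order, to setColumns(). A minimal sketch of that remapping with illustrative values:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // Column indices as seen by the caller, in the order the fields matched them.
    std::vector<std::vector<size_t>> indices_per_field = {{5}, {2, 7}, {0}};

    // Rewrite them to the positions the columns will occupy after the caller
    // reorders and passes only the used columns: 0, 1, 2, ...
    size_t sequential_column_index = 0;
    for (auto & field_indices : indices_per_field)
        for (auto & index : field_indices)
            index = sequential_column_index++;

    for (const auto & field_indices : indices_per_field)
    {
        for (size_t index : field_indices)
            std::cout << index << ' ';
        std::cout << '\n';                          // prints: 0 / 1 2 / 3
    }
}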
for (size_t column_idx : collections::range(num_columns)) { - if (boost::range::binary_search(used_column_indices, column_idx)) + if (used_column_indices_sorted.count(column_idx)) continue; const auto & column_name = column_names[column_idx]; @@ -2702,7 +2925,7 @@ namespace for (size_t j : collections::range(column_idx + 1, num_columns)) { - if (boost::range::binary_search(used_column_indices, j)) + if (used_column_indices_sorted.count(j)) continue; std::string_view other_suffix; if (!columnNameStartsWithFieldName(column_names[j], *field_descriptor, other_suffix)) @@ -2740,10 +2963,16 @@ namespace nested_column_names.size(), nested_column_names.data(), nested_data_types.data(), - used_column_indices_in_nested, *field_descriptor->message_type(), - false, - field_descriptor); + /* with_length_delimiter = */ false, + field_descriptor, + used_column_indices_in_nested, + /* columns_are_reordered_outside = */ true, + /* check_nested_while_filling_missing_columns = */ false); + + /// `columns_are_reordered_outside` is true because column indices are + /// going to be transformed and then written to the outer message, + /// see add_field_serializer() below. if (nested_message_serializer) { @@ -2774,14 +3003,24 @@ namespace nested_column_names.size(), nested_column_names.data(), nested_data_types.data(), - used_column_indices_in_nested, *field_descriptor->message_type(), - false, - field_descriptor); + /* with_length_delimiter = */ false, + field_descriptor, + used_column_indices_in_nested, + /* columns_are_reordered_outside = */ true, + /* check_nested_while_filling_missing_columns = */ false); + + /// `columns_are_reordered_outside` is true because column indices are + /// going to be transformed and then written to the outer message, + /// see add_field_serializer() below. 
if (nested_message_serializer) { - auto field_serializer = std::make_unique(std::move(nested_message_serializer)); + std::vector column_names_used; + for (size_t i : used_column_indices_in_nested) + column_names_used.emplace_back(nested_column_names[i]); + auto field_serializer = std::make_unique( + std::move(column_names_used), field_descriptor, std::move(nested_message_serializer), get_root_desc_function); transformColumnIndices(used_column_indices_in_nested, nested_column_indices); add_field_serializer(column_name, std::move(used_column_indices_in_nested), *field_descriptor, std::move(field_serializer)); break; @@ -2808,8 +3047,18 @@ namespace if (field_descs.empty()) return nullptr; + std::unique_ptr missing_columns_filler; + if (reader_or_writer.reader) + { + if (check_nested_while_filling_missing_columns) + missing_columns_filler = std::make_unique(num_columns, column_names, data_types); + else + missing_columns_filler = std::make_unique(); + } + return std::make_unique( - std::move(field_descs), parent_field_descriptor, with_length_delimiter, reader_or_writer); + std::move(field_descs), parent_field_descriptor, with_length_delimiter, + std::move(missing_columns_filler), reader_or_writer); } /// Builds a serializer for one-to-one match: @@ -2823,34 +3072,34 @@ namespace auto data_type_id = data_type->getTypeId(); switch (data_type_id) { - case TypeIndex::UInt8: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt16: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt128: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt256: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int8: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int16: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int128: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int256: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Float32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Float64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Date: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::DateTime: return std::make_unique(assert_cast(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::DateTime64: return std::make_unique(assert_cast(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::String: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::FixedString: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Enum8: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Enum16: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal32: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal64: return std::make_unique>(assert_cast &>(*data_type), 
field_descriptor, reader_or_writer); - case TypeIndex::Decimal128: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal256: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::UUID: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::Interval: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::AggregateFunction: return std::make_unique(typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::UInt8: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt16: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt128: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt256: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int8: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int16: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int128: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int256: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Float32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Float64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Date: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::DateTime: return std::make_unique(column_name, assert_cast(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::DateTime64: return std::make_unique(column_name, assert_cast(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::String: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::FixedString: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Enum8: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Enum16: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal32: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal64: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal128: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal256: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::UUID: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Interval: return std::make_unique(column_name, field_descriptor, reader_or_writer); + 
case TypeIndex::AggregateFunction: return std::make_unique(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); case TypeIndex::Nullable: { @@ -2907,16 +3156,18 @@ namespace { /// Try to serialize as a nested message. std::vector used_column_indices; - auto nested_message_serializer = buildMessageSerializerImpl( + auto message_serializer = buildMessageSerializerImpl( size_of_tuple, tuple_data_type.getElementNames().data(), tuple_data_type.getElements().data(), - used_column_indices, *field_descriptor.message_type(), - false, - &field_descriptor); + /* with_length_delimiter = */ false, + &field_descriptor, + used_column_indices, + /* columns_are_reordered_outside = */ false, + /* check_nested_while_filling_missing_columns = */ false); - if (!nested_message_serializer) + if (!message_serializer) { throw Exception( "Not found matches between the names of the tuple's elements {" @@ -2926,7 +3177,7 @@ namespace ErrorCodes::NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS); } - return std::make_unique(std::move(nested_message_serializer)); + return std::make_unique(std::move(message_serializer)); } /// Serialize as a repeated field. @@ -2947,6 +3198,7 @@ namespace return nullptr; return std::make_unique( + column_name, typeid_cast>(data_type), field_descriptor, std::move(nested_serializers)); @@ -2973,6 +3225,8 @@ namespace } const ProtobufReaderOrWriter reader_or_writer; + std::function get_root_desc_function; + std::shared_ptr root_serializer_ptr; }; } diff --git a/src/Formats/ProtobufSerializer.h b/src/Formats/ProtobufSerializer.h index 315a138f9cf..3eaca6a18d6 100644 --- a/src/Formats/ProtobufSerializer.h +++ b/src/Formats/ProtobufSerializer.h @@ -15,7 +15,7 @@ class ProtobufWriter; class IDataType; using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; - +class WriteBuffer; /// Utility class, does all the work for serialization in the Protobuf format. class ProtobufSerializer @@ -30,6 +30,8 @@ public: virtual void readRow(size_t row_num) = 0; virtual void insertDefaults(size_t row_num) = 0; + virtual void describeTree(WriteBuffer & out, size_t indent) const = 0; + static std::unique_ptr create( const Strings & column_names, const DataTypes & data_types, diff --git a/src/Formats/RowInputMissingColumnsFiller.cpp b/src/Formats/RowInputMissingColumnsFiller.cpp new file mode 100644 index 00000000000..ff8f9e19380 --- /dev/null +++ b/src/Formats/RowInputMissingColumnsFiller.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + + +RowInputMissingColumnsFiller::RowInputMissingColumnsFiller() = default; + +RowInputMissingColumnsFiller::RowInputMissingColumnsFiller(const NamesAndTypesList & names_and_types) +{ + std::unordered_map> nested_groups; /// Nested prefix -> column indices. + size_t i = 0; + for (auto it = names_and_types.begin(); it != names_and_types.end(); ++it, ++i) + { + const auto & name_and_type = *it; + if (isArray(name_and_type.type)) + { + auto split = Nested::splitName(name_and_type.name); + if (!split.second.empty()) /// Is it really a column of Nested data structure? + nested_groups[split.first].push_back(i); + } + } + setNestedGroups(std::move(nested_groups), names_and_types.size()); +} + +RowInputMissingColumnsFiller::RowInputMissingColumnsFiller(const Names & names, const DataTypes & types) +{ + std::unordered_map> nested_groups; /// Nested prefix -> column indices. 
+ for (size_t i = 0; i != names.size(); ++i) + { + if (isArray(types[i])) + { + auto split = Nested::splitName(names[i]); + if (!split.second.empty()) /// Is it really a column of Nested data structure? + nested_groups[split.first].push_back(i); + } + } + setNestedGroups(std::move(nested_groups), names.size()); +} + +RowInputMissingColumnsFiller::RowInputMissingColumnsFiller(size_t count, const std::string_view * names, const DataTypePtr * types) +{ + std::unordered_map> nested_groups; /// Nested prefix -> column indices. + for (size_t i = 0; i != count; ++i) + { + if (isArray(types[i])) + { + auto split = Nested::splitName(names[i]); + if (!split.second.empty()) /// Is it really a column of Nested data structure? + nested_groups[split.first].push_back(i); + } + } + setNestedGroups(std::move(nested_groups), count); +} + +void RowInputMissingColumnsFiller::setNestedGroups(std::unordered_map> && nested_groups, size_t num_columns) +{ + if (!nested_groups.empty()) + { + column_infos.resize(num_columns); + for (auto & nested_group : nested_groups | boost::adaptors::map_values) + { + if (nested_group.size() <= 1) + continue; + auto nested_group_shared = std::make_shared>(std::move(nested_group)); + for (size_t i : *nested_group_shared) + column_infos[i].nested_group = nested_group_shared; + } + } +} + + +void RowInputMissingColumnsFiller::addDefaults(MutableColumns & columns, size_t row_num) const +{ + for (size_t i = 0; i != columns.size(); ++i) + { + auto & column = *columns[i]; + size_t column_size = column.size(); + if (row_num < column_size) + continue; /// The column already has an element in this position, skipping. + + if (row_num > column_size) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Wrong row_number {}, expected either {} or {}", row_num, column_size - 1, column_size); + + if ((i >= column_infos.size()) || !column_infos[i].nested_group) + { + column.insertDefault(); + continue; + } + + const auto & nested_group = *column_infos[i].nested_group; + size_t size_of_array = 0; + for (size_t j : nested_group) + { + const auto & column_j = columns[j]; + size_t column_size_j = column_j->size(); + if (row_num < column_size_j) + { + const auto * column_array = typeid_cast(column_j.get()); + if (!column_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column with Array type is not represented by ColumnArray column: {}", column_j->dumpStructure()); + const auto & offsets = column_array->getOffsets(); + size_of_array = offsets[row_num] - offsets[row_num - 1]; + break; + } + } + + for (size_t j : nested_group) + { + auto & column_j = columns[j]; + size_t column_size_j = column_j->size(); + if (row_num >= column_size_j) + { + if (row_num > column_size_j) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Wrong row_number {}, expected either {} or {}", row_num, column_size_j - 1, column_size_j); + + auto * column_array = typeid_cast(column_j.get()); + if (!column_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column with Array type is not represented by ColumnArray column: {}", column_j->dumpStructure()); + + auto & data = column_array->getData(); + auto & offsets = column_array->getOffsets(); + for (size_t k = 0; k != size_of_array; ++k) + data.insertDefault(); + offsets.push_back(data.size()); + } + } + } +} + +} diff --git a/src/Formats/RowInputMissingColumnsFiller.h b/src/Formats/RowInputMissingColumnsFiller.h new file mode 100644 index 00000000000..0eaefd4e814 --- /dev/null +++ b/src/Formats/RowInputMissingColumnsFiller.h @@ -0,0 +1,40 @@ +#pragma once + +#include + + +namespace DB 
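The addDefaults() implementation above treats columns belonging to the same Nested structure as one group: if the current row is missing in some of them, it finds a sibling that already has the row, derives the row's array length from that sibling's offsets, and appends an array of that many default elements (plus a matching offset) rather than an empty array. A self-contained sketch of the same idea on plain vectors, with hypothetical data:

#include <cstddef>
#include <iostream>
#include <vector>

// Each "column" of a Nested structure is an array column; rows[i] is the array for row i.
using ArrayColumn = std::vector<std::vector<int>>;

// Fill the current row in every column of the group, keeping array lengths consistent.
void addDefaultsToNestedGroup(std::vector<ArrayColumn *> & group, size_t row_num)
{
    // Find how long the row's array is in any column that already has it.
    size_t size_of_array = 0;
    for (const ArrayColumn * column : group)
        if (row_num < column->size())
        {
            size_of_array = (*column)[row_num].size();
            break;
        }

    // Append an array of default values of that length to every column lacking the row.
    for (ArrayColumn * column : group)
        if (row_num >= column->size())
            column->emplace_back(size_of_array, 0);
}

int main()
{
    ArrayColumn n_x = {{1, 2, 3}};   // Nested column n.x already read for row 0
    ArrayColumn n_y;                 // n.y was missing in the input
    std::vector<ArrayColumn *> group = {&n_x, &n_y};

    addDefaultsToNestedGroup(group, 0);
    std::cout << n_y[0].size() << '\n';   // prints 3, not 0
}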
+{ + +/// Adds default values to columns if they don't have a specified row yet. +/// This class can be useful for implementing IRowInputFormat. +/// For missing columns of nested structure, it creates not columns of empty arrays, +/// but columns of arrays of correct lengths. +class RowInputMissingColumnsFiller +{ +public: + /// Makes a column filler which checks nested structures while adding default values to columns. + RowInputMissingColumnsFiller(const NamesAndTypesList & names_and_types); + RowInputMissingColumnsFiller(const Names & names, const DataTypes & types); + RowInputMissingColumnsFiller(size_t count, const std::string_view * names, const DataTypePtr * types); + + /// Default constructor makes a column filler which doesn't check nested structures while + /// adding default values to columns. + RowInputMissingColumnsFiller(); + + /// Adds default values to some columns. + /// For each column the function checks the number of rows and if it's less than (row_num + 1) + /// the function will add a default value to this column. + void addDefaults(MutableColumns & columns, size_t row_num) const; + +private: + void setNestedGroups(std::unordered_map> && nested_groups, size_t num_columns); + + struct ColumnInfo + { + std::shared_ptr> nested_group; + }; + std::vector column_infos; +}; + +} diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index dbbf72861be..120eac7bb5f 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -123,3 +123,7 @@ set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-s # target_link_libraries(clickhouse_functions PRIVATE ${S2_LIBRARY}) target_include_directories(clickhouse_functions SYSTEM PUBLIC ${S2_GEOMETRY_INCLUDE_DIR}) + +if (ENABLE_FUZZING) + add_compile_definitions(FUZZING_MODE=1) +endif () diff --git a/src/Functions/FunctionsWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp similarity index 91% rename from src/Functions/FunctionsWindow.cpp rename to src/Functions/FunctionsTimeWindow.cpp index 2ed5e9863d6..79ce7356ee7 100644 --- a/src/Functions/FunctionsWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace DB { @@ -114,9 +114,9 @@ namespace } template <> -struct WindowImpl +struct TimeWindowImpl { - static constexpr auto name = "TUMBLE"; + static constexpr auto name = "tumble"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -211,9 +211,9 @@ struct WindowImpl }; template <> -struct WindowImpl +struct TimeWindowImpl { - static constexpr auto name = "TUMBLE_START"; + static constexpr auto name = "tumbleStart"; static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -231,49 +231,61 @@ struct WindowImpl } else { - return std::static_pointer_cast(WindowImpl::getReturnType(arguments, function_name)) + return std::static_pointer_cast(TimeWindowImpl::getReturnType(arguments, function_name)) ->getElement(0); } } [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) { - const auto which_type = WhichDataType(arguments[0].type); + const auto & time_column = arguments[0]; + const auto which_type = WhichDataType(time_column.type); ColumnPtr result_column; - if (which_type.isDateTime()) - result_column= WindowImpl::dispatchForColumns(arguments, function_name); + if (arguments.size() == 1) + { + if (which_type.isUInt32()) 
+ return time_column.column; + else //isTuple + result_column = time_column.column; + } else - result_column = arguments[0].column; + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 0, function_name); } }; template <> -struct WindowImpl +struct TimeWindowImpl { - static constexpr auto name = "TUMBLE_END"; + static constexpr auto name = "tumbleEnd"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { - return WindowImpl::getReturnType(arguments, function_name); + return TimeWindowImpl::getReturnType(arguments, function_name); } [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name) { - const auto which_type = WhichDataType(arguments[0].type); + const auto & time_column = arguments[0]; + const auto which_type = WhichDataType(time_column.type); ColumnPtr result_column; - if (which_type.isDateTime()) - result_column = WindowImpl::dispatchForColumns(arguments, function_name); + if (arguments.size() == 1) + { + if (which_type.isUInt32()) + return time_column.column; + else //isTuple + result_column = time_column.column; + } else - result_column = arguments[0].column; + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 1, function_name); } }; template <> -struct WindowImpl +struct TimeWindowImpl { - static constexpr auto name = "HOP"; + static constexpr auto name = "hop"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -403,9 +415,9 @@ struct WindowImpl }; template <> -struct WindowImpl +struct TimeWindowImpl { - static constexpr auto name = "WINDOW_ID"; + static constexpr auto name = "windowID"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -535,7 +547,7 @@ struct WindowImpl [[maybe_unused]] static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) { - ColumnPtr column = WindowImpl::dispatchForColumns(arguments, function_name); + ColumnPtr column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(column, 1, function_name); } @@ -555,9 +567,9 @@ struct WindowImpl }; template <> -struct WindowImpl +struct TimeWindowImpl { - static constexpr auto name = "HOP_START"; + static constexpr auto name = "hopStart"; static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -575,7 +587,7 @@ struct WindowImpl } else { - return std::static_pointer_cast(WindowImpl::getReturnType(arguments, function_name))->getElement(0); + return std::static_pointer_cast(TimeWindowImpl::getReturnType(arguments, function_name))->getElement(0); } } @@ -592,19 +604,19 @@ struct WindowImpl result_column = time_column.column; } else - result_column = WindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 0, function_name); } }; template <> -struct WindowImpl +struct TimeWindowImpl { - static constexpr auto name = "HOP_END"; + static constexpr auto name = "hopEnd"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { - return WindowImpl::getReturnType(arguments, 
function_name); + return TimeWindowImpl::getReturnType(arguments, function_name); } [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) @@ -620,25 +632,25 @@ struct WindowImpl result_column = time_column.column; } else - result_column = WindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 1, function_name); } }; -template -DataTypePtr FunctionWindow::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +template +DataTypePtr FunctionTimeWindow::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const { - return WindowImpl::getReturnType(arguments, name); + return TimeWindowImpl::getReturnType(arguments, name); } -template -ColumnPtr FunctionWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const +template +ColumnPtr FunctionTimeWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const { - return WindowImpl::dispatchForColumns(arguments, name); + return TimeWindowImpl::dispatchForColumns(arguments, name); } -void registerFunctionsWindow(FunctionFactory& factory) +void registerFunctionsTimeWindow(FunctionFactory& factory) { factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsWindow.h b/src/Functions/FunctionsTimeWindow.h similarity index 76% rename from src/Functions/FunctionsWindow.h rename to src/Functions/FunctionsTimeWindow.h index 37acb660751..6e3b5da9971 100644 --- a/src/Functions/FunctionsWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -7,30 +7,30 @@ namespace DB { -/** Window functions: +/** Time window functions: * - * TUMBLE(time_attr, interval [, timezone]) + * tumble(time_attr, interval [, timezone]) * - * TUMBLE_START(window_id) + * tumbleStart(window_id) * - * TUMBLE_START(time_attr, interval [, timezone]) + * tumbleStart(time_attr, interval [, timezone]) * - * TUMBLE_END(window_id) + * tumbleEnd(window_id) * - * TUMBLE_END(time_attr, interval [, timezone]) + * tumbleEnd(time_attr, interval [, timezone]) * - * HOP(time_attr, hop_interval, window_interval [, timezone]) + * hop(time_attr, hop_interval, window_interval [, timezone]) * - * HOP_START(window_id) + * hopStart(window_id) * - * HOP_START(time_attr, hop_interval, window_interval [, timezone]) + * hopStart(time_attr, hop_interval, window_interval [, timezone]) * - * HOP_END(window_id) + * hopEnd(window_id) * - * HOP_END(time_attr, hop_interval, window_interval [, timezone]) + * hopEnd(time_attr, hop_interval, window_interval [, timezone]) * */ -enum WindowFunctionName +enum TimeWindowFunctionName { TUMBLE, TUMBLE_START, @@ -117,8 +117,8 @@ struct ToStartOfTransform; ADD_TIME(Second, 1) #undef ADD_TIME -template -struct WindowImpl +template +struct TimeWindowImpl { static constexpr auto name = "UNKNOWN"; @@ -127,12 +127,12 @@ struct WindowImpl static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name); }; -template -class FunctionWindow : public IFunction +template +class FunctionTimeWindow : public IFunction { public: - static constexpr auto name = WindowImpl::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = TimeWindowImpl::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } 
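The renamed functions keep their semantics: tumble(time_attr, interval) maps a timestamp to the fixed-size window containing it and returns that window's bounds, while hop(...) does the same for overlapping windows. The core of the tumbling case is simply rounding the timestamp down to a multiple of the interval; a rough standalone sketch, seconds-based and ignoring time zones and the Interval types used by the real implementation:

#include <cstdint>
#include <iostream>
#include <utility>

// Returns the [start, end) bounds of the tumbling window containing `t`.
// Both values are Unix timestamps in seconds; `interval` is the window size in seconds.
std::pair<uint32_t, uint32_t> tumble(uint32_t t, uint32_t interval)
{
    uint32_t window_start = t - t % interval;   // round down to the window boundary
    return {window_start, window_start + interval};
}

int main()
{
    auto [start, end] = tumble(1'600'000'123, 60);   // a one-minute window
    std::cout << start << " .. " << end << '\n';     // 1600000080 .. 1600000140
}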
String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -145,11 +145,11 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override; }; -using FunctionTumble = FunctionWindow; -using FunctionTumbleStart = FunctionWindow; -using FunctionTumbleEnd = FunctionWindow; -using FunctionHop = FunctionWindow; -using FunctionWindowId = FunctionWindow; -using FunctionHopStart = FunctionWindow; -using FunctionHopEnd = FunctionWindow; +using FunctionTumble = FunctionTimeWindow; +using FunctionTumbleStart = FunctionTimeWindow; +using FunctionTumbleEnd = FunctionTimeWindow; +using FunctionHop = FunctionTimeWindow; +using FunctionWindowId = FunctionTimeWindow; +using FunctionHopStart = FunctionTimeWindow; +using FunctionHopEnd = FunctionTimeWindow; } diff --git a/src/Functions/array/arraySlice.cpp b/src/Functions/array/arraySlice.cpp index d6b50f55563..7a2e97de78a 100644 --- a/src/Functions/array/arraySlice.cpp +++ b/src/Functions/array/arraySlice.cpp @@ -102,7 +102,7 @@ public: { if (!length_column || length_column->onlyNull()) { - return array_column; + return arguments[0].column; } else if (isColumnConst(*length_column)) sink = GatherUtils::sliceFromLeftConstantOffsetBounded(*source, 0, length_column->getInt(0)); diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 7394e0f36f9..95046d95176 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -107,48 +107,61 @@ public: const auto & null_map_data = cast_result_nullable.getNullMapData(); size_t null_map_data_size = null_map_data.size(); const auto & nested_column = cast_result_nullable.getNestedColumn(); - IColumn::MutablePtr result = return_type->createColumn(); + auto result = return_type->createColumn(); result->reserve(null_map_data_size); + ColumnNullable * result_nullable = nullptr; + if (result->isNullable()) + result_nullable = assert_cast(&*result); + size_t start_insert_index = 0; - /// Created separate branch because cast and inserting field from other column is slower + Field default_value; + ColumnPtr default_column; + if (arguments.size() == 3) { - const auto & default_column_with_type = arguments[2]; - auto default_column = default_column_with_type.column->convertToFullColumnIfConst(); + auto default_values_column = arguments[2].column; - for (size_t i = 0; i < null_map_data_size; ++i) - { - bool is_current_index_null = null_map_data[i]; - if (!is_current_index_null) - continue; - - if (i != start_insert_index) - result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); - - result->insertFrom(*default_column, i); - start_insert_index = i + 1; - } + if (isColumnConst(*default_values_column)) + default_value = (*default_values_column)[0]; + else + default_column = default_values_column->convertToFullColumnIfConst(); } else { - for (size_t i = 0; i < null_map_data_size; ++i) + default_value = return_type->getDefault(); + } + + for (size_t i = 0; i < null_map_data_size; ++i) + { + bool is_current_index_null = null_map_data[i]; + if (!is_current_index_null) + continue; + + if (i != start_insert_index) { - bool is_current_index_null = null_map_data[i]; - if (!is_current_index_null) - continue; - - if (i != start_insert_index) + if (result_nullable) + result_nullable->insertRangeFromNotNullable(nested_column, start_insert_index, i - start_insert_index); + else 
result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); - - result->insertDefault(); - start_insert_index = i + 1; } + + if (default_column) + result->insertFrom(*default_column, i); + else + result->insert(default_value); + + start_insert_index = i + 1; } if (null_map_data_size != start_insert_index) - result->insertRangeFrom(nested_column, start_insert_index, null_map_data_size - start_insert_index); + { + if (result_nullable) + result_nullable->insertRangeFromNotNullable(nested_column, start_insert_index, null_map_data_size - start_insert_index); + else + result->insertRangeFrom(nested_column, start_insert_index, null_map_data_size - start_insert_index); + } return result; } diff --git a/src/Functions/getFuzzerData.cpp b/src/Functions/getFuzzerData.cpp new file mode 100644 index 00000000000..f516c871950 --- /dev/null +++ b/src/Functions/getFuzzerData.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ + +void registerFunctionGetFuzzerData(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/getFuzzerData.h b/src/Functions/getFuzzerData.h new file mode 100644 index 00000000000..635ca2bdce9 --- /dev/null +++ b/src/Functions/getFuzzerData.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +class FunctionGetFuzzerData : public IFunction +{ + inline static String fuzz_data; + +public: + static constexpr auto name = "getFuzzerData"; + + inline static FunctionPtr create(ContextPtr) { return create(); } + + static FunctionPtr create() + { + return std::make_shared(); + } + + inline String getName() const override { return name; } + + inline size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + inline bool isDeterministic() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, + const DataTypePtr &, + size_t input_rows_count) const override + { + return DataTypeString().createColumnConst(input_rows_count, fuzz_data); + } + + static void update(const String & fuzz_data_) + { + fuzz_data = fuzz_data_; + } +}; + +} diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 8db19435443..2b56615ee6f 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -54,7 +54,7 @@ void registerFunctionValidateNestedArraySizes(FunctionFactory & factory); void registerFunctionsSnowflake(FunctionFactory & factory); void registerFunctionTid(FunctionFactory & factory); void registerFunctionLogTrace(FunctionFactory & factory); -void registerFunctionsWindow(FunctionFactory &); +void registerFunctionsTimeWindow(FunctionFactory &); #if USE_SSL void registerFunctionEncrypt(FunctionFactory & factory); @@ -115,7 +115,7 @@ void registerFunctions() registerFunctionsStringHash(factory); registerFunctionValidateNestedArraySizes(factory); registerFunctionsSnowflake(factory); - registerFunctionsWindow(factory); + registerFunctionsTimeWindow(factory); #if USE_SSL registerFunctionEncrypt(factory); diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index ffe195973d3..76d61ce509a 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ 
-85,6 +85,10 @@ void registerFunctionGetOSKernelVersion(FunctionFactory &); void registerFunctionConvertCharset(FunctionFactory &); #endif +#ifdef FUZZING_MODE +void registerFunctionGetFuzzerData(FunctionFactory & factory); +#endif + void registerFunctionsMiscellaneous(FunctionFactory & factory) { registerFunctionCurrentDatabase(factory); @@ -166,6 +170,10 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) #if USE_ICU registerFunctionConvertCharset(factory); #endif + +#ifdef FUZZING_MODE + registerFunctionGetFuzzerData(factory); +#endif } } diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp index f237b158fe5..ad357c74402 100644 --- a/src/Functions/reinterpretAs.cpp +++ b/src/Functions/reinterpretAs.cpp @@ -24,6 +24,7 @@ #include + namespace DB { namespace ErrorCodes @@ -174,16 +175,14 @@ public: const auto & offsets_from = col_from->getOffsets(); size_t size = offsets_from.size(); auto & vec_res = col_res->getData(); - vec_res.resize(size); + vec_res.resize_fill(size); size_t offset = 0; for (size_t i = 0; i < size; ++i) { - ToFieldType value{}; - memcpy(&value, + memcpy(&vec_res[i], &data_from[offset], std::min(static_cast(sizeof(ToFieldType)), offsets_from[i] - offset - 1)); - vec_res[i] = value; offset = offsets_from[i]; } @@ -201,15 +200,18 @@ public: size_t step = col_from_fixed->getN(); size_t size = data_from.size() / step; auto & vec_res = col_res->getData(); - vec_res.resize(size); size_t offset = 0; size_t copy_size = std::min(step, sizeof(ToFieldType)); + + if (sizeof(ToFieldType) <= step) + vec_res.resize(size); + else + vec_res.resize_fill(size); + for (size_t i = 0; i < size; ++i) { - ToFieldType value{}; - memcpy(&value, &data_from[offset], copy_size); - vec_res[i] = value; + memcpy(&vec_res[i], &data_from[offset], copy_size); offset += step; } @@ -288,7 +290,7 @@ private: { StringRef data = src.getDataAt(i); - std::memcpy(&data_to[offset], data.data, std::min(n, data.size)); + memcpy(&data_to[offset], data.data, std::min(n, data.size)); offset += n; } } @@ -347,10 +349,13 @@ private: using To = typename ToContainer::value_type; size_t size = from.size(); - to.resize_fill(size); - static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To)); + if (sizeof(To) <= sizeof(From)) + to.resize(size); + else + to.resize_fill(size); + for (size_t i = 0; i < size; ++i) memcpy(static_cast(&to[i]), static_cast(&from[i]), copy_size); } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 675adc43ce6..b0a6838b81e 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1212,4 +1212,96 @@ void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delim } } + +template +static void readQuotedFieldInBrackets(String & s, ReadBuffer & buf) +{ + assertChar(opening_bracket, buf); + s.push_back(opening_bracket); + + size_t balance = 1; + + while (!buf.eof() && balance) + { + char * next_pos = find_first_symbols<'\'', opening_bracket, closing_bracket>(buf.position(), buf.buffer().end()); + appendToStringOrVector(s, buf, next_pos); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + s.push_back(*buf.position()); + + if (*buf.position() == '\'') + { + readQuotedStringInto(s, buf); + s.push_back('\''); + } + else if (*buf.position() == opening_bracket) + { + ++balance; + ++buf.position(); + } + else if (*buf.position() == closing_bracket) + { + --balance; + ++buf.position(); + } + } +} + +void readQuotedFieldIntoString(String & s, ReadBuffer & buf) +{ + s.clear(); + + if (buf.eof()) 
+ return; + + /// Possible values in 'Quoted' field: + /// - Strings: '...' + /// - Arrays: [...] + /// - Tuples: (...) + /// - Maps: {...} + /// - NULL + /// - Number: integer, float, decimal. + + if (*buf.position() == '\'') + readQuotedString(s, buf); + else if (*buf.position() == '[') + readQuotedFieldInBrackets<'[', ']'>(s, buf); + else if (*buf.position() == '(') + readQuotedFieldInBrackets<'(', ')'>(s, buf); + else if (*buf.position() == '{') + readQuotedFieldInBrackets<'{', '}'>(s, buf); + else if (checkCharCaseInsensitive('n', buf)) + { + /// NULL or NaN + if (checkCharCaseInsensitive('u', buf)) + { + assertStringCaseInsensitive("ll", buf); + s.append("NULL"); + } + else + { + assertStringCaseInsensitive("an", buf); + s.append("NaN"); + } + } + else + { + /// It's an integer, float or decimal. They all can be parsed as float. + /// Use PeekableReadBuffer to copy field to string after parsing. + PeekableReadBuffer peekable_buf(buf); + peekable_buf.setCheckpoint(); + Float64 tmp; + readFloatText(tmp, peekable_buf); + peekable_buf.makeContinuousMemoryFromCheckpointToPos(); + auto * end = peekable_buf.position(); + peekable_buf.rollbackToCheckpoint(); + s.append(peekable_buf.position(), end); + peekable_buf.position() = end; + } +} + + } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index da59fc7973c..c48306cf6d3 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -184,6 +184,15 @@ inline void assertChar(char symbol, ReadBuffer & buf) } } +inline bool checkCharCaseInsensitive(char c, ReadBuffer & buf) +{ + char a; + if (!buf.peek(a) || !equalsCaseInsensitive(a, c)) + return false; + buf.ignore(); + return true; +} + inline void assertString(const String & s, ReadBuffer & buf) { assertString(s.c_str(), buf); @@ -1375,4 +1384,6 @@ struct PcgDeserializer } }; +void readQuotedFieldIntoString(String & s, ReadBuffer & buf); + } diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index 6bb6936855f..5ddc28d2db1 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -7,7 +7,11 @@ namespace DB { WriteBufferFromHTTP::WriteBufferFromHTTP( - const Poco::URI & uri, const std::string & method, const ConnectionTimeouts & timeouts, size_t buffer_size_) + const Poco::URI & uri, + const std::string & method, + const std::string & content_type, + const ConnectionTimeouts & timeouts, + size_t buffer_size_) : WriteBufferFromOStream(buffer_size_) , session{makeHTTPSession(uri, timeouts)} , request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1} @@ -15,6 +19,11 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( request.setHost(uri.getHost()); request.setChunkedTransferEncoding(true); + if (!content_type.empty()) + { + request.set("Content-Type", content_type); + } + LOG_TRACE((&Poco::Logger::get("WriteBufferToHTTP")), "Sending request to {}", uri.toString()); ostr = &session->sendRequest(request); diff --git a/src/IO/WriteBufferFromHTTP.h b/src/IO/WriteBufferFromHTTP.h index cfd3597a95c..31b2a921889 100644 --- a/src/IO/WriteBufferFromHTTP.h +++ b/src/IO/WriteBufferFromHTTP.h @@ -20,6 +20,7 @@ class WriteBufferFromHTTP : public WriteBufferFromOStream public: explicit WriteBufferFromHTTP(const Poco::URI & uri, const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only + const std::string & content_type = "", const ConnectionTimeouts & timeouts = {}, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 9cdc77df957..5498e1c90f3 100644 --- 
a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -118,6 +118,7 @@ inline void writeStringBinary(const std::string_view & s, WriteBuffer & buf) writeStringBinary(StringRef{s}, buf); } + template void writeVectorBinary(const std::vector & v, WriteBuffer & buf) { diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp index 70a58971d3f..383ca3db6f4 100644 --- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp +++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp @@ -157,7 +157,12 @@ void ArithmeticOperationsInAgrFuncMatcher::visit(const ASTFunction & func, ASTPt void ArithmeticOperationsInAgrFuncMatcher::visit(ASTPtr & ast, Data & data) { if (const auto * function_node = ast->as()) + { + if (function_node->is_window_function) + return; + visit(*function_node, ast, data); + } } bool ArithmeticOperationsInAgrFuncMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 79956aaebed..827e7d27409 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -89,6 +89,13 @@ void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) writeBinary(uint8_t(0), out); } } + + if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS) + { + writeVarUInt(static_cast(collaborate_with_initiator), out); + writeVarUInt(count_participating_replicas, out); + writeVarUInt(number_of_current_replica, out); + } } @@ -170,6 +177,15 @@ void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision) readBinary(client_trace_context.trace_flags, in); } } + + if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS) + { + UInt64 value; + readVarUInt(value, in); + collaborate_with_initiator = static_cast(value); + readVarUInt(count_participating_replicas, in); + readVarUInt(number_of_current_replica, in); + } } diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index d42c34f07e2..3ce740c6436 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -108,6 +108,11 @@ public: bool is_replicated_database_internal = false; + /// For parallel processing on replicas + bool collaborate_with_initiator{false}; + UInt64 count_participating_replicas{0}; + UInt64 number_of_current_replica{0}; + bool empty() const { return query_kind == QueryKind::NO_QUERY; } /** Serialization and deserialization. 
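The new parallel-replica fields in ClientInfo (collaborate_with_initiator, count_participating_replicas, number_of_current_replica) are written and read only when the peer's protocol revision is at least DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS, so older clients and servers keep interoperating. A stripped-down sketch of that revision-gating pattern, with made-up field and revision values and iostreams standing in for the real WriteBuffer/ReadBuffer varint helpers:

#include <cstdint>
#include <iostream>
#include <sstream>

// Illustrative value only; the real constant lives in the protocol defines.
constexpr uint64_t MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453;

struct ClientInfoSketch
{
    bool collaborate_with_initiator = false;
    uint64_t number_of_current_replica = 0;

    void write(std::ostream & out, uint64_t peer_revision) const
    {
        // Newer fields are appended only if the peer understands them.
        if (peer_revision >= MIN_REVISION_WITH_PARALLEL_REPLICAS)
            out << collaborate_with_initiator << ' ' << number_of_current_replica << ' ';
    }

    void read(std::istream & in, uint64_t peer_revision)
    {
        if (peer_revision >= MIN_REVISION_WITH_PARALLEL_REPLICAS)
            in >> collaborate_with_initiator >> number_of_current_replica;
        // Otherwise the fields keep their defaults.
    }
};

int main()
{
    ClientInfoSketch info{true, 2};
    std::stringstream wire;
    info.write(wire, MIN_REVISION_WITH_PARALLEL_REPLICAS);

    ClientInfoSketch received;
    received.read(wire, MIN_REVISION_WITH_PARALLEL_REPLICAS);
    std::cout << received.number_of_current_replica << '\n';   // prints 2
}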
diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index ec78abf574c..a64e17264b1 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -184,6 +184,8 @@ public: bool isLocal() const { return !local_addresses.empty(); } bool hasRemoteConnections() const { return local_addresses.size() != per_replica_pools.size(); } size_t getLocalNodeCount() const { return local_addresses.size(); } + size_t getRemoteNodeCount() const { return per_replica_pools.size() - local_addresses.size(); } + size_t getAllNodeCount() const { return per_replica_pools.size(); } bool hasInternalReplication() const { return has_internal_replication; } /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const; diff --git a/src/Interpreters/ClusterProxy/IStreamFactory.h b/src/Interpreters/ClusterProxy/IStreamFactory.h index 6360aee2f55..483ce9dcab9 100644 --- a/src/Interpreters/ClusterProxy/IStreamFactory.h +++ b/src/Interpreters/ClusterProxy/IStreamFactory.h @@ -37,7 +37,9 @@ public: Block header; size_t shard_num = 0; + size_t num_replicas = 0; ConnectionPoolWithFailoverPtr pool; + ConnectionPoolPtrs per_replica_pools; /// If we connect to replicas lazily. /// (When there is a local replica with big delay). diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index b644f2936d9..a47874c475a 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -117,7 +117,9 @@ void SelectStreamFactory::createForShard( .query = modified_query_ast, .header = header, .shard_num = shard_info.shard_num, + .num_replicas = shard_info.getAllNodeCount(), .pool = shard_info.pool, + .per_replica_pools = shard_info.per_replica_pools, .lazy = lazy, .local_delay = local_delay, }); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9b2721cd15d..db1d6a37877 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2962,7 +2962,7 @@ PartUUIDsPtr Context::getPartUUIDs() const ReadTaskCallback Context::getReadTaskCallback() const { if (!next_task_callback.has_value()) - throw Exception(fmt::format("Next task callback is not set for query {}", getInitialQueryId()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Next task callback is not set for query {}", getInitialQueryId()); return next_task_callback.value(); } @@ -2972,6 +2972,20 @@ void Context::setReadTaskCallback(ReadTaskCallback && callback) next_task_callback = callback; } + +MergeTreeReadTaskCallback Context::getMergeTreeReadTaskCallback() const +{ + if (!merge_tree_read_task_callback.has_value()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Next task callback for is not set for query {}", getInitialQueryId()); + + return merge_tree_read_task_callback.value(); +} + +void Context::setMergeTreeReadTaskCallback(MergeTreeReadTaskCallback && callback) +{ + merge_tree_read_task_callback = callback; +} + PartUUIDsPtr Context::getIgnoredPartUUIDs() const { auto lock = getLock(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 5948cc7f7a7..823bc028c15 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -14,6 +14,7 @@ #include #include #include +#include #include "config_core.h" @@ -148,6 +149,8 @@ using InputBlocksReader = std::function; /// Used in 
distributed task processing using ReadTaskCallback = std::function; +using MergeTreeReadTaskCallback = std::function(PartitionReadRequest)>; + /// An empty interface for an arbitrary object that may be attached by a shared pointer /// to query context, when using ClickHouse as a library. struct IHostContext @@ -216,8 +219,12 @@ private: Scalars scalars; Scalars local_scalars; - /// Fields for distributed s3 function + /// Used in s3Cluster table function. With this callback, a worker node could ask an initiator + /// about next file to read from s3. std::optional next_task_callback; + /// Used in parallel reading from replicas. A replica tells about its intentions to read + /// some ranges from some part and initiator will tell the replica about whether it is accepted or denied. + std::optional merge_tree_read_task_callback; /// Record entities accessed by current query, and store this information in system.query_log. struct QueryAccessInfo @@ -865,6 +872,9 @@ public: ReadTaskCallback getReadTaskCallback() const; void setReadTaskCallback(ReadTaskCallback && callback); + MergeTreeReadTaskCallback getMergeTreeReadTaskCallback() const; + void setMergeTreeReadTaskCallback(MergeTreeReadTaskCallback && callback); + /// Background executors related methods void initializeBackgroundExecutorsIfNeeded(); diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 27bb4906f1a..ee5dc4deebb 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -1189,7 +1189,7 @@ void DDLWorker::runMainThread() } catch (...) { - tryLogCurrentException(log, "Unexpected error, will try to restart main thread:"); + tryLogCurrentException(log, "Unexpected error, will try to restart main thread"); reset_state(); sleepForSeconds(5); } diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index e682a98114d..f615aa24a91 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -95,14 +95,16 @@ QualifiedTableName ExternalDictionariesLoader::qualifyDictionaryNameWithDatabase return qualified_dictionary_name; } - if (qualified_name->database.empty() && has(dictionary_name)) + /// If dictionary was not qualified with database name, try to resolve dictionary as xml dictionary. + if (qualified_name->database.empty() && !has(qualified_name->table)) { - /// This is xml dictionary - return *qualified_name; - } + std::string current_database_name = query_context->getCurrentDatabase(); + std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name, current_database_name); - if (qualified_name->database.empty()) - qualified_name->database = query_context->getCurrentDatabase(); + /// If after qualify dictionary_name with default_database_name we find it, add default_database to qualified name. 
+ if (has(resolved_name)) + qualified_name->database = std::move(current_database_name); + } return *qualified_name; } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 0d9cad34545..8f003e75a07 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -981,9 +981,10 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) return {}; /// If table has dependencies - add them to the graph - TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext()->getGlobalContext(), query_ptr); + QualifiedTableName qualified_name{database_name, create.getTable()}; + TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); if (!loading_dependencies.empty()) - DatabaseCatalog::instance().addLoadingDependencies(QualifiedTableName{database_name, create.getTable()}, std::move(loading_dependencies)); + DatabaseCatalog::instance().addLoadingDependencies(std::move(qualified_name), std::move(loading_dependencies)); return fillTableIfNeeded(create); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 8c0d3620dd6..b7edf12e23f 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -380,13 +380,6 @@ BlockIO InterpreterInsertQuery::execute() BlockIO res; - res.pipeline.addStorageHolder(table); - if (const auto * mv = dynamic_cast(table.get())) - { - if (auto inner_table = mv->tryGetTargetTable()) - res.pipeline.addStorageHolder(inner_table); - } - /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? if (is_distributed_insert_select) { @@ -445,6 +438,13 @@ BlockIO InterpreterInsertQuery::execute() } } + res.pipeline.addStorageHolder(table); + if (const auto * mv = dynamic_cast(table.get())) + { + if (auto inner_table = mv->tryGetTargetTable()) + res.pipeline.addStorageHolder(inner_table); + } + return res; } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 471ad67d4e7..fef31d906c6 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -231,8 +231,8 @@ bool isStorageTouchedByMutations( PullingPipelineExecutor executor(io.pipeline); Block block; - while (!block.rows()) - executor.pull(block); + while (executor.pull(block)) {} + if (!block.rows()) return false; else if (block.rows() != 1) @@ -569,7 +569,16 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) stages.emplace_back(context); const auto & column = columns_desc.get(command.column_name); - stages.back().column_to_updated.emplace(column.name, column.default_desc.expression->clone()); + + if (!column.default_desc.expression) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot materialize column `{}` because it doesn't have default expression", column.name); + + auto materialized_column = makeASTFunction( + "_CAST", column.default_desc.expression->clone(), std::make_shared(column.type->getName())); + + stages.back().column_to_updated.emplace(column.name, materialized_column); } else if (command.type == MutationCommand::MATERIALIZE_INDEX) { diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 0440c52797c..802bf4e43ce 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ 
b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -51,7 +51,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v } } } - else if (function->name == "toUInt8" || function->name == "toInt8") + else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity") { if (const auto * expr_list = function->arguments->as()) { diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 4d58f0c97dc..a4583685a90 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -16,11 +16,6 @@ #include -namespace CurrentMetrics -{ - extern const Metric Query; -} - namespace DB { @@ -29,6 +24,7 @@ namespace ErrorCodes extern const int TOO_MANY_SIMULTANEOUS_QUERIES; extern const int QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING; extern const int LOGICAL_ERROR; + extern const int QUERY_WAS_CANCELLED; } @@ -295,8 +291,10 @@ QueryStatus::QueryStatus( , query(query_) , client_info(client_info_) , priority_handle(std::move(priority_handle_)) - , num_queries_increment{CurrentMetrics::Query} { + auto settings = getContext()->getSettings(); + limits.max_execution_time = settings.max_execution_time; + overflow_mode = settings.timeout_overflow_mode; } QueryStatus::~QueryStatus() @@ -332,6 +330,22 @@ void QueryStatus::removePipelineExecutor(PipelineExecutor * e) std::erase_if(executors, [e](PipelineExecutor * x) { return x == e; }); } +bool QueryStatus::checkTimeLimit() +{ + if (is_killed.load()) + throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); + + return limits.checkTimeLimit(watch, overflow_mode); +} + +bool QueryStatus::checkTimeLimitSoft() +{ + if (is_killed.load()) + return false; + + return limits.checkTimeLimit(watch, OverflowMode::BREAK); +} + void QueryStatus::setUserProcessList(ProcessListForUser * user_process_list_) { diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 02be24bb2bd..9c826bde061 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -1,11 +1,12 @@ #pragma once #include -#include #include #include #include #include +#include +#include #include #include #include @@ -25,11 +26,6 @@ #include -namespace CurrentMetrics -{ - extern const Metric Query; -} - namespace DB { @@ -93,9 +89,12 @@ protected: /// Progress of output stream Progress progress_out; - QueryPriorities::Handle priority_handle; + /// Used to externally check for the query time limits + /// They are saved in the constructor to limit the overhead of each call to checkTimeLimit() + ExecutionSpeedLimits limits; + OverflowMode overflow_mode; - CurrentMetrics::Increment num_queries_increment{CurrentMetrics::Query}; + QueryPriorities::Handle priority_handle; std::atomic is_killed { false }; @@ -176,6 +175,11 @@ public: /// Removes a pipeline to the QueryStatus void removePipelineExecutor(PipelineExecutor * e); + + /// Checks the query time limits (cancelled or timeout) + bool checkTimeLimit(); + /// Same as checkTimeLimit but it never throws + [[nodiscard]] bool checkTimeLimitSoft(); }; diff --git a/src/Interpreters/RewriteAnyFunctionVisitor.cpp b/src/Interpreters/RewriteAnyFunctionVisitor.cpp index eed6368ae54..5eb14aa4252 100644 --- a/src/Interpreters/RewriteAnyFunctionVisitor.cpp +++ b/src/Interpreters/RewriteAnyFunctionVisitor.cpp @@ -63,7 +63,12 @@ bool extractIdentifiers(const ASTFunction & func, std::unordered_set & void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data) { if (auto * func = ast->as()) + { + if 
(func->is_window_function) + return; + visit(*func, ast, data); + } } void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data) diff --git a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp index 7b322ca1585..7f725c1d8a5 100644 --- a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp +++ b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp @@ -10,7 +10,12 @@ namespace DB void RewriteSumIfFunctionMatcher::visit(ASTPtr & ast, Data & data) { if (auto * func = ast->as()) + { + if (func->is_window_function) + return; + visit(*func, ast, data); + } } void RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index d864bb54b2e..6b3a50d88e2 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -465,9 +465,13 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, ASTFunction * func = elem->as(); /// Never remove untuple. It's result column may be in required columns. - /// It is not easy to analyze untuple here, because types were not calculated yes. + /// It is not easy to analyze untuple here, because types were not calculated yet. if (func && func->name == "untuple") new_elements.push_back(elem); + + /// removing aggregation can change number of rows, so `count()` result in outer sub-query would be wrong + if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name) && !select_query->groupBy()) + new_elements.push_back(elem); } } diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp index d9ac53097ab..3e82930af9d 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp @@ -1,6 +1,7 @@ #include "UserDefinedSQLFunctionVisitor.h" #include +#include #include #include @@ -18,19 +19,16 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } -void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data & data) +void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data &) { auto * function = ast->as(); if (!function) return; - auto result = tryToReplaceFunction(*function); - - if (result) - { - ast = result; - visit(ast, data); - } + std::unordered_set udf_in_replace_process; + auto replace_result = tryToReplaceFunction(*function, udf_in_replace_process); + if (replace_result) + ast = replace_result; } bool UserDefinedSQLFunctionMatcher::needChildVisit(const ASTPtr &, const ASTPtr &) @@ -38,8 +36,13 @@ bool UserDefinedSQLFunctionMatcher::needChildVisit(const ASTPtr &, const ASTPtr return true; } -ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & function) +ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & function, std::unordered_set & udf_in_replace_process) { + if (udf_in_replace_process.find(function.name) != udf_in_replace_process.end()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Recursive function call detected during function call {}", + function.name); + auto user_defined_function = UserDefinedSQLFunctionFactory::instance().tryGet(function.name); if (!user_defined_function) return nullptr; @@ -71,10 +74,15 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f identifier_name_to_function_argument.emplace(identifier_name, function_argument); } + auto [it, _] = 
udf_in_replace_process.emplace(function.name); + auto function_body_to_update = function_core_expression->children.at(1)->clone(); + auto expression_list = std::make_shared(); + expression_list->children.emplace_back(std::move(function_body_to_update)); + std::stack ast_nodes_to_update; - ast_nodes_to_update.push(function_body_to_update); + ast_nodes_to_update.push(expression_list); while (!ast_nodes_to_update.empty()) { @@ -83,6 +91,13 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f for (auto & child : ast_node_to_update->children) { + if (auto * inner_function = child->as()) + { + auto replace_result = tryToReplaceFunction(*inner_function, udf_in_replace_process); + if (replace_result) + child = replace_result; + } + auto identifier_name_opt = tryGetIdentifierName(child); if (identifier_name_opt) { @@ -104,6 +119,10 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f } } + udf_in_replace_process.erase(it); + + function_body_to_update = expression_list->children[0]; + auto function_alias = function.tryGetAlias(); if (!function_alias.empty()) diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.h b/src/Interpreters/UserDefinedSQLFunctionVisitor.h index 46f95f29ab9..686594c088f 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.h +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.h @@ -34,7 +34,7 @@ public: private: static void visit(ASTFunction & func, const Data & data); - static ASTPtr tryToReplaceFunction(const ASTFunction & function); + static ASTPtr tryToReplaceFunction(const ASTFunction & function, std::unordered_set & udf_in_replace_process); }; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 62964180cba..b69bbcc6332 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -308,7 +308,7 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr span.parent_span_id = context->getClientInfo().client_trace_context.span_id; span.operation_name = "query"; span.start_time_us = current_time_us; - span.finish_time_us = current_time_us; + span.finish_time_us = time_in_microseconds(std::chrono::system_clock::now()); /// Keep values synchronized to type enum in QueryLogElement::createBlock. 
span.attribute_names.push_back("clickhouse.query_status"); @@ -632,7 +632,13 @@ static std::tuple executeQueryImpl( } { - OpenTelemetrySpanHolder span("IInterpreter::execute()"); + std::unique_ptr span; + if (context->query_trace_context.trace_id != UUID()) + { + auto raw_interpreter_ptr = interpreter.get(); + std::string class_name(abi::__cxa_demangle(typeid(*raw_interpreter_ptr).name(), nullptr, nullptr, nullptr)); + span = std::make_unique(class_name + "::execute()"); + } res = interpreter->execute(); } diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index bf20bef6992..b571a8e8e10 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -225,7 +225,13 @@ void removeColumnNullability(ColumnWithTypeAndName & column) if (column.column && column.column->isNullable()) { + column.column = column.column->convertToFullColumnIfConst(); const auto * nullable_col = checkAndGetColumn(*column.column); + if (!nullable_col) + { + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' is expected to be nullable", column.dumpStructure()); + } + MutableColumnPtr mutable_column = nullable_col->getNestedColumn().cloneEmpty(); insertFromNullableOrDefault(mutable_column, nullable_col); column.column = std::move(mutable_column); diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 952e5c5a738..facc7e728c9 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -268,7 +268,7 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat what = "MATERIALIZED VIEW"; else if (is_live_view) what = "LIVE VIEW"; - if (is_window_view) + else if (is_window_view) what = "WINDOW VIEW"; settings.ostr diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e9ec7b43a21..584c2a32afd 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -35,7 +34,6 @@ #include #include -#include #include "ASTColumnsMatcher.h" #include @@ -1935,15 +1933,21 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e { if (const auto * func = lambda->as(); func && func->name == "lambda") { + if (func->arguments->children.size() != 2) + throw Exception(ErrorCodes::SYNTAX_ERROR, "lambda requires two arguments"); + const auto * lambda_args_tuple = func->arguments->children.at(0)->as(); + if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") + throw Exception(ErrorCodes::SYNTAX_ERROR, "First argument of lambda must be a tuple"); + const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children; if (lambda_arg_asts.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "APPLY column transformer can only accept lambda with one argument"); + throw Exception(ErrorCodes::SYNTAX_ERROR, "APPLY column transformer can only accept lambda with one argument"); if (auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[0]); opt_arg_name) lambda_arg = *opt_arg_name; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "lambda argument declarations must be identifiers"); + throw Exception(ErrorCodes::SYNTAX_ERROR, "lambda argument declarations must be identifiers"); } else { diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 65b4b6df7f2..64f117c707f 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include #include @@ -25,22 
+25,24 @@ namespace ErrorCodes struct Expected { const char * max_parsed_pos = nullptr; - std::set variants; + std::vector variants; /// 'description' should be statically allocated string. - void add(const char * current_pos, const char * description) + ALWAYS_INLINE void add(const char * current_pos, const char * description) { if (!max_parsed_pos || current_pos > max_parsed_pos) { variants.clear(); max_parsed_pos = current_pos; + variants.push_back(description); + return; } - if (!max_parsed_pos || current_pos >= max_parsed_pos) - variants.insert(description); + if ((current_pos == max_parsed_pos) && (find(variants.begin(), variants.end(), description) == variants.end())) + variants.push_back(description); } - void add(TokenIterator it, const char * description) + ALWAYS_INLINE void add(TokenIterator it, const char * description) { add(it->begin, description); } @@ -60,18 +62,18 @@ public: Pos(Tokens & tokens_, uint32_t max_depth_) : TokenIterator(tokens_), max_depth(max_depth_) {} - void increaseDepth() + ALWAYS_INLINE void increaseDepth() { ++depth; - if (max_depth > 0 && depth > max_depth) + if (unlikely(max_depth > 0 && depth > max_depth)) throw Exception( "Maximum parse depth (" + std::to_string(max_depth) + ") exceeded. Consider rising max_parser_depth parameter.", ErrorCodes::TOO_DEEP_RECURSION); } - void decreaseDepth() + ALWAYS_INLINE void decreaseDepth() { - if (depth == 0) + if (unlikely(depth == 0)) throw Exception("Logical error in parser: incorrect calculation of parse depth", ErrorCodes::LOGICAL_ERROR); --depth; } diff --git a/src/Parsers/IParserBase.h b/src/Parsers/IParserBase.h index cf69e5f2dfa..ce08bdef790 100644 --- a/src/Parsers/IParserBase.h +++ b/src/Parsers/IParserBase.h @@ -12,7 +12,7 @@ class IParserBase : public IParser { public: template - static bool wrapParseImpl(Pos & pos, const F & func) + ALWAYS_INLINE static bool wrapParseImpl(Pos & pos, const F & func) { Pos begin = pos; bool res = func(); @@ -24,7 +24,7 @@ public: struct IncreaseDepthTag {}; template - static bool wrapParseImpl(Pos & pos, IncreaseDepthTag, const F & func) + ALWAYS_INLINE static bool wrapParseImpl(Pos & pos, IncreaseDepthTag, const F & func) { Pos begin = pos; pos.increaseDepth(); diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index e3a5b9f79c3..b84bec57817 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -1,8 +1,10 @@ #pragma once -#include +#include #include +#include + namespace DB { @@ -59,22 +61,30 @@ private: public: explicit TokenIterator(Tokens & tokens_) : tokens(&tokens_) {} - const Token & get() { return (*tokens)[index]; } - const Token & operator*() { return get(); } - const Token * operator->() { return &get(); } + ALWAYS_INLINE const Token & get() { return (*tokens)[index]; } + ALWAYS_INLINE const Token & operator*() { return get(); } + ALWAYS_INLINE const Token * operator->() { return &get(); } - TokenIterator & operator++() { ++index; return *this; } - TokenIterator & operator--() { --index; return *this; } + ALWAYS_INLINE TokenIterator & operator++() + { + ++index; + return *this; + } + ALWAYS_INLINE TokenIterator & operator--() + { + --index; + return *this; + } - bool operator< (const TokenIterator & rhs) const { return index < rhs.index; } - bool operator<= (const TokenIterator & rhs) const { return index <= rhs.index; } - bool operator== (const TokenIterator & rhs) const { return index == rhs.index; } - bool operator!= (const TokenIterator & rhs) const { return index != rhs.index; } + ALWAYS_INLINE bool 
operator<(const TokenIterator & rhs) const { return index < rhs.index; } + ALWAYS_INLINE bool operator<=(const TokenIterator & rhs) const { return index <= rhs.index; } + ALWAYS_INLINE bool operator==(const TokenIterator & rhs) const { return index == rhs.index; } + ALWAYS_INLINE bool operator!=(const TokenIterator & rhs) const { return index != rhs.index; } - bool isValid() { return get().type < TokenType::EndOfStream; } + ALWAYS_INLINE bool isValid() { return get().type < TokenType::EndOfStream; } /// Rightmost token we had looked. - const Token & max() { return tokens->max(); } + ALWAYS_INLINE const Token & max() { return tokens->max(); } }; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index c8c9153b777..e722f8718f7 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -22,7 +21,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int QUERY_WAS_CANCELLED; } @@ -32,8 +30,6 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) try { graph = std::make_unique(processors); - if (process_list_element) - process_list_element->addPipelineExecutor(this); } catch (Exception & exception) { @@ -46,6 +42,12 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) throw; } + if (process_list_element) + { + // Add the pipeline to the QueryStatus at the end to avoid issues if other things throw + // as that would leave the executor "linked" + process_list_element->addPipelineExecutor(this); + } } PipelineExecutor::~PipelineExecutor() @@ -73,6 +75,7 @@ void PipelineExecutor::finish() void PipelineExecutor::execute(size_t num_threads) { + checkTimeLimit(); if (num_threads < 1) num_threads = 1; @@ -124,10 +127,33 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) return false; } +bool PipelineExecutor::checkTimeLimitSoft() +{ + if (process_list_element) + { + bool continuing = process_list_element->checkTimeLimitSoft(); + // We call cancel here so that all processors are notified and tasks waken up + // so that the "break" is faster and doesn't wait for long events + if (!continuing) + cancel(); + return continuing; + } + + return true; +} + +bool PipelineExecutor::checkTimeLimit() +{ + bool continuing = checkTimeLimitSoft(); + if (!continuing) + process_list_element->checkTimeLimit(); // Will throw if needed + + return continuing; +} + void PipelineExecutor::finalizeExecution() { - if (process_list_element && process_list_element->isKilled()) - throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); + checkTimeLimit(); if (cancelled) return; @@ -190,6 +216,9 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie if (tasks.isFinished()) break; + if (!checkTimeLimitSoft()) + break; + #ifndef NDEBUG Stopwatch processing_time_watch; #endif diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 19137b2306a..12f2bd8b75b 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -43,6 +43,11 @@ public: /// Cancel execution. May be called from another thread. void cancel(); + /// Checks the query time limits (cancelled or timeout). 
Throws on cancellation or when time limit is reached and the query uses "break" + bool checkTimeLimit(); + /// Same as checkTimeLimit but it never throws. It returns false on cancellation or time limit reached + [[nodiscard]] bool checkTimeLimitSoft(); + private: ExecutingGraphPtr graph; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index fdddfdef2a4..0ba07df95a6 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -117,8 +117,8 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) data->rethrowExceptionIfHas(); - bool is_execution_finished = lazy_format ? lazy_format->isFinished() - : data->is_finished.load(); + bool is_execution_finished + = !data->executor->checkTimeLimitSoft() || lazy_format ? lazy_format->isFinished() : data->is_finished.load(); if (is_execution_finished) { diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index a9c73b9f8fb..ae522c1073d 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -44,6 +44,9 @@ bool PullingPipelineExecutor::pull(Chunk & chunk) if (!executor) executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + if (!executor->checkTimeLimitSoft()) + return false; + if (!executor->executeStep(&has_data_flag)) return false; diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 6ff9a8cca2c..8cd9d154ae4 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -205,7 +205,7 @@ void CustomSeparatedRowInputFormat::syncAfterError() bool CustomSeparatedRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuffer & out) { - return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first firld", ignore_spaces); + return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first field", ignore_spaces); } bool CustomSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h index 23fb506c220..ac03c2991bf 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h @@ -23,6 +23,13 @@ public: String getName() const override { return "JSONEachRowRowOutputFormat"; } +public: + /// Content-Type to set when sending HTTP response. + String getContentType() const override + { + return settings.json.array_of_rows ? 
"application/json; charset=UTF-8" : "application/x-ndjson; charset=UTF-8" ; + } + protected: void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; void writeFieldDelimiter() override; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 67ca6d3d8e0..eddbbb9138c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -74,7 +74,8 @@ ReadFromMergeTree::ReadFromMergeTree( bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, Poco::Logger * log_, - MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_) + MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_, + bool enable_parallel_reading) : ISourceStep(DataStream{.header = MergeTreeBaseSelectProcessor::transformHeader( metadata_snapshot_->getSampleBlockForColumns(real_column_names_, data_.getVirtuals(), data_.getStorageID()), getPrewhereInfo(query_info_), @@ -107,6 +108,9 @@ ReadFromMergeTree::ReadFromMergeTree( auto type = std::make_shared(); output_stream->header.insert({type->createColumn(), type, "_sample_factor"}); } + + if (enable_parallel_reading) + read_task_callback = context->getMergeTreeReadTaskCallback(); } Pipe ReadFromMergeTree::readFromPool( @@ -127,6 +131,7 @@ Pipe ReadFromMergeTree::readFromPool( } const auto & settings = context->getSettingsRef(); + const auto & client_info = context->getClientInfo(); MergeTreeReadPool::BackoffSettings backoff_settings(settings); auto pool = std::make_shared( @@ -147,17 +152,30 @@ Pipe ReadFromMergeTree::readFromPool( for (size_t i = 0; i < max_streams; ++i) { + std::optional extension; + if (read_task_callback) + { + extension = ParallelReadingExtension + { + .callback = read_task_callback.value(), + .count_participating_replicas = client_info.count_participating_replicas, + .number_of_current_replica = client_info.number_of_current_replica, + .colums_to_read = required_columns + }; + } + auto source = std::make_shared( i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, data, metadata_snapshot, use_uncompressed_cache, - prewhere_info, actions_settings, reader_settings, virt_column_names); + prewhere_info, actions_settings, reader_settings, virt_column_names, std::move(extension)); - if (i == 0) - { - /// Set the approximate number of rows for the first source only + /// Set the approximate number of rows for the first source only + /// In case of parallel processing on replicas do not set approximate rows at all. 
+ /// Because the value will be identical on every replica and will be accounted for + /// multiple times (settings.max_parallel_replicas times more) + if (i == 0 && !client_info.collaborate_with_initiator) source->addTotalRowsApprox(total_rows); - } pipes.emplace_back(std::move(source)); } @@ -172,10 +190,22 @@ ProcessorPtr ReadFromMergeTree::createSource( bool use_uncompressed_cache, bool has_limit_below_one_block) { + const auto & client_info = context->getClientInfo(); + std::optional extension; + if (read_task_callback) + { + extension = ParallelReadingExtension + { + .callback = read_task_callback.value(), + .count_participating_replicas = client_info.count_participating_replicas, + .number_of_current_replica = client_info.number_of_current_replica, + .colums_to_read = required_columns + }; + } return std::make_shared( data, metadata_snapshot, part.data_part, max_block_size, preferred_block_size_bytes, preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info, - actions_settings, reader_settings, virt_column_names, part.part_index_in_query, has_limit_below_one_block); + actions_settings, reader_settings, virt_column_names, part.part_index_in_query, has_limit_below_one_block, std::move(extension)); } Pipe ReadFromMergeTree::readInOrder( diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 46b62467ae0..0bdfa66bcc7 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -97,7 +97,8 @@ public: bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, Poco::Logger * log_, - MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_ + MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_, + bool enable_parallel_reading ); String getName() const override { return "ReadFromMergeTree"; } @@ -184,6 +185,8 @@ private: MergeTreeDataSelectAnalysisResultPtr selectRangesToRead(MergeTreeData::DataPartsVector parts) const; ReadFromMergeTree::AnalysisResult getAnalysisResult() const; MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr; + + std::optional read_task_callback; }; struct MergeTreeDataSelectAnalysisResult diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 399e7d01839..8fcec03d746 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include namespace DB { @@ -112,7 +114,10 @@ ReadFromRemote::ReadFromRemote( { } -void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard) +void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard, + std::shared_ptr coordinator, + std::shared_ptr pool, + std::optional replica_info) { bool add_agg_info = stage == QueryProcessingStage::WithMergeableState; bool add_totals = false; @@ -125,7 +130,10 @@ } auto lazily_create_stream = [ + replica_info = replica_info, + pool = pool ?
pool : shard.pool, + coordinator = coordinator, + shard_num = shard.shard_num, shard_count = shard_count, query = shard.query, header = shard.header, context = context, throttler = throttler, main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables, @@ -161,9 +169,12 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto max_remote_delay = std::max(try_result.staleness, max_remote_delay); } - if (try_results.empty() || local_delay < max_remote_delay) + /// We disable this branch in case of parallel reading from replicas, because createLocalPlan will call + /// InterpreterSelectQuery directly and it will be too ugly to pass ParallelReplicasCoordinator or some callback there. + if (!context->getClientInfo().collaborate_with_initiator && (try_results.empty() || local_delay < max_remote_delay)) { auto plan = createLocalPlan(query, header, context, stage, shard_num, shard_count); + return QueryPipelineBuilder::getPipe(std::move(*plan->buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)))); @@ -180,7 +191,8 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard_num), std::make_shared(), "_shard_num"}}; auto remote_query_executor = std::make_shared( - pool, std::move(connections), query_string, header, context, throttler, scalars, external_tables, stage); + pool, std::move(connections), query_string, header, context, throttler, scalars, external_tables, stage, + RemoteQueryExecutor::Extension{.parallel_reading_coordinator = std::move(coordinator), .replica_info = replica_info}); return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read); } @@ -191,7 +203,10 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto addConvertingActions(pipes.back(), output_stream->header); } -void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard) +void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard, + std::shared_ptr coordinator, + std::shared_ptr pool, + std::optional replica_info) { bool add_agg_info = stage == QueryProcessingStage::WithMergeableState; bool add_totals = false; @@ -207,11 +222,20 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory:: scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_num), std::make_shared(), "_shard_num"}}; - auto remote_query_executor = std::make_shared( - shard.pool, query_string, shard.header, context, throttler, scalars, external_tables, stage); + + std::shared_ptr remote_query_executor; + + remote_query_executor = std::make_shared( + pool ? pool : shard.pool, query_string, shard.header, context, throttler, scalars, external_tables, stage, + RemoteQueryExecutor::Extension{.parallel_reading_coordinator = std::move(coordinator), .replica_info = std::move(replica_info)}); + remote_query_executor->setLogger(log); - remote_query_executor->setPoolMode(PoolMode::GET_MANY); + /// In case of parallel reading from replicas we have a connection pool per replica. + /// Setting PoolMode will make no sense. 
+ if (!pool) + remote_query_executor->setPoolMode(PoolMode::GET_MANY); + if (!table_func_ptr) remote_query_executor->setMainTable(main_table); @@ -223,12 +247,51 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory:: void ReadFromRemote::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { Pipes pipes; - for (const auto & shard : shards) + + const auto & settings = context->getSettingsRef(); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + + /// We have to create a pipe for each replica + /// FIXME: The second condition is only for tests to work, because hedged connections enabled by default. + if (settings.max_parallel_replicas > 1 && !enable_sample_offset_parallel_processing && !context->getSettingsRef().use_hedged_requests) { - if (shard.lazy) - addLazyPipe(pipes, shard); - else - addPipe(pipes, shard); + const Settings & current_settings = context->getSettingsRef(); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); + + for (const auto & shard : shards) + { + auto coordinator = std::make_shared(); + + for (size_t replica_num = 0; replica_num < shard.num_replicas; ++replica_num) + { + IConnections::ReplicaInfo replica_info + { + .all_replicas_count = shard.num_replicas, + .number_of_current_replica = replica_num + }; + + auto pool = shard.per_replica_pools[replica_num]; + auto pool_with_failover = std::make_shared( + ConnectionPoolPtrs{pool}, current_settings.load_balancing); + + if (shard.lazy) + addLazyPipe(pipes, shard, coordinator, pool_with_failover, replica_info); + else + addPipe(pipes, shard, coordinator, pool_with_failover, replica_info); + } + } + } + else + { + for (const auto & shard : shards) + { + auto coordinator = std::make_shared(); + + if (shard.lazy) + addLazyPipe(pipes, shard, /*coordinator=*/nullptr, /*pool*/{}, /*replica_info*/std::nullopt); + else + addPipe(pipes, shard, /*coordinator=*/nullptr, /*pool*/{}, /*replica_info*/std::nullopt); + } } auto pipe = Pipe::unitePipes(std::move(pipes)); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index f963164dd3f..f361be93b5a 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -1,9 +1,11 @@ #pragma once #include #include +#include #include #include #include +#include namespace DB { @@ -37,6 +39,12 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; private: + enum class Mode + { + PerReplica, + PerShard + }; + ClusterProxy::IStreamFactory::Shards shards; QueryProcessingStage::Enum stage; @@ -52,8 +60,16 @@ private: Poco::Logger * log; UInt32 shard_count; - void addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard); - void addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard); + void addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard, + std::shared_ptr coordinator, + std::shared_ptr pool, + std::optional replica_info); + void addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard, + std::shared_ptr coordinator, + std::shared_ptr pool, + std::optional replica_info); + + void addPipeForReplica(); }; } diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index d652a342150..f5ee1cb487c 100644 --- a/src/Processors/ResizeProcessor.cpp +++ 
b/src/Processors/ResizeProcessor.cpp @@ -403,12 +403,22 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in /// Close all other waiting for data outputs (there is no corresponding input for them). while (!waiting_outputs.empty()) { - auto & output = output_ports[waiting_outputs.front()]; - waiting_outputs.pop(); + auto & output = output_ports[waiting_outputs.front()]; + waiting_outputs.pop(); - output.status = OutputStatus::Finished; - output.port->finish(); - ++num_finished_outputs; + if (output.status != OutputStatus::Finished) + ++num_finished_outputs; + + output.status = OutputStatus::Finished; + output.port->finish(); + } + + if (num_finished_outputs == outputs.size()) + { + for (auto & input : inputs) + input.close(); + + return Status::Finished; } if (disabled_input_ports.empty()) @@ -418,4 +428,3 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in } } - diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index fd9b80e4673..88ecbe6adc3 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,6 +1,9 @@ #include + +#include #include #include +#include namespace ProfileEvents diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 9d0ccf11ace..e7100e8510b 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -1,12 +1,14 @@ #pragma once +#include #include -#include namespace DB { +class QueryStatus; +class ThreadStatus; /// Proxy class which counts number of written block, rows, bytes class CountingTransform final : public ExceptionKeepingTransform diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h index 50891ece654..2f96a17c17b 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/src/Processors/Transforms/LimitsCheckingTransform.h @@ -1,8 +1,9 @@ #pragma once +#include #include #include #include -#include +#include #include diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index 30ad9746520..4f5be41a89d 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -36,6 +36,7 @@ namespace ErrorCodes extern const int MONGODB_CANNOT_AUTHENTICATE; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int UNKNOWN_TYPE; + extern const int MONGODB_ERROR; } @@ -327,6 +328,14 @@ Chunk MongoDBSource::generate() for (auto & document : response.documents()) { + if (document->exists("ok") && document->exists("$err") + && document->exists("code") && document->getInteger("ok") == 0) + { + auto code = document->getInteger("code"); + const Poco::MongoDB::Element::Ptr value = document->get("$err"); + auto message = static_cast &>(*value).value(); + throw Exception(ErrorCodes::MONGODB_ERROR, "Got error from MongoDB: {}, code: {}", message, code); + } ++num_rows; for (const auto idx : collections::range(0, size)) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 1f8376f4700..7a3bb25d2c6 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -4,10 +4,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -1005,6 +1007,12 @@ static void 
assertSameColumns(const Columns & left_all, assert(left_column); assert(right_column); + if (const auto * left_lc = typeid_cast(left_column)) + left_column = left_lc->getDictionary().getNestedColumn().get(); + + if (const auto * right_lc = typeid_cast(right_column)) + right_column = right_lc->getDictionary().getNestedColumn().get(); + assert(typeid(*left_column).hash_code() == typeid(*right_column).hash_code()); @@ -1056,10 +1064,13 @@ void WindowTransform::appendChunk(Chunk & chunk) // Another problem with Const columns is that the aggregate functions // can't work with them, so we have to materialize them like the // Aggregator does. + // Likewise, aggregate functions can't work with LowCardinality, + // so we have to materialize them too. // Just materialize everything. auto columns = chunk.detachColumns(); + block.original_input_columns = columns; for (auto & column : columns) - column = std::move(column)->convertToFullColumnIfConst(); + column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); block.input_columns = std::move(columns); // Initialize output columns. @@ -1302,7 +1313,7 @@ IProcessor::Status WindowTransform::prepare() // Output the ready block. const auto i = next_output_block_number - first_block_number; auto & block = blocks[i]; - auto columns = block.input_columns; + auto columns = block.original_input_columns; for (auto & res : block.output_columns) { columns.push_back(ColumnPtr(std::move(res))); diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 5fbdd6d38e1..077979e83b9 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -39,6 +39,7 @@ struct WindowFunctionWorkspace struct WindowTransformBlock { + Columns original_input_columns; Columns input_columns; MutableColumns output_columns; diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index ada16a1f201..653d9a2bbf8 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -7,6 +7,7 @@ #include #include #include "Core/Protocol.h" +#include "IO/ReadHelpers.h" #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include namespace CurrentMetrics @@ -42,21 +44,26 @@ namespace ErrorCodes RemoteQueryExecutor::RemoteQueryExecutor( const String & query_, const Block & header_, ContextPtr context_, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) : header(header_), query(query_), context(context_), scalars(scalars_) - , external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_) + , external_tables(external_tables_), stage(stage_) + , task_iterator(extension_ ? extension_->task_iterator : nullptr) + , parallel_reading_coordinator(extension_ ? 
extension_->parallel_reading_coordinator : nullptr) {} RemoteQueryExecutor::RemoteQueryExecutor( Connection & connection, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) - : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) + : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_) { - create_connections = [this, &connection, throttler]() + create_connections = [this, &connection, throttler, extension_]() { - return std::make_shared(connection, context->getSettingsRef(), throttler); + auto res = std::make_shared(connection, context->getSettingsRef(), throttler); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; }; } @@ -64,12 +71,15 @@ RemoteQueryExecutor::RemoteQueryExecutor( std::shared_ptr connection_ptr, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) - : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) + : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_) { - create_connections = [this, connection_ptr, throttler]() + create_connections = [this, connection_ptr, throttler, extension_]() { - return std::make_shared(connection_ptr, context->getSettingsRef(), throttler); + auto res = std::make_shared(connection_ptr, context->getSettingsRef(), throttler); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; }; } @@ -78,12 +88,18 @@ RemoteQueryExecutor::RemoteQueryExecutor( std::vector && connections_, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) : header(header_), query(query_), context(context_) - , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_) + , scalars(scalars_), external_tables(external_tables_), stage(stage_) + , task_iterator(extension_ ? extension_->task_iterator : nullptr) + , parallel_reading_coordinator(extension_ ? 
extension_->parallel_reading_coordinator : nullptr) + , pool(pool_) { - create_connections = [this, connections_, throttler]() mutable { - return std::make_shared(std::move(connections_), context->getSettingsRef(), throttler); + create_connections = [this, connections_, throttler, extension_]() mutable { + auto res = std::make_shared(std::move(connections_), context->getSettingsRef(), throttler); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; }; } @@ -91,11 +107,14 @@ RemoteQueryExecutor::RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool_, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) : header(header_), query(query_), context(context_) - , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_) + , scalars(scalars_), external_tables(external_tables_), stage(stage_) + , task_iterator(extension_ ? extension_->task_iterator : nullptr) + , parallel_reading_coordinator(extension_ ? extension_->parallel_reading_coordinator : nullptr) + , pool(pool_) { - create_connections = [this, throttler]()->std::shared_ptr + create_connections = [this, throttler, extension_]()->std::shared_ptr { const Settings & current_settings = context->getSettingsRef(); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); @@ -107,7 +126,10 @@ RemoteQueryExecutor::RemoteQueryExecutor( if (main_table) table_to_check = std::make_shared(main_table.getQualifiedName()); - return std::make_shared(pool, context, timeouts, throttler, pool_mode, table_to_check); + auto res = std::make_shared(pool, context, timeouts, throttler, pool_mode, table_to_check); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; } #endif @@ -122,7 +144,10 @@ RemoteQueryExecutor::RemoteQueryExecutor( else connection_entries = pool->getMany(timeouts, ¤t_settings, pool_mode); - return std::make_shared(std::move(connection_entries), current_settings, throttler); + auto res = std::make_shared(std::move(connection_entries), current_settings, throttler); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; }; } @@ -344,6 +369,9 @@ std::optional RemoteQueryExecutor::processPacket(Packet packet) { switch (packet.type) { + case Protocol::Server::MergeTreeReadTaskRequest: + processMergeTreeReadTaskRequest(packet.request); + break; case Protocol::Server::ReadTaskRequest: processReadTaskRequest(); break; @@ -440,6 +468,15 @@ void RemoteQueryExecutor::processReadTaskRequest() connections->sendReadTaskResponse(response); } +void RemoteQueryExecutor::processMergeTreeReadTaskRequest(PartitionReadRequest request) +{ + if (!parallel_reading_coordinator) + throw Exception("Coordinator for parallel reading from replicas is not initialized", ErrorCodes::LOGICAL_ERROR); + + auto response = parallel_reading_coordinator->handleRequest(std::move(request)); + connections->sendMergeTreeReadTaskResponse(response); +} + void RemoteQueryExecutor::finish(std::unique_ptr * read_context) { /** If one of: diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index b7a2509ea97..d5603fd2281 100644 --- 
a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -7,7 +9,7 @@ #include #include #include -#include +#include namespace DB @@ -35,20 +37,33 @@ class RemoteQueryExecutor public: using ReadContext = RemoteQueryExecutorReadContext; + /// We can provide additional logic for RemoteQueryExecutor. + /// For example, for the s3Cluster table function we provide an iterator over the tasks to do. + /// Nodes involved in the query send a request for a new task and we answer them using this object. + /// In case of parallel reading from replicas we provide a Coordinator object: + /// every replica will tell us about the parts and mark ranges it wants to read, and the coordinator will + /// decide whether to accept or deny that request. + struct Extension + { + std::shared_ptr task_iterator{nullptr}; + std::shared_ptr parallel_reading_coordinator; + std::optional replica_info; + }; + /// Takes already set connection. /// We don't own connection, thus we have to drain it synchronously. RemoteQueryExecutor( Connection & connection, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); /// Takes already set connection. RemoteQueryExecutor( std::shared_ptr connection, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); /// Accepts several connections already taken from pool. RemoteQueryExecutor( @@ -56,14 +71,14 @@ public: std::vector && connections_, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); /// Takes a pool and gets one or several connections from it.
RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); ~RemoteQueryExecutor(); @@ -115,7 +130,7 @@ private: RemoteQueryExecutor( const String & query_, const Block & header_, ContextPtr context_, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_); + QueryProcessingStage::Enum stage_, std::optional extension_); Block header; Block totals; @@ -136,6 +151,13 @@ private: /// Initiator identifier for distributed task processing std::shared_ptr task_iterator; + std::shared_ptr parallel_reading_coordinator; + + /// This is needed only for parallel reading from replicas, because + /// we create a RemoteQueryExecutor per replica and have to store additional info + /// about the number of the current replica and the total number of replicas. + IConnections::ReplicaInfo replica_info; + std::function()> create_connections; /// Hold a shared reference to the connection pool so that asynchronous connection draining will /// work safely. Make sure it's the first member so that we don't destruct it too early. @@ -203,6 +225,8 @@ private: void processReadTaskRequest(); + void processMergeTreeReadTaskRequest(PartitionReadRequest request); + /// Cancell query and restart it with info about duplicated UUIDs /// only for `allow_experimental_query_deduplication`. std::variant restartQueryWithoutDuplicatedUUIDs(std::unique_ptr * read_context = nullptr); diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h index ded9616296a..1d33f41f255 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -1,15 +1,12 @@ #pragma once #include +#include #include #include #include #include "IServer.h" -#if !defined(ARCADIA_BUILD) -# include -#endif - #if USE_SSL # include #endif diff --git a/src/Server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h index 9103cbaad90..dc3d4047d2a 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -5,10 +5,7 @@ #include #include #include - -#if !defined(ARCADIA_BUILD) -# include -#endif +#include namespace DB { diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 32154054cc0..cdf1838c06b 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -310,10 +310,25 @@ void TCPHandler::runImpl() query_context->setReadTaskCallback([this]() -> String { std::lock_guard lock(task_callback_mutex); + + if (state.is_cancelled) + return {}; + sendReadTaskRequestAssumeLocked(); return receiveReadTaskResponseAssumeLocked(); }); + query_context->setMergeTreeReadTaskCallback([this](PartitionReadRequest request) -> std::optional + { + std::lock_guard lock(task_callback_mutex); + + if (state.is_cancelled) + return std::nullopt; + + sendMergeTreeReadTaskRequstAssumeLocked(std::move(request)); + return receivePartitionMergeTreeReadTaskResponseAssumeLocked(); + }); + /// Processing Query state.io = executeQuery(state.query, query_context, false, state.stage); @@ -663,10 +678,13 @@ void TCPHandler::processOrdinaryQueryWithProcessors() Block block; while (executor.pull(block,
interactive_delay / 1000)) { - std::lock_guard lock(task_callback_mutex); + std::unique_lock lock(task_callback_mutex); if (isQueryCancelled()) { + /// Several callback like callback for parallel reading could be called from inside the pipeline + /// and we have to unlock the mutex from our side to prevent deadlock. + lock.unlock(); /// A packet was received requesting to stop execution of the request. executor.cancel(); break; @@ -786,6 +804,15 @@ void TCPHandler::sendReadTaskRequestAssumeLocked() out->next(); } + +void TCPHandler::sendMergeTreeReadTaskRequstAssumeLocked(PartitionReadRequest request) +{ + writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out); + request.serialize(*out); + out->next(); +} + + void TCPHandler::sendProfileInfo(const ProfileInfo & info) { writeVarUInt(Protocol::Server::ProfileInfo, *out); @@ -1297,6 +1324,35 @@ String TCPHandler::receiveReadTaskResponseAssumeLocked() } +std::optional TCPHandler::receivePartitionMergeTreeReadTaskResponseAssumeLocked() +{ + UInt64 packet_type = 0; + readVarUInt(packet_type, *in); + if (packet_type != Protocol::Client::MergeTreeReadTaskResponse) + { + if (packet_type == Protocol::Client::Cancel) + { + state.is_cancelled = true; + /// For testing connection collector. + if (sleep_in_receive_cancel.totalMilliseconds()) + { + std::chrono::milliseconds ms(sleep_in_receive_cancel.totalMilliseconds()); + std::this_thread::sleep_for(ms); + } + return std::nullopt; + } + else + { + throw Exception(fmt::format("Received {} packet after requesting read task", + Protocol::Client::toString(packet_type)), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + } + } + PartitionReadResponse response; + response.deserialize(*in); + return response; +} + + void TCPHandler::receiveClusterNameAndSalt() { readStringBinary(cluster, *in); @@ -1697,7 +1753,7 @@ bool TCPHandler::isQueryCancelled() return true; default: - throw NetException("Unknown packet from client", ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); + throw NetException("Unknown packet from client " + toString(packet_type), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); } } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 3cf3346cd72..4a340e328ed 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -15,6 +15,8 @@ #include #include +#include + #include "IServer.h" #include "base/types.h" @@ -201,6 +203,7 @@ private: void receiveQuery(); void receiveIgnoredPartUUIDs(); String receiveReadTaskResponseAssumeLocked(); + std::optional receivePartitionMergeTreeReadTaskResponseAssumeLocked(); bool receiveData(bool scalar); bool readDataNext(); void readData(); @@ -233,6 +236,7 @@ private: void sendEndOfStream(); void sendPartUUIDs(); void sendReadTaskRequestAssumeLocked(); + void sendMergeTreeReadTaskRequstAssumeLocked(PartitionReadRequest request); void sendProfileInfo(const ProfileInfo & info); void sendTotals(const Block & totals); void sendExtremes(const Block & extremes); diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index 8389c432db2..ade89ea7228 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -86,7 +86,7 @@ std::optional getExternalDataSourceConfiguration(const configuration.username = config.getString(collection_prefix + ".user", ""); configuration.password = config.getString(collection_prefix + ".password", ""); configuration.database = config.getString(collection_prefix + ".database", ""); - configuration.table = 
config.getString(collection_prefix + ".table", ""); + configuration.table = config.getString(collection_prefix + ".table", config.getString(collection_prefix + ".collection", "")); configuration.schema = config.getString(collection_prefix + ".schema", ""); configuration.addresses_expr = config.getString(collection_prefix + ".addresses_expr", ""); diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index b214caa9a12..502f8b800e3 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -40,7 +40,6 @@ struct StorageMySQLConfiguration : ExternalDataSourceConfiguration struct StorageMongoDBConfiguration : ExternalDataSourceConfiguration { - String collection; String options; }; diff --git a/src/Storages/FileLog/FileLogSource.cpp b/src/Storages/FileLog/FileLogSource.cpp index a8da34f3204..7d4b5ac6fec 100644 --- a/src/Storages/FileLog/FileLogSource.cpp +++ b/src/Storages/FileLog/FileLogSource.cpp @@ -54,12 +54,12 @@ FileLogSource::~FileLogSource() { tryLogCurrentException(__PRETTY_FUNCTION__); } - storage.reduceStreams(); } void FileLogSource::onFinish() { storage.closeFilesAndStoreMeta(start, end); + storage.reduceStreams(); finished = true; } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 90e63aef46d..70aa3d28174 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,11 @@ StorageHDFS::StorageHDFS( ContextPtr context_, const String & compression_method_ = "", ASTPtr partition_by_) - : IStorage(table_id_), WithContext(context_), uri(uri_), format_name(format_name_), compression_method(compression_method_) + : IStorage(table_id_) + , WithContext(context_) + , uri(uri_) + , format_name(format_name_) + , compression_method(compression_method_) , partition_by(partition_by_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); @@ -76,8 +81,7 @@ StorageHDFS::StorageHDFS( setInMemoryMetadata(storage_metadata); } -namespace -{ +using StorageHDFSPtr = std::shared_ptr; class HDFSSource : public SourceWithProgress, WithContext { @@ -93,8 +97,12 @@ public: using SourcesInfoPtr = std::shared_ptr; - static Block getHeader(Block header, bool need_path_column, bool need_file_column) + static Block getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column) { + auto header = metadata_snapshot->getSampleBlock(); + + /// Note: AddingDefaultsBlockInputStream doesn't change header. 
+ if (need_path_column) header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); if (need_file_column) @@ -103,22 +111,35 @@ public: return header; } + static Block getBlockForSource( + const StorageHDFSPtr & storage, + const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, + const SourcesInfoPtr & files_info) + { + if (storage->isColumnOriented()) + return metadata_snapshot->getSampleBlockForColumns( + columns_description.getNamesOfPhysical(), storage->getVirtuals(), storage->getStorageID()); + else + return getHeader(metadata_snapshot, files_info->need_path_column, files_info->need_file_column); + } + HDFSSource( - SourcesInfoPtr source_info_, - String uri_, - String format_, - String compression_method_, - Block sample_block_, + StorageHDFSPtr storage_, + const StorageMetadataPtr & metadata_snapshot_, ContextPtr context_, - UInt64 max_block_size_) - : SourceWithProgress(getHeader(sample_block_, source_info_->need_path_column, source_info_->need_file_column)) + UInt64 max_block_size_, + SourcesInfoPtr source_info_, + String uri_without_path_, + ColumnsDescription columns_description_) + : SourceWithProgress(getBlockForSource(storage_, metadata_snapshot_, columns_description_, source_info_)) , WithContext(context_) + , storage(std::move(storage_)) + , metadata_snapshot(metadata_snapshot_) , source_info(std::move(source_info_)) - , uri(std::move(uri_)) - , format(std::move(format_)) - , compression_method(compression_method_) + , uri_without_path(std::move(uri_without_path_)) , max_block_size(max_block_size_) - , sample_block(std::move(sample_block_)) + , columns_description(std::move(columns_description_)) { } @@ -138,14 +159,30 @@ public: return {}; auto path = source_info->uris[pos]; - current_path = uri + path; + current_path = uri_without_path + path; - auto compression = chooseCompressionMethod(path, compression_method); - read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri, path, getContext()->getGlobalContext()->getConfigRef()), compression); - auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size); - pipeline = QueryPipeline(std::move(input_format)); + auto compression = chooseCompressionMethod(path, storage->compression_method); + read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri_without_path, path, getContext()->getGlobalContext()->getConfigRef()), compression); - reader = std::make_unique(pipeline); + auto get_block_for_format = [&]() -> Block + { + if (storage->isColumnOriented()) + return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + return metadata_snapshot->getSampleBlock(); + }; + auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, get_block_for_format(), max_block_size); + + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); + if (columns_description.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns_description, *input_format, getContext()); + }); + } + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + reader = std::make_unique(*pipeline); } Block res; @@ -180,17 +217,17 @@ public: } private: - std::unique_ptr read_buf; - QueryPipeline pipeline; - std::unique_ptr reader; + StorageHDFSPtr storage; + StorageMetadataPtr metadata_snapshot; SourcesInfoPtr source_info; - String uri; - String format; - String compression_method; - String current_path; 
- + String uri_without_path; UInt64 max_block_size; - Block sample_block; + ColumnsDescription columns_description; + + std::unique_ptr read_buf; + std::unique_ptr pipeline; + std::unique_ptr reader; + String current_path; }; class HDFSSink : public SinkToStorage @@ -203,7 +240,7 @@ public: const CompressionMethod compression_method) : SinkToStorage(sample_block) { - write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef()), compression_method, 3); + write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); } @@ -235,7 +272,6 @@ private: OutputFormatPtr writer; }; - class PartitionedHDFSSink : public PartitionedSink { public: @@ -314,13 +350,14 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c } } } - return result; } +bool StorageHDFS::isColumnOriented() const +{ + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); } - Pipe StorageHDFS::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -355,11 +392,27 @@ Pipe StorageHDFS::read( num_streams = sources_info->uris.size(); Pipes pipes; - + auto this_ptr = std::static_pointer_cast(shared_from_this()); for (size_t i = 0; i < num_streams; ++i) - pipes.emplace_back(std::make_shared( - sources_info, uri_without_path, format_name, compression_method, metadata_snapshot->getSampleBlock(), context_, max_block_size)); + { + const auto get_columns_for_format = [&]() -> ColumnsDescription + { + if (isColumnOriented()) + return ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; + else + return metadata_snapshot->getColumns(); + }; + pipes.emplace_back(std::make_shared( + this_ptr, + metadata_snapshot, + context_, + max_block_size, + sources_info, + uri_without_path, + get_columns_for_format())); + } return Pipe::unitePipes(std::move(pipes)); } @@ -450,6 +503,7 @@ NamesAndTypesList StorageHDFS::getVirtuals() const {"_file", std::make_shared()} }; } + } #endif diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 945f0b9f0f1..db6b078265d 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -38,7 +38,14 @@ public: bool supportsPartitionBy() const override { return true; } + /// Check if the format is column-oriented. + /// Is is useful because column oriented formats could effectively skip unknown columns + /// So we can create a header of only required columns in read method and ask + /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
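// Editor's note (standalone illustration; a header is modelled here as a plain list of
// name/type pairs instead of DB::Block): column-oriented formats such as Parquet or ORC can
// skip columns that are never requested, so the source can be constructed with a header that
// contains only the columns the query actually needs. Row-oriented formats like TSV have to be
// parsed in full, which is why the check below exists.
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

using SimpleHeader = std::vector<std::pair<std::string /* name */, std::string /* type */>>;

SimpleHeader narrowHeaderToRequestedColumns(const SimpleHeader & full_header,
                                            const std::vector<std::string> & requested)
{
    std::unordered_set<std::string> requested_set(requested.begin(), requested.end());
    SimpleHeader narrowed;
    for (const auto & column : full_header)
        if (requested_set.count(column.first))
            narrowed.push_back(column);   /// keep only what the query reads
    return narrowed;
}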
+ bool isColumnOriented() const; + protected: + friend class HDFSSource; StorageHDFS( const String & uri_, const StorageID & table_id_, diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index d6a10452f97..9f5e3c1f7d2 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -29,6 +29,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl explicit WriteBufferFromHDFSImpl( const std::string & hdfs_uri_, const Poco::Util::AbstractConfiguration & config_, + int replication_, int flags) : hdfs_uri(hdfs_uri_) , builder(createHDFSBuilder(hdfs_uri, config_)) @@ -43,7 +44,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl if (!hdfsExists(fs.get(), path.c_str())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "File {} already exists", path); - fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, 0, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here if (fout == nullptr) { @@ -82,10 +83,11 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl WriteBufferFromHDFS::WriteBufferFromHDFS( const std::string & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, + int replication_, size_t buf_size_, int flags_) : BufferWithOwnMemory(buf_size_) - , impl(std::make_unique(hdfs_name_, config_, flags_)) + , impl(std::make_unique(hdfs_name_, config_, replication_, flags_)) { } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/HDFS/WriteBufferFromHDFS.h index 503371f6118..fe9af7dfba4 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.h +++ b/src/Storages/HDFS/WriteBufferFromHDFS.h @@ -23,6 +23,7 @@ public: WriteBufferFromHDFS( const String & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, + int replication_, size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE, int flags = O_WRONLY); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index cf7b075a204..8432e5c48d1 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -20,6 +20,7 @@ #include #include +#include namespace DB diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 1255bf975e6..ea7bc0d4db5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -424,52 +424,59 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns) void IMergeTreeDataPart::removeIfNeeded() { - if (state == State::DeleteOnDestroy || is_temp) + if (!is_temp && state != State::DeleteOnDestroy) + return; + + try { - try - { - auto path = getFullRelativePath(); + auto path = getFullRelativePath(); - if (!volume->getDisk()->exists(path)) + if (!volume->getDisk()->exists(path)) + return; + + if (is_temp) + { + String file_name = fileName(relative_path); + + if (file_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", relative_path, name); + + if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj")) + { + LOG_ERROR( + storage.log, + "~DataPart() should remove part {} but its name doesn't start with \"tmp\" or end with \".tmp_proj\". 
Too " + "suspicious, keeping the part.", + path); return; - - if (is_temp) - { - String file_name = fileName(relative_path); - - if (file_name.empty()) - throw Exception("relative_path " + relative_path + " of part " + name + " is invalid or not set", ErrorCodes::LOGICAL_ERROR); - - if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj")) - { - LOG_ERROR( - storage.log, - "~DataPart() should remove part {} but its name doesn't start with \"tmp\" or end with \".tmp_proj\". Too " - "suspicious, keeping the part.", - path); - return; - } - } - - if (parent_part) - { - std::optional keep_shared_data = keepSharedDataInDecoupledStorage(); - if (!keep_shared_data.has_value()) - return; - projectionRemove(parent_part->getFullRelativePath(), *keep_shared_data); - } - else - remove(); - - if (state == State::DeleteOnDestroy) - { - LOG_TRACE(storage.log, "Removed part from old location {}", path); } } - catch (...) + + if (parent_part) { - tryLogCurrentException(__PRETTY_FUNCTION__); + std::optional keep_shared_data = keepSharedDataInDecoupledStorage(); + if (!keep_shared_data.has_value()) + return; + projectionRemove(parent_part->getFullRelativePath(), *keep_shared_data); } + else + remove(); + + if (state == State::DeleteOnDestroy) + { + LOG_TRACE(storage.log, "Removed part from old location {}", path); + } + } + catch (...) + { + /// FIXME If part it temporary, then directory will not be removed for 1 day (temporary_directories_lifetime). + /// If it's tmp_merge_ or tmp_fetch_, + /// then all future attempts to execute part producing operation will fail with "directory already exists". + /// Seems like it's especially important for remote disks, because removal may fail due to network issues. + tryLogCurrentException(__PRETTY_FUNCTION__); + assert(!is_temp); + assert(state != State::DeleteOnDestroy); + assert(state != State::Temporary); } } @@ -546,7 +553,7 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageM if (!hasColumnFiles(column)) continue; - const auto size = getColumnSize(column_name, *column_type).data_compressed; + const auto size = getColumnSize(column_name).data_compressed; if (size < minimum_size) { minimum_size = size; @@ -747,7 +754,7 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const for (const auto & part_column : columns) { /// It was compressed with default codec and it's not empty - auto column_size = getColumnSize(part_column.name, *part_column.type); + auto column_size = getColumnSize(part_column.name); if (column_size.data_compressed != 0 && !storage_columns.hasCompressionCodec(part_column.name)) { auto serialization = IDataType::getSerialization(part_column, @@ -885,7 +892,7 @@ void IMergeTreeDataPart::loadRowsCount() /// Most trivial types if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes()) { - auto size = getColumnSize(column.name, *column.type); + auto size = getColumnSize(column.name); if (size.data_uncompressed == 0) continue; @@ -933,7 +940,7 @@ void IMergeTreeDataPart::loadRowsCount() if (!column_col->isFixedAndContiguous() || column_col->lowCardinality()) continue; - size_t column_size = getColumnSize(column.name, *column.type).data_uncompressed; + size_t column_size = getColumnSize(column.name).data_uncompressed; if (!column_size) continue; @@ -1157,14 +1164,17 @@ void IMergeTreeDataPart::remove() const * And a race condition can happen that will lead to "File not found" error here. 
*/ + /// NOTE We rename part to delete_tmp_ instead of delete_tmp_ to avoid race condition + /// when we try to remove two parts with the same name, but different relative paths, + /// for example all_1_2_1 (in Deleting state) and tmp_merge_all_1_2_1 (in Temporary state). fs::path from = fs::path(storage.relative_data_path) / relative_path; - fs::path to = fs::path(storage.relative_data_path) / ("delete_tmp_" + name); + fs::path to = fs::path(storage.relative_data_path) / ("delete_tmp_" + relative_path); // TODO directory delete_tmp_ is never removed if server crashes before returning from this function auto disk = volume->getDisk(); if (disk->exists(to)) { - LOG_WARNING(storage.log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart. Removing it.", fullPath(disk, to)); + LOG_WARNING(storage.log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart or race condition. Removing it.", fullPath(disk, to)); try { disk->removeSharedRecursive(fs::path(to) / "", *keep_shared_data); @@ -1490,7 +1500,7 @@ void IMergeTreeDataPart::calculateSecondaryIndicesSizesOnDisk() } } -ColumnSize IMergeTreeDataPart::getColumnSize(const String & column_name, const IDataType & /* type */) const +ColumnSize IMergeTreeDataPart::getColumnSize(const String & column_name) const { /// For some types of parts columns_size maybe not calculated auto it = columns_sizes.find(column_name); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 1467b0ef03f..a203d45aa25 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -103,7 +103,7 @@ public: /// NOTE: Returns zeros if column files are not found in checksums. /// Otherwise return information about column size on disk. - ColumnSize getColumnSize(const String & column_name, const IDataType & /* type */) const; + ColumnSize getColumnSize(const String & column_name) const; /// NOTE: Returns zeros if secondary indexes are not found in checksums. /// Otherwise return information about secondary index size on disk. @@ -198,6 +198,7 @@ public: mutable std::atomic remove_time { std::numeric_limits::max() }; /// If true, the destructor will delete the directory with the part. + /// FIXME Why do we need this flag? What's difference from Temporary and DeleteOnDestroy state? Can we get rid of this? 
bool is_temp = false; /// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem diff --git a/src/Storages/MergeTree/IntersectionsIndexes.h b/src/Storages/MergeTree/IntersectionsIndexes.h new file mode 100644 index 00000000000..68ccbc4a0b1 --- /dev/null +++ b/src/Storages/MergeTree/IntersectionsIndexes.h @@ -0,0 +1,237 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/// A boundary of a segment (left or right) +struct PartToRead +{ + PartBlockRange range; + struct PartAndProjectionNames + { + String part; + String projection; + bool operator<(const PartAndProjectionNames & rhs) const + { + if (part == rhs.part) + return projection < rhs.projection; + return part < rhs.part; + } + bool operator==(const PartAndProjectionNames & rhs) const + { + return part == rhs.part && projection == rhs.projection; + } + }; + + PartAndProjectionNames name; + + bool operator==(const PartToRead & rhs) const + { + return range == rhs.range && name == rhs.name; + } + + bool operator<(const PartToRead & rhs) const + { + /// We allow only consecutive non-intersecting ranges + const bool intersection = + (range.begin <= rhs.range.begin && rhs.range.begin < range.end) || + (rhs.range.begin <= range.begin && range.begin <= rhs.range.end); + if (intersection) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got intersecting parts. First [{}, {}]. Second [{}, {}]", + range.begin, range.end, rhs.range.begin, rhs.range.end); + return range.begin < rhs.range.begin && range.end <= rhs.range.begin; + } +}; + +/// MergeTreeDataPart is described as a segment (min block and max block) +/// During request handling we have to know how many intersection +/// current part has with already saved parts in our state. +struct PartSegments +{ + enum class IntersectionResult + { + NO_INTERSECTION, + EXACTLY_ONE_INTERSECTION, + REJECT + }; + + void addPart(PartToRead part) { segments.insert(std::move(part)); } + + IntersectionResult getIntersectionResult(PartToRead part) + { + bool intersected_before = false; + for (const auto & segment: segments) + { + auto are_intersect = [](auto & x, auto & y) + { + /// <= is important here, because we are working with segments [a, b] + if ((x.begin <= y.begin) && (y.begin <= x.end)) + return true; + if ((y.begin <= x.begin) && (x.begin <= y.end)) + return true; + return false; + }; + + if (are_intersect(segment.range, part.range)) + { + /// We have two or possibly more intersections + if (intersected_before) + return IntersectionResult::REJECT; + + /// We have intersection with part with different name + /// or with different min or max block + /// It could happens if we have merged part on one replica + /// but not on another. + if (segment != part) + return IntersectionResult::REJECT; + + /// We allow only the intersection with the same part as we have + intersected_before = true; + } + } + + return intersected_before ? IntersectionResult::EXACTLY_ONE_INTERSECTION : IntersectionResult::NO_INTERSECTION; + } + + using OrderedSegments = std::set; + OrderedSegments segments; +}; + +/// This is used only in parallel reading from replicas +/// This struct is an ordered set of half intervals and it is responsible for +/// giving an inversion of that intervals (e.g. 
[a, b) => {[-inf, a), [b, +inf)}) +/// or giving an intersection of two sets of intervals +/// This is needed, because MarkRange is actually a half-opened interval +/// and during the query execution we receive some kind of request from every replica +/// to read some ranges from a specific part. +/// We have to avoid the situation, where some range is read twice. +/// This struct helps us to do it using only two operations (intersection and inversion) +/// over a set of half opened intervals. +struct HalfIntervals +{ + static HalfIntervals initializeWithEntireSpace() + { + auto left_inf = std::numeric_limits::min(); + auto right_inf = std::numeric_limits::max(); + return HalfIntervals{{{left_inf, right_inf}}}; + } + + static HalfIntervals initializeFromMarkRanges(MarkRanges ranges) + { + OrderedRanges new_intervals; + for (const auto & range : ranges) + new_intervals.insert(range); + + return HalfIntervals{std::move(new_intervals)}; + } + + MarkRanges convertToMarkRangesFinal() + { + MarkRanges result; + std::move(intervals.begin(), intervals.end(), std::back_inserter(result)); + return result; + } + + HalfIntervals & intersect(const HalfIntervals & rhs) + { + /** + * first [ ) [ ) [ ) [ ) [ ) + * second [ ) [ ) [ ) [ ) + */ + OrderedRanges intersected; + + const auto & first_intervals = intervals; + auto first = first_intervals.begin(); + const auto & second_intervals = rhs.intervals; + auto second = second_intervals.begin(); + + while (first != first_intervals.end() && second != second_intervals.end()) + { + auto curr_intersection = MarkRange{ + std::max(second->begin, first->begin), + std::min(second->end, first->end) + }; + + /// Insert only if segments are intersect + if (curr_intersection.begin < curr_intersection.end) + intersected.insert(std::move(curr_intersection)); + + if (first->end <= second->end) + ++first; + else + ++second; + } + + std::swap(intersected, intervals); + + return *this; + } + + HalfIntervals & negate() + { + auto left_inf = std::numeric_limits::min(); + auto right_inf = std::numeric_limits::max(); + + if (intervals.empty()) + { + intervals.insert(MarkRange{left_inf, right_inf}); + return *this; + } + + OrderedRanges new_ranges; + + /// Possibly add (-inf; begin) + if (auto begin = intervals.begin()->begin; begin != left_inf) + new_ranges.insert(MarkRange{left_inf, begin}); + + auto prev = intervals.begin(); + for (auto it = std::next(intervals.begin()); it != intervals.end(); ++it) + { + if (prev->end != it->begin) + new_ranges.insert(MarkRange{prev->end, it->begin}); + prev = it; + } + + /// Try to add (end; +inf) + if (auto end = intervals.rbegin()->end; end != right_inf) + new_ranges.insert(MarkRange{end, right_inf}); + + std::swap(new_ranges, intervals); + + return *this; + } + + bool operator==(const HalfIntervals & rhs) const + { + return intervals == rhs.intervals; + } + + using OrderedRanges = std::set; + OrderedRanges intervals; +}; + + +[[ maybe_unused ]] static std::ostream & operator<< (std::ostream & out, const HalfIntervals & ranges) +{ + for (const auto & range: ranges.intervals) + out << fmt::format("({}, {}) ", range.begin, range.end); + return out; +} + +/// This is needed for tests where we don't need to modify objects +[[ maybe_unused ]] static HalfIntervals getIntersection(const HalfIntervals & first, const HalfIntervals & second) +{ + auto result = first; + result.intersect(second); + return result; +} + +} diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index ccc5fada537..afaf2e7e841 100644 
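// Editor's note: a usage sketch for the HalfIntervals helper introduced above (the numbers are
// invented for illustration; assumes IntersectionsIndexes.h and MarkRange.h are included).
// Mark ranges are half-open intervals [begin, end); the coordinator only ever needs two
// operations on sets of such intervals: intersection and inversion (negate).
#include <cassert>

static void halfIntervalsUsageSketch()
{
    using namespace DB;

    /// One replica has already been granted marks [10, 20) and [40, 50).
    auto taken = HalfIntervals::initializeFromMarkRanges({{10, 20}, {40, 50}});

    /// Everything that is still free is the inversion of the taken set.
    auto still_free = taken;
    still_free.negate();   /// {[min, 10), [20, 40), [50, max)}

    /// Another replica asks for [15, 45); it may only get the part nobody reads yet.
    auto request = HalfIntervals::initializeFromMarkRanges({{15, 45}});
    auto granted = getIntersection(still_free, request);

    /// The overlap with [10, 20) and [40, 50) has been cut away, so no mark is read twice.
    assert(granted == HalfIntervals::initializeFromMarkRanges({{20, 40}}));
}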
--- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -1,9 +1,6 @@ #pragma once -#include -#include #include -#include #include #include #include @@ -17,135 +14,74 @@ namespace zkutil * * But then we decided to get rid of leader election, so every replica can become leader. * For now, every replica can become leader if there is no leader among replicas with old version. - * - * It's tempting to remove this class at all, but we have to maintain it, - * to maintain compatibility when replicas with different versions work on the same cluster - * (this is allowed for short time period during cluster update). - * - * Replicas with new versions creates ephemeral sequential nodes with values like "replica_name (multiple leaders Ok)". - * If the first node belongs to a replica with new version, then all replicas with new versions become leaders. */ -class LeaderElection + +void checkNoOldLeaders(Poco::Logger * log, ZooKeeper & zookeeper, const String path) { -public: - using LeadershipHandler = std::function; + /// Previous versions (before 21.12) used to create ephemeral sequential node path/leader_election- + /// Replica with the lexicographically smallest node name becomes leader (before 20.6) or enables multi-leader mode (since 20.6) + constexpr auto persistent_multiple_leaders = "leader_election-0"; /// Less than any sequential node + constexpr auto suffix = " (multiple leaders Ok)"; + constexpr auto persistent_identifier = "all (multiple leaders Ok)"; - /** handler is called when this instance become leader. - * - * identifier - if not empty, must uniquely (within same path) identify participant of leader election. - * It means that different participants of leader election have different identifiers - * and existence of more than one ephemeral node with same identifier indicates an error. - */ - LeaderElection( - DB::BackgroundSchedulePool & pool_, - const std::string & path_, - ZooKeeper & zookeeper_, - LeadershipHandler handler_, - const std::string & identifier_) - : pool(pool_), path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_ + suffix) - , log_name("LeaderElection (" + path + ")") - , log(&Poco::Logger::get(log_name)) + size_t num_tries = 1000; + while (num_tries--) { - task = pool.createTask(log_name, [this] { threadFunction(); }); - createNode(); - } - - void shutdown() - { - if (shutdown_called) + Strings potential_leaders; + Coordination::Error code = zookeeper.tryGetChildren(path, potential_leaders); + /// NOTE zookeeper_path/leader_election node must exist now, but maybe we will remove it in future versions. 
+ if (code == Coordination::Error::ZNONODE) return; + else if (code != Coordination::Error::ZOK) + throw KeeperException(code, path); - shutdown_called = true; - task->deactivate(); - } + Coordination::Requests ops; - ~LeaderElection() - { - releaseNode(); - } - -private: - static inline constexpr auto suffix = " (multiple leaders Ok)"; - DB::BackgroundSchedulePool & pool; - DB::BackgroundSchedulePool::TaskHolder task; - std::string path; - ZooKeeper & zookeeper; - LeadershipHandler handler; - std::string identifier; - std::string log_name; - Poco::Logger * log; - - EphemeralNodeHolderPtr node; - std::string node_name; - - std::atomic shutdown_called {false}; - - void createNode() - { - shutdown_called = false; - node = EphemeralNodeHolder::createSequential(fs::path(path) / "leader_election-", zookeeper, identifier); - - std::string node_path = node->getPath(); - node_name = node_path.substr(node_path.find_last_of('/') + 1); - - task->activateAndSchedule(); - } - - void releaseNode() - { - shutdown(); - node = nullptr; - } - - void threadFunction() - { - bool success = false; - - try + if (potential_leaders.empty()) { - Strings children = zookeeper.getChildren(path); - std::sort(children.begin(), children.end()); - - auto my_node_it = std::lower_bound(children.begin(), children.end(), node_name); - if (my_node_it == children.end() || *my_node_it != node_name) - throw Poco::Exception("Assertion failed in LeaderElection"); - - String value = zookeeper.get(path + "/" + children.front()); - - if (value.ends_with(suffix)) - { - handler(); + /// Ensure that no leaders appeared and enable persistent multi-leader mode + /// May fail with ZNOTEMPTY + ops.emplace_back(makeRemoveRequest(path, 0)); + ops.emplace_back(makeCreateRequest(path, "", zkutil::CreateMode::Persistent)); + /// May fail with ZNODEEXISTS + ops.emplace_back(makeCreateRequest(fs::path(path) / persistent_multiple_leaders, persistent_identifier, zkutil::CreateMode::Persistent)); + } + else + { + if (potential_leaders.front() == persistent_multiple_leaders) return; + + /// Ensure that current leader supports multi-leader mode and make it persistent + auto current_leader = fs::path(path) / potential_leaders.front(); + Coordination::Stat leader_stat; + String identifier; + if (!zookeeper.tryGet(current_leader, identifier, &leader_stat)) + { + LOG_INFO(log, "LeaderElection: leader suddenly changed, will retry"); + continue; } - if (my_node_it == children.begin()) - throw Poco::Exception("Assertion failed in LeaderElection"); + if (!identifier.ends_with(suffix)) + throw Poco::Exception(fmt::format("Found leader replica ({}) with too old version (< 20.6). Stop it before upgrading", identifier)); - /// Watch for the node in front of us. - --my_node_it; - std::string get_path_value; - if (!zookeeper.tryGetWatch(path + "/" + *my_node_it, get_path_value, nullptr, task->getWatchCallback())) - task->schedule(); - - success = true; - } - catch (const KeeperException & e) - { - DB::tryLogCurrentException(log); - - if (e.code == Coordination::Error::ZSESSIONEXPIRED) - return; - } - catch (...) - { - DB::tryLogCurrentException(log); + /// Version does not matter, just check that it still exists. 
+ /// May fail with ZNONODE + ops.emplace_back(makeCheckRequest(current_leader, leader_stat.version)); + /// May fail with ZNODEEXISTS + ops.emplace_back(makeCreateRequest(fs::path(path) / persistent_multiple_leaders, persistent_identifier, zkutil::CreateMode::Persistent)); } - if (!success) - task->scheduleAfter(10 * 1000); + Coordination::Responses res; + code = zookeeper.tryMulti(ops, res); + if (code == Coordination::Error::ZOK) + return; + else if (code == Coordination::Error::ZNOTEMPTY || code == Coordination::Error::ZNODEEXISTS || code == Coordination::Error::ZNONODE) + LOG_INFO(log, "LeaderElection: leader suddenly changed or new node appeared, will retry"); + else + KeeperMultiException::check(code, ops, res); } -}; -using LeaderElectionPtr = std::shared_ptr; + throw Poco::Exception("Cannot check that no old leaders exist"); +} } diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index 7f097cd7106..343c4ecaf22 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -3,6 +3,31 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +bool MarkRange::operator==(const MarkRange & rhs) const +{ + return begin == rhs.begin && end == rhs.end; +} + +bool MarkRange::operator<(const MarkRange & rhs) const +{ + /// We allow only consecutive non-intersecting ranges + /// Here we check whether a beginning of one range lies inside another range + /// (ranges are intersect) + const bool is_intersection = (begin <= rhs.begin && rhs.begin < end) || + (rhs.begin <= begin && begin < rhs.end); + + if (is_intersection) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Intersecting mark ranges are not allowed, it is a bug! First range ({}, {}), second range ({}, {})", begin, end, rhs.begin, rhs.end); + + return begin < rhs.begin && end <= rhs.begin; +} + size_t getLastMark(const MarkRanges & ranges) { size_t current_task_last_mark = 0; diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index b46913db30c..4f32be6ab14 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -2,7 +2,9 @@ #include #include +#include +#include namespace DB { @@ -18,6 +20,10 @@ struct MarkRange MarkRange() = default; MarkRange(const size_t begin_, const size_t end_) : begin{begin_}, end{end_} {} + + bool operator==(const MarkRange & rhs) const; + + bool operator<(const MarkRange & rhs) const; }; using MarkRanges = std::deque; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index a3e549ecda3..881086c024b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -501,7 +501,14 @@ bool MergeTask::VerticalMergeStage::finalizeVerticalMergeForAllColumns() const bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() const { for (const auto & part : global_ctx->future_part->parts) - global_ctx->new_data_part->minmax_idx->merge(*part->minmax_idx); + { + /// Skip empty parts, + /// (that can be created in StorageReplicatedMergeTree::createEmptyPartInsteadOfLost()) + /// since they can incorrectly set min, + /// that will be changed after one more merge/OPTIMIZE. + if (!part->isEmpty()) + global_ctx->new_data_part->minmax_idx->merge(*part->minmax_idx); + } /// Print overall profiling info. 
NOTE: it may duplicates previous messages { diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h index c59e53fb20e..f4635812e08 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h @@ -34,11 +34,22 @@ struct TaskRuntimeData { TaskRuntimeData(ExecutableTaskPtr && task_, CurrentMetrics::Metric metric_) : task(std::move(task_)) - , increment(std::move(metric_)) - {} + , metric(metric_) + { + /// Increment and decrement a metric with sequentially consistent memory order + /// This is needed, because in unit test this metric is read from another thread + /// and some invariant is checked. With relaxed memory order we could read stale value + /// for this metric, that's why test can be failed. + CurrentMetrics::values[metric].fetch_add(1); + } + + ~TaskRuntimeData() + { + CurrentMetrics::values[metric].fetch_sub(1); + } ExecutableTaskPtr task; - CurrentMetrics::Increment increment; + CurrentMetrics::Metric metric; std::atomic_bool is_currently_deleting{false}; /// Actually autoreset=false is needed only for unit test /// where multiple threads could remove tasks corresponding to the same storage diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 2f46543b03c..fbc818a7de9 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,8 @@ #include +#include + namespace DB { @@ -33,7 +36,8 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( UInt64 preferred_max_column_in_block_size_bytes_, const MergeTreeReaderSettings & reader_settings_, bool use_uncompressed_cache_, - const Names & virt_column_names_) + const Names & virt_column_names_, + std::optional extension_) : SourceWithProgress(transformHeader(std::move(header), prewhere_info_, storage_.getPartitionValueType(), virt_column_names_)) , storage(storage_) , metadata_snapshot(metadata_snapshot_) @@ -45,6 +49,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( , use_uncompressed_cache(use_uncompressed_cache_) , virt_column_names(virt_column_names_) , partition_value_type(storage.getPartitionValueType()) + , extension(extension_) { header_without_virtual_columns = getPort().getHeader(); @@ -71,6 +76,91 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( } +bool MergeTreeBaseSelectProcessor::getNewTask() +{ + /// No parallel reading feature + if (!extension.has_value()) + { + if (getNewTaskImpl()) + { + finalizeNewTask(); + return true; + } + return false; + } + return getNewTaskParallelReading(); +} + + +bool MergeTreeBaseSelectProcessor::getNewTaskParallelReading() +{ + if (getTaskFromBuffer()) + return true; + + if (no_more_tasks) + return getDelayedTasks(); + + while (true) + { + /// The end of execution. No task. 
+ if (!getNewTaskImpl()) + { + no_more_tasks = true; + return getDelayedTasks(); + } + + splitCurrentTaskRangesAndFillBuffer(); + + if (getTaskFromBuffer()) + return true; + } +} + + +bool MergeTreeBaseSelectProcessor::getTaskFromBuffer() +{ + while (!buffered_ranges.empty()) + { + auto ranges = std::move(buffered_ranges.front()); + buffered_ranges.pop_front(); + + assert(!ranges.empty()); + + auto res = performRequestToCoordinator(ranges, /*delayed=*/false); + + if (Status::Accepted == res) + return true; + + if (Status::Cancelled == res) + break; + } + return false; +} + + +bool MergeTreeBaseSelectProcessor::getDelayedTasks() +{ + while (!delayed_tasks.empty()) + { + task = std::move(delayed_tasks.front()); + delayed_tasks.pop_front(); + + assert(!task->mark_ranges.empty()); + + auto res = performRequestToCoordinator(task->mark_ranges, /*delayed=*/true); + + if (Status::Accepted == res) + return true; + + if (Status::Cancelled == res) + break; + } + + finish(); + return false; +} + + Chunk MergeTreeBaseSelectProcessor::generate() { while (!isCancelled()) @@ -479,6 +569,163 @@ std::unique_ptr MergeTreeBaseSelectProcessor::getSi data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block); } + +MergeTreeBaseSelectProcessor::Status MergeTreeBaseSelectProcessor::performRequestToCoordinator(MarkRanges requested_ranges, bool delayed) +{ + String partition_id = task->data_part->info.partition_id; + String part_name; + String projection_name; + + if (task->data_part->isProjectionPart()) + { + part_name = task->data_part->getParentPart()->name; + projection_name = task->data_part->name; + } + else + { + part_name = task->data_part->name; + projection_name = ""; + } + + PartBlockRange block_range + { + .begin = task->data_part->info.min_block, + .end = task->data_part->info.max_block + }; + + PartitionReadRequest request + { + .partition_id = std::move(partition_id), + .part_name = std::move(part_name), + .projection_name = std::move(projection_name), + .block_range = std::move(block_range), + .mark_ranges = std::move(requested_ranges) + }; + + /// Constistent hashing won't work with reading in order, because at the end of the execution + /// we could possibly seek back + if (!delayed && canUseConsistentHashingForParallelReading()) + { + const auto hash = request.getConsistentHash(extension->count_participating_replicas); + if (hash != extension->number_of_current_replica) + { + auto delayed_task = std::make_unique(*task); // Create a copy + delayed_task->mark_ranges = std::move(request.mark_ranges); + delayed_tasks.emplace_back(std::move(delayed_task)); + return Status::Denied; + } + } + + auto optional_response = extension.value().callback(std::move(request)); + + if (!optional_response.has_value()) + return Status::Cancelled; + + auto response = optional_response.value(); + + task->mark_ranges = std::move(response.mark_ranges); + + if (response.denied || task->mark_ranges.empty()) + return Status::Denied; + + finalizeNewTask(); + + return Status::Accepted; +} + + +size_t MergeTreeBaseSelectProcessor::estimateMaxBatchSizeForHugeRanges() +{ + /// This is an empirical number and it is so, + /// because we have an adaptive granularity by default. + const size_t average_granule_size_bytes = 8UL * 1024 * 1024 * 10; // 10 MiB + + /// We want to have one RTT per one gigabyte of data read from disk + /// this could be configurable. 
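    /// Editor's note (worked example with illustrative numbers): the batch size returned below is
    /// simply max_size_for_one_request / sum_average_marks_size. If the average uncompressed mark
    /// size of the requested columns comes out to, say, 8 MiB, the task is split into requests of
    /// roughly 8 GiB / 8 MiB = 1024 marks each; for compact parts, where getColumnSize is not
    /// fully implemented, the average_granule_size_bytes fallback above is used instead.
    /// (As written, the literals evaluate to 80 MiB and 8 GiB rather than the 10 MiB and 1 GiB
    /// mentioned in the adjacent comments.)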
+ const size_t max_size_for_one_request = 8UL * 1024 * 1024 * 1024; // 1 GiB + + size_t sum_average_marks_size = 0; + /// getColumnSize is not fully implemented for compact parts + if (task->data_part->getType() == IMergeTreeDataPart::Type::COMPACT) + { + sum_average_marks_size = average_granule_size_bytes; + } + else + { + for (const auto & name : extension->colums_to_read) + { + auto size = task->data_part->getColumnSize(name); + + assert(size.marks != 0); + sum_average_marks_size += size.data_uncompressed / size.marks; + } + } + + if (sum_average_marks_size == 0) + sum_average_marks_size = average_granule_size_bytes; // 10 MiB + + LOG_TEST(log, "Reading from {} part, average mark size is {}", + task->data_part->getTypeName(), sum_average_marks_size); + + return max_size_for_one_request / sum_average_marks_size; +} + +void MergeTreeBaseSelectProcessor::splitCurrentTaskRangesAndFillBuffer() +{ + const size_t max_batch_size = estimateMaxBatchSizeForHugeRanges(); + + size_t current_batch_size = 0; + buffered_ranges.emplace_back(); + + for (const auto & range : task->mark_ranges) + { + auto expand_if_needed = [&] + { + if (current_batch_size > max_batch_size) + { + buffered_ranges.emplace_back(); + current_batch_size = 0; + } + }; + + expand_if_needed(); + + if (range.end - range.begin < max_batch_size) + { + buffered_ranges.back().push_back(range); + current_batch_size += range.end - range.begin; + continue; + } + + auto current_begin = range.begin; + auto current_end = range.begin + max_batch_size; + + while (current_end < range.end) + { + auto current_range = MarkRange{current_begin, current_end}; + buffered_ranges.back().push_back(current_range); + current_batch_size += current_end - current_begin; + + current_begin = current_end; + current_end = current_end + max_batch_size; + + expand_if_needed(); + } + + if (range.end - current_begin > 0) + { + auto current_range = MarkRange{current_begin, range.end}; + buffered_ranges.back().push_back(current_range); + current_batch_size += range.end - current_begin; + + expand_if_needed(); + } + } + + if (buffered_ranges.back().empty()) + buffered_ranges.pop_back(); +} + MergeTreeBaseSelectProcessor::~MergeTreeBaseSelectProcessor() = default; } diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index d102e4f07a4..c462c34aa83 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -15,6 +16,18 @@ class UncompressedCache; class MarkCache; struct PrewhereExprInfo; + +struct ParallelReadingExtension +{ + MergeTreeReadTaskCallback callback; + size_t count_participating_replicas{0}; + size_t number_of_current_replica{0}; + /// This is needed to estimate the number of bytes + /// between a pair of marks to perform one request + /// over the network for a 1Gb of data. 
+ Names colums_to_read; +}; + /// Base class for MergeTreeThreadSelectProcessor and MergeTreeSelectProcessor class MergeTreeBaseSelectProcessor : public SourceWithProgress { @@ -30,7 +43,8 @@ public: UInt64 preferred_max_column_in_block_size_bytes_, const MergeTreeReaderSettings & reader_settings_, bool use_uncompressed_cache_, - const Names & virt_column_names_ = {}); + const Names & virt_column_names_ = {}, + std::optional extension = {}); ~MergeTreeBaseSelectProcessor() override; @@ -43,10 +57,22 @@ public: const Block & sample_block); protected: + Chunk generate() final; - /// Creates new this->task, and initializes readers. - virtual bool getNewTask() = 0; + /// Creates new this->task and return a flag whether it was successful or not + virtual bool getNewTaskImpl() = 0; + /// Creates new readers for a task it is needed. These methods are separate, because + /// in case of parallel reading from replicas the whole task could be denied by a coodinator + /// or it could modified somehow. + virtual void finalizeNewTask() = 0; + + size_t estimateMaxBatchSizeForHugeRanges(); + + virtual bool canUseConsistentHashingForParallelReading() { return false; } + + /// Closes readers and unlock part locks + virtual void finish() = 0; virtual Chunk readFromPart(); @@ -82,14 +108,62 @@ protected: /// This header is used for chunks from readFromPart(). Block header_without_virtual_columns; - std::unique_ptr task; - std::shared_ptr owned_uncompressed_cache; std::shared_ptr owned_mark_cache; using MergeTreeReaderPtr = std::unique_ptr; MergeTreeReaderPtr reader; MergeTreeReaderPtr pre_reader; + + MergeTreeReadTaskPtr task; + + std::optional extension; + bool no_more_tasks{false}; + std::deque delayed_tasks; + std::deque buffered_ranges; + +private: + Poco::Logger * log = &Poco::Logger::get("MergeTreeBaseSelectProcessor"); + + enum class Status + { + Accepted, + Cancelled, + Denied + }; + + /// Calls getNewTaskImpl() to get new task, then performs a request to a coordinator + /// The coordinator may modify the set of ranges to read from a part or could + /// deny the whole request. In the latter case it creates new task and retries. + /// Then it calls finalizeNewTask() to create readers for a task if it is needed. + bool getNewTask(); + bool getNewTaskParallelReading(); + + /// After PK analysis the range of marks could be extremely big + /// We divide this range to a set smaller consecutive ranges + /// Then, depending on the type of reading (concurrent, in order or in reverse order) + /// we can calculate a consistent hash function with the number of buckets equal to + /// the number of replicas involved. And after that we can throw away some ranges with + /// hash not equals to the number of the current replica. + bool getTaskFromBuffer(); + + /// But we can't throw that ranges completely, because if we have different sets of parts + /// on replicas (have merged part on one, but not on another), then such a situation is possible + /// - Coordinator allows to read from a big merged part, but this part is present only on one replica. + /// And that replica calculates consistent hash and throws away some ranges + /// - Coordinator denies other replicas to read from another parts (source parts for that big one) + /// At the end, the result of the query is wrong, because we didn't read all the data. + /// So, we have to remember parts and mark ranges with hash different then current replica number. + /// An we have to ask the coordinator about its permission to read from that "delayed" parts. 
+ /// It won't work with reading in order or reading in reverse order, because we can possibly seek back. + bool getDelayedTasks(); + + /// It will form a request a request to coordinator and + /// then reinitialize the mark ranges of this->task object + Status performRequestToCoordinator(MarkRanges requested_ranges, bool delayed); + + void splitCurrentTaskRangesAndFillBuffer(); + }; } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 88f3052e833..07d51d25700 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -128,8 +128,6 @@ MergeTreeReadTask::MergeTreeReadTask( { } -MergeTreeReadTask::~MergeTreeReadTask() = default; - MergeTreeBlockSizePredictor::MergeTreeBlockSizePredictor( const MergeTreeData::DataPartPtr & data_part_, const Names & columns, const Block & sample_block) @@ -175,8 +173,7 @@ void MergeTreeBlockSizePredictor::initialize(const Block & sample_block, const C ColumnInfo info; info.name = column_name; /// If column isn't fixed and doesn't have checksum, than take first - ColumnSize column_size = data_part->getColumnSize( - column_name, *column_with_type_and_name.type); + ColumnSize column_size = data_part->getColumnSize(column_name); info.bytes_per_row_global = column_size.data_uncompressed ? column_size.data_uncompressed / number_of_rows_in_part diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 2dfe6fcf06d..b931a13c027 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -14,7 +14,7 @@ struct MergeTreeReadTask; struct MergeTreeBlockSizePredictor; using MergeTreeReadTaskPtr = std::unique_ptr; -using MergeTreeBlockSizePredictorPtr = std::unique_ptr; +using MergeTreeBlockSizePredictorPtr = std::shared_ptr; /** If some of the requested columns are not in the part, @@ -59,8 +59,6 @@ struct MergeTreeReadTask const Names & ordered_names_, const NameSet & column_name_set_, const NamesAndTypesList & columns_, const NamesAndTypesList & pre_columns_, const bool remove_prewhere_column_, const bool should_reorder_, MergeTreeBlockSizePredictorPtr && size_predictor_); - - virtual ~MergeTreeReadTask(); }; struct MergeTreeReadTaskColumns diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3cf7023053f..e58472e572b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3163,7 +3163,7 @@ void MergeTreeData::addPartContributionToColumnAndSecondaryIndexSizes(const Data for (const auto & column : part->getColumns()) { ColumnSize & total_column_size = column_sizes[column.name]; - ColumnSize part_column_size = part->getColumnSize(column.name, *column.type); + ColumnSize part_column_size = part->getColumnSize(column.name); total_column_size.add(part_column_size); } @@ -3181,7 +3181,7 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D for (const auto & column : part->getColumns()) { ColumnSize & total_column_size = column_sizes[column.name]; - ColumnSize part_column_size = part->getColumnSize(column.name, *column.type); + ColumnSize part_column_size = part->getColumnSize(column.name); auto log_subtract = [&](size_t & from, size_t value, const char * field) { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 
036e7d89c5a..cdedd37e14a 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -124,7 +124,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( const UInt64 max_block_size, const unsigned num_streams, QueryProcessingStage::Enum processed_stage, - std::shared_ptr max_block_numbers_to_read) const + std::shared_ptr max_block_numbers_to_read, + bool enable_parallel_reading) const { if (query_info.merge_tree_empty_result) return std::make_unique(); @@ -142,7 +143,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( max_block_size, num_streams, max_block_numbers_to_read, - query_info.merge_tree_select_result_ptr); + query_info.merge_tree_select_result_ptr, + enable_parallel_reading); if (plan->isInitialized() && settings.allow_experimental_projection_optimization && settings.force_optimize_projection && !metadata_snapshot->projections.empty()) @@ -184,7 +186,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( max_block_size, num_streams, max_block_numbers_to_read, - query_info.projection->merge_tree_projection_select_result_ptr); + query_info.projection->merge_tree_projection_select_result_ptr, + enable_parallel_reading); } if (projection_plan->isInitialized()) @@ -1210,7 +1213,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const UInt64 max_block_size, const unsigned num_streams, std::shared_ptr max_block_numbers_to_read, - MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr) const + MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr, + bool enable_parallel_reading) const { /// If merge_tree_select_result_ptr != nullptr, we use analyzed result so parts will always be empty. if (merge_tree_select_result_ptr) @@ -1243,7 +1247,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( sample_factor_column_queried, max_block_numbers_to_read, log, - merge_tree_select_result_ptr + merge_tree_select_result_ptr, + enable_parallel_reading ); QueryPlanPtr plan = std::make_unique(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index f19d145fc93..3dde324ce22 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -34,7 +34,8 @@ public: UInt64 max_block_size, unsigned num_streams, QueryProcessingStage::Enum processed_stage, - std::shared_ptr max_block_numbers_to_read = nullptr) const; + std::shared_ptr max_block_numbers_to_read = nullptr, + bool enable_parallel_reading = false) const; /// The same as read, but with specified set of parts. QueryPlanPtr readFromParts( @@ -47,7 +48,8 @@ public: UInt64 max_block_size, unsigned num_streams, std::shared_ptr max_block_numbers_to_read = nullptr, - MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr = nullptr) const; + MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr = nullptr, + bool enable_parallel_reading = false) const; /// Get an estimation for the number of marks we are going to read. /// Reads nothing. Secondary indexes are not used. 
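// Editor's note (standalone sketch of the replica-assignment idea used by
// MergeTreeBaseSelectProcessor::performRequestToCoordinator above; std::hash is used here as a
// stand-in for the real PartitionReadRequest::getConsistentHash, and the names are illustrative):
// every replica hashes the (part, mark range) it is about to read into one of N buckets, where N
// is the number of participating replicas. If the bucket is not its own number, the range is not
// thrown away but put aside as a "delayed" task, because another replica may not have this exact
// part (for example an unmerged source part) and the coordinator must still be asked about it at
// the end.
#include <cstddef>
#include <deque>
#include <functional>
#include <string>

struct RangeToRead
{
    std::string part_name;
    size_t begin = 0;
    size_t end = 0;
};

bool shouldReadNowOrDelay(const RangeToRead & range,
                          size_t count_participating_replicas,
                          size_t number_of_current_replica,
                          std::deque<RangeToRead> & delayed_tasks)
{
    const size_t bucket = std::hash<std::string>{}(
        range.part_name + ':' + std::to_string(range.begin) + ':' + std::to_string(range.end))
        % count_participating_replicas;

    if (bucket == number_of_current_replica)
        return true;                    /// this replica asks the coordinator right away

    delayed_tasks.push_back(range);     /// revisit once all "own" ranges are exhausted
    return false;
}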
diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp index 48a9d62d872..961106af51b 100644 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp @@ -8,14 +8,11 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } -bool MergeTreeInOrderSelectProcessor::getNewTask() +bool MergeTreeInOrderSelectProcessor::getNewTaskImpl() try { if (all_mark_ranges.empty()) - { - finish(); return false; - } if (!reader) initializeReaders(); diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h index ecf648b0291..467292d88bb 100644 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h @@ -12,7 +12,7 @@ class MergeTreeInOrderSelectProcessor final : public MergeTreeSelectProcessor { public: template - MergeTreeInOrderSelectProcessor(Args &&... args) + explicit MergeTreeInOrderSelectProcessor(Args &&... args) : MergeTreeSelectProcessor{std::forward(args)...} { LOG_DEBUG(log, "Reading {} ranges in order from part {}, approx. {} rows starting from {}", @@ -23,7 +23,8 @@ public: String getName() const override { return "MergeTreeInOrder"; } private: - bool getNewTask() override; + bool getNewTaskImpl() override; + void finalizeNewTask() override {} Poco::Logger * log = &Poco::Logger::get("MergeTreeInOrderSelectProcessor"); }; diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 124f13b14a8..8481cee0f86 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } @@ -185,7 +186,7 @@ MergeTreeRangeReader::Stream::Stream( void MergeTreeRangeReader::Stream::checkNotFinished() const { if (isFinished()) - throw Exception("Cannot read out of marks range.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Cannot read out of marks range.", ErrorCodes::BAD_ARGUMENTS); } void MergeTreeRangeReader::Stream::checkEnoughSpaceInCurrentGranule(size_t num_rows) const @@ -290,7 +291,7 @@ void MergeTreeRangeReader::ReadResult::adjustLastGranule() size_t num_rows_to_subtract = total_rows_per_granule - num_read_rows; if (rows_per_granule.empty()) - throw Exception("Can't adjust last granule because no granules were added.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Can't adjust last granule because no granules were added", ErrorCodes::LOGICAL_ERROR); if (num_rows_to_subtract > rows_per_granule.back()) throw Exception(ErrorCodes::LOGICAL_ERROR, diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 16ce9823ebb..6c4059d64d0 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -8,14 +8,11 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } -bool MergeTreeReverseSelectProcessor::getNewTask() +bool MergeTreeReverseSelectProcessor::getNewTaskImpl() try { if (chunks.empty() && all_mark_ranges.empty()) - { - finish(); return false; - } /// We have some blocks to return in buffer. /// Return true to continue reading, but actually don't create a task. 
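The in-order and reverse processors above stop calling finish() themselves and only return false from getNewTaskImpl(); the surrounding wiring lives in MergeTreeBaseSelectProcessor, which is outside these hunks. The following stand-alone sketch is an assumption about how such a base class splits the work between getNewTaskImpl(), finalizeNewTask() and finish(), not the actual ClickHouse implementation:

#include <iostream>
#include <utility>

class BaseSelectProcessor
{
public:
    virtual ~BaseSelectProcessor() = default;

    // Non-virtual driver: derived classes only decide whether a task exists
    // and how to prepare it; cleanup is centralised here.
    bool getNewTask()
    {
        if (!getNewTaskImpl())
        {
            finish();          // release readers exactly once, for every derived class
            return false;
        }
        finalizeNewTask();     // heavier per-task initialisation (readers, range reader, ...)
        return true;
    }

protected:
    virtual bool getNewTaskImpl() = 0;
    virtual void finalizeNewTask() = 0;
    virtual void finish() { std::cout << "release readers\n"; }
};

class InOrderLikeProcessor final : public BaseSelectProcessor
{
    bool has_ranges = true;

    bool getNewTaskImpl() override { return std::exchange(has_ranges, false); }
    void finalizeNewTask() override { std::cout << "initialize readers for the task\n"; }
};

int main()
{
    InOrderLikeProcessor processor;
    while (processor.getNewTask()) {}
}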
diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h index 18ab51c03a0..395f5d5cd2a 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h @@ -13,7 +13,7 @@ class MergeTreeReverseSelectProcessor final : public MergeTreeSelectProcessor { public: template - MergeTreeReverseSelectProcessor(Args &&... args) + explicit MergeTreeReverseSelectProcessor(Args &&... args) : MergeTreeSelectProcessor{std::forward(args)...} { LOG_DEBUG(log, "Reading {} ranges in reverse order from part {}, approx. {} rows starting from {}", @@ -24,7 +24,9 @@ public: String getName() const override { return "MergeTreeReverse"; } private: - bool getNewTask() override; + bool getNewTaskImpl() override; + void finalizeNewTask() override {} + Chunk readFromPart() override; Chunks chunks; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 203ce7a57d2..2d4d3617cee 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -22,12 +22,13 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( const MergeTreeReaderSettings & reader_settings_, const Names & virt_column_names_, size_t part_index_in_query_, - bool has_limit_below_one_block_) + bool has_limit_below_one_block_, + std::optional extension_) : MergeTreeBaseSelectProcessor{ metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()), storage_, metadata_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, - reader_settings_, use_uncompressed_cache_, virt_column_names_}, + reader_settings_, use_uncompressed_cache_, virt_column_names_, extension_}, required_columns{std::move(required_columns_)}, data_part{owned_data_part_}, sample_block(metadata_snapshot_->getSampleBlock()), @@ -36,7 +37,11 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( has_limit_below_one_block(has_limit_below_one_block_), total_rows(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges)) { - addTotalRowsApprox(total_rows); + /// Actually it means that parallel reading from replicas enabled + /// and we have to collaborate with initiator. + /// In this case we won't set approximate rows, because it will be accounted multiple times + if (!extension_.has_value()) + addTotalRowsApprox(total_rows); ordered_names = header_without_virtual_columns.getNames(); } @@ -64,6 +69,7 @@ void MergeTreeSelectProcessor::initializeReaders() } + void MergeTreeSelectProcessor::finish() { /** Close the files (before destroying the object). 
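The new guard around addTotalRowsApprox() avoids inflating the progress estimate: with parallel reading, several replicas construct a processor over the same part, and each of them announcing the full row count would multiply the approximate total. A tiny stand-alone illustration with made-up numbers:

#include <cstddef>
#include <iostream>

int main()
{
    const size_t part_rows = 1'000'000; // rows in one part (assumed)
    const size_t replicas  = 3;         // replicas reading that part cooperatively (assumed)

    // If every replica called addTotalRowsApprox(part_rows), progress would report:
    std::cout << "inflated estimate: " << part_rows * replicas << " rows\n"; // 3'000'000

    // The part still contains only:
    std::cout << "actual rows:       " << part_rows << " rows\n";
}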
diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index f9b19f9f692..2ecdc3b59a8 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -31,17 +31,16 @@ public: const MergeTreeReaderSettings & reader_settings, const Names & virt_column_names = {}, size_t part_index_in_query_ = 0, - bool has_limit_below_one_block_ = false); + bool has_limit_below_one_block_ = false, + std::optional extension_ = {}); ~MergeTreeSelectProcessor() override; - /// Closes readers and unlock part locks - void finish(); - protected: /// Defer initialization from constructor, because it may be heavy - /// and it's better to do it lazily in `getNewTask`, which is executing in parallel. + /// and it's better to do it lazily in `getNewTaskImpl`, which is executing in parallel. void initializeReaders(); + void finish() override final; /// Used by Task Names required_columns; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index df8d6a7c127..687458ee681 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -36,6 +36,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( data_part->getMarksCount(), data_part->name, data_part->rows_count); } + /// Note, that we don't check setting collaborate_with_coordinator presence, because this source + /// is only used in background merges. addTotalRowsApprox(data_part->rows_count); /// Add columns because we don't want to read empty blocks diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 6a8ef860c87..6a44da06f1f 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -7,6 +7,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} MergeTreeThreadSelectProcessor::MergeTreeThreadSelectProcessor( const size_t thread_, @@ -21,12 +25,13 @@ MergeTreeThreadSelectProcessor::MergeTreeThreadSelectProcessor( const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_) + const Names & virt_column_names_, + std::optional extension_) : MergeTreeBaseSelectProcessor{ pool_->getHeader(), storage_, metadata_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, - reader_settings_, use_uncompressed_cache_, virt_column_names_}, + reader_settings_, use_uncompressed_cache_, virt_column_names_, extension_}, thread{thread_}, pool{pool_} { @@ -39,28 +44,61 @@ MergeTreeThreadSelectProcessor::MergeTreeThreadSelectProcessor( min_marks_to_read = (min_marks_to_read_ * fixed_index_granularity + max_block_size_rows - 1) / max_block_size_rows * max_block_size_rows / fixed_index_granularity; } + else if (extension.has_value()) + { + /// Parallel reading from replicas is enabled. 
+ /// We try to estimate the average number of bytes in a granule + /// to make one request over the network per one gigabyte of data + /// Actually we will ask MergeTreeReadPool to provide us heavier tasks to read + /// because most of each task will be postponed + /// (due to using consistent hash for better cache affinity) + const size_t amount_of_read_bytes_per_one_request = 1024 * 1024 * 1024; // 1GiB + /// In case of reading from compact parts (for which we can't estimate the average size of marks) + /// we will use this value + const size_t empirical_size_of_mark = 1024 * 1024 * 10; // 10 MiB + + if (extension->colums_to_read.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "A set of columns to read is empty. It is a bug"); + + size_t sum_average_marks_size = 0; + auto column_sizes = storage.getColumnSizes(); + for (const auto & name : extension->colums_to_read) + { + auto it = column_sizes.find(name); + if (it == column_sizes.end()) + continue; + auto size = it->second; + + if (size.data_compressed == 0 || size.data_uncompressed == 0 || size.marks == 0) + continue; + + sum_average_marks_size += size.data_uncompressed / size.marks; + } + + if (sum_average_marks_size == 0) + sum_average_marks_size = empirical_size_of_mark * extension->colums_to_read.size(); + + min_marks_to_read = extension->count_participating_replicas * amount_of_read_bytes_per_one_request / sum_average_marks_size; + } else + { min_marks_to_read = min_marks_to_read_; + } + ordered_names = getPort().getHeader().getNames(); } /// Requests read task from MergeTreeReadPool and signals whether it got one -bool MergeTreeThreadSelectProcessor::getNewTask() +bool MergeTreeThreadSelectProcessor::getNewTaskImpl() { task = pool->getTask(min_marks_to_read, thread, ordered_names); + return static_cast<bool>(task); +} - if (!task) - { - /** Close the files (before destroying the object). - * When many sources are created, but simultaneously reading only a few of them, - * buffers don't waste memory. - */ - reader.reset(); - pre_reader.reset(); - return false; - } +void MergeTreeThreadSelectProcessor::finalizeNewTask() +{ const std::string part_name = task->data_part->isProjectionPart() ? task->data_part->getParentPart()->name : task->data_part->name; /// Allows pool to reduce number of threads in case of too slow reads. @@ -99,8 +137,13 @@ bool MergeTreeThreadSelectProcessor::getNewTask() } last_readed_part_name = part_name; +} - return true; + + +void MergeTreeThreadSelectProcessor::finish() +{ + reader.reset(); + pre_reader.reset(); +} diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h index d17b15c3635..110c4fa34e6 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h @@ -11,7 +11,7 @@ class MergeTreeReadPool; /** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked * to perform.
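The heuristic above (one network round-trip per roughly one gigabyte of uncompressed data, spread over the participating replicas) can be replayed in isolation. The column sizes and replica count below are made-up numbers; only the formula mirrors the constructor:

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main()
{
    const size_t bytes_per_request      = 1024 * 1024 * 1024; // 1 GiB, as in the patch
    const size_t empirical_mark_size    = 1024 * 1024 * 10;   // 10 MiB fallback for compact parts
    const size_t participating_replicas = 3;                  // assumed

    // (uncompressed bytes, number of marks) per requested column — assumed values.
    const std::vector<std::pair<size_t, size_t>> columns =
    {
        {8ULL * 1024 * 1024 * 1024, 100000}, // ~84 KiB per mark
        {2ULL * 1024 * 1024 * 1024, 100000}, // ~21 KiB per mark
    };

    size_t sum_average_marks_size = 0;
    for (const auto & [uncompressed, marks] : columns)
        if (uncompressed != 0 && marks != 0)
            sum_average_marks_size += uncompressed / marks;

    if (sum_average_marks_size == 0)
        sum_average_marks_size = empirical_mark_size * columns.size();

    const size_t min_marks_to_read = participating_replicas * bytes_per_request / sum_average_marks_size;
    std::cout << "min_marks_to_read ~ " << min_marks_to_read << '\n'; // ~30000 for these numbers
}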
*/ -class MergeTreeThreadSelectProcessor : public MergeTreeBaseSelectProcessor +class MergeTreeThreadSelectProcessor final : public MergeTreeBaseSelectProcessor { public: MergeTreeThreadSelectProcessor( @@ -27,8 +27,8 @@ public: const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, const MergeTreeReaderSettings & reader_settings_, - - const Names & virt_column_names_); + const Names & virt_column_names_, + std::optional extension_); String getName() const override { return "MergeTreeThread"; } @@ -36,7 +36,13 @@ public: protected: /// Requests read task from MergeTreeReadPool and signals whether it got one - bool getNewTask() override; + bool getNewTaskImpl() override; + + void finalizeNewTask() override; + + void finish() override; + + bool canUseConsistentHashingForParallelReading() override { return true; } private: /// "thread" index (there are N threads and each thread is assigned index in interval [0..N-1]) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp new file mode 100644 index 00000000000..80f438a46db --- /dev/null +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -0,0 +1,143 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include "IO/WriteBufferFromString.h" +#include +#include + +namespace DB +{ + +class ParallelReplicasReadingCoordinator::Impl +{ +public: + using PartitionReadRequestPtr = std::unique_ptr; + using PartToMarkRanges = std::map; + + struct PartitionReading + { + PartSegments part_ranges; + PartToMarkRanges mark_ranges_in_part; + }; + + using PartitionToBlockRanges = std::map; + PartitionToBlockRanges partitions; + + std::mutex mutex; + + PartitionReadResponse handleRequest(PartitionReadRequest request); +}; + + +PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(PartitionReadRequest request) +{ + AtomicStopwatch watch; + std::lock_guard lock(mutex); + + auto partition_it = partitions.find(request.partition_id); + + SCOPE_EXIT({ + LOG_TRACE(&Poco::Logger::get("ParallelReplicasReadingCoordinator"), "Time for handling request: {}ns", watch.elapsed()); + }); + + PartToRead::PartAndProjectionNames part_and_projection + { + .part = request.part_name, + .projection = request.projection_name + }; + + /// We are the first who wants to process parts in partition + if (partition_it == partitions.end()) + { + PartitionReading partition_reading; + + PartToRead part_to_read; + part_to_read.range = request.block_range; + part_to_read.name = part_and_projection; + + partition_reading.part_ranges.addPart(std::move(part_to_read)); + + /// As this query is first in partition, we will accept all ranges from it. + /// We need just to update our state. 
+ auto request_ranges = HalfIntervals::initializeFromMarkRanges(request.mark_ranges); + auto mark_ranges_index = HalfIntervals::initializeWithEntireSpace(); + mark_ranges_index.intersect(request_ranges.negate()); + + partition_reading.mark_ranges_in_part.insert({part_and_projection, std::move(mark_ranges_index)}); + partitions.insert({request.partition_id, std::move(partition_reading)}); + + return {.denied = false, .mark_ranges = std::move(request.mark_ranges)}; + } + + auto & partition_reading = partition_it->second; + + PartToRead part_to_read; + part_to_read.range = request.block_range; + part_to_read.name = part_and_projection; + + auto part_intersection_res = partition_reading.part_ranges.getIntersectionResult(part_to_read); + + switch (part_intersection_res) + { + case PartSegments::IntersectionResult::REJECT: + { + return {.denied = true, .mark_ranges = {}}; + } + case PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION: + { + auto marks_it = partition_reading.mark_ranges_in_part.find(part_and_projection); + + auto & intervals_to_do = marks_it->second; + auto result = HalfIntervals::initializeFromMarkRanges(request.mark_ranges); + result.intersect(intervals_to_do); + + /// Update intervals_to_do + intervals_to_do.intersect(HalfIntervals::initializeFromMarkRanges(std::move(request.mark_ranges)).negate()); + + auto result_ranges = result.convertToMarkRangesFinal(); + const bool denied = result_ranges.empty(); + return {.denied = denied, .mark_ranges = std::move(result_ranges)}; + } + case PartSegments::IntersectionResult::NO_INTERSECTION: + { + partition_reading.part_ranges.addPart(std::move(part_to_read)); + + auto mark_ranges_index = HalfIntervals::initializeWithEntireSpace().intersect( + HalfIntervals::initializeFromMarkRanges(request.mark_ranges).negate() + ); + partition_reading.mark_ranges_in_part.insert({part_and_projection, std::move(mark_ranges_index)}); + + return {.denied = false, .mark_ranges = std::move(request.mark_ranges)}; + } + } + + __builtin_unreachable(); +} + +PartitionReadResponse ParallelReplicasReadingCoordinator::handleRequest(PartitionReadRequest request) +{ + return pimpl->handleRequest(std::move(request)); +} + +ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator() +{ + pimpl = std::make_unique(); +} + +ParallelReplicasReadingCoordinator::~ParallelReplicasReadingCoordinator() = default; + +} diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h new file mode 100644 index 00000000000..af74e0fae49 --- /dev/null +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParallelReplicasReadingCoordinator +{ +public: + ParallelReplicasReadingCoordinator(); + ~ParallelReplicasReadingCoordinator(); + PartitionReadResponse handleRequest(PartitionReadRequest request); +private: + class Impl; + std::unique_ptr pimpl; +}; + +} diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index b3a17250549..4d18adc1dfc 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 5731092f2a8..ff37a341205 
100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -2,9 +2,9 @@ #include #include #include +#include #include -#include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 3bb592dcdcb..0cc6955ff72 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -197,11 +197,6 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() updateQuorumIfWeHavePart(); - if (storage_settings->replicated_can_become_leader) - storage.enterLeaderElection(); - else - LOG_INFO(log, "Will not enter leader election because replicated_can_become_leader=0"); - /// Anything above can throw a KeeperException if something is wrong with ZK. /// Anything below should not throw exceptions. @@ -380,8 +375,6 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() LOG_TRACE(log, "Waiting for threads to finish"); - storage.exitLeaderElection(); - storage.queue_updating_task->deactivate(); storage.mutations_updating_task->deactivate(); storage.mutations_finalizing_task->deactivate(); diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp new file mode 100644 index 00000000000..a266540b99a --- /dev/null +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -0,0 +1,141 @@ +#include + +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_PROTOCOL; +} + +static void readMarkRangesBinary(MarkRanges & ranges, ReadBuffer & buf, size_t MAX_RANGES_SIZE = DEFAULT_MAX_STRING_SIZE) +{ + size_t size = 0; + readVarUInt(size, buf); + + if (size > MAX_RANGES_SIZE) + throw Poco::Exception("Too large ranges size."); + + ranges.resize(size); + for (size_t i = 0; i < size; ++i) + { + readBinary(ranges[i].begin, buf); + readBinary(ranges[i].end, buf); + } +} + + +static void writeMarkRangesBinary(const MarkRanges & ranges, WriteBuffer & buf) +{ + writeVarUInt(ranges.size(), buf); + + for (const auto & [begin, end] : ranges) + { + writeBinary(begin, buf); + writeBinary(end, buf); + } +} + + +void PartitionReadRequest::serialize(WriteBuffer & out) const +{ + /// Must be the first + writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, out); + + writeStringBinary(partition_id, out); + writeStringBinary(part_name, out); + writeStringBinary(projection_name, out); + + writeVarInt(block_range.begin, out); + writeVarInt(block_range.end, out); + + writeMarkRangesBinary(mark_ranges, out); +} + + +void PartitionReadRequest::describe(WriteBuffer & out) const +{ + String result; + result += fmt::format("partition_id: {} \n", partition_id); + result += fmt::format("part_name: {} \n", part_name); + result += fmt::format("projection_name: {} \n", projection_name); + result += fmt::format("block_range: ({}, {}) \n", block_range.begin, block_range.end); + result += "mark_ranges: "; + for (const auto & range : mark_ranges) + result += fmt::format("({}, {}) ", range.begin, range.end); + result += '\n'; + out.write(result.c_str(), result.size()); +} + +void PartitionReadRequest::deserialize(ReadBuffer & in) +{ + UInt64 version; + readVarUInt(version, in); + if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION) + throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading \ + from replicas differ. 
Got: {}, supported version: {}", + version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + + readStringBinary(partition_id, in); + readStringBinary(part_name, in); + readStringBinary(projection_name, in); + + readVarInt(block_range.begin, in); + readVarInt(block_range.end, in); + + readMarkRangesBinary(mark_ranges, in); +} + +UInt64 PartitionReadRequest::getConsistentHash(size_t buckets) const +{ + auto hash = SipHash(); + hash.update(partition_id); + hash.update(part_name); + hash.update(projection_name); + + hash.update(block_range.begin); + hash.update(block_range.end); + + for (const auto & range : mark_ranges) + { + hash.update(range.begin); + hash.update(range.end); + } + + return ConsistentHashing(hash.get64(), buckets); +} + + +void PartitionReadResponse::serialize(WriteBuffer & out) const +{ + /// Must be the first + writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, out); + + writeVarUInt(static_cast(denied), out); + writeMarkRangesBinary(mark_ranges, out); +} + + +void PartitionReadResponse::deserialize(ReadBuffer & in) +{ + UInt64 version; + readVarUInt(version, in); + if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION) + throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading \ + from replicas differ. Got: {}, supported version: {}", + version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + + UInt64 value; + readVarUInt(value, in); + denied = static_cast(value); + readMarkRangesBinary(mark_ranges, in); +} + +} diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h new file mode 100644 index 00000000000..85c8f7181af --- /dev/null +++ b/src/Storages/MergeTree/RequestResponse.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +#include + +#include +#include + +#include + + +namespace DB +{ + +/// Represents a segment [left; right] +struct PartBlockRange +{ + Int64 begin; + Int64 end; + + bool operator==(const PartBlockRange & rhs) const + { + return begin == rhs.begin && end == rhs.end; + } +}; + +struct PartitionReadRequest +{ + String partition_id; + String part_name; + String projection_name; + PartBlockRange block_range; + MarkRanges mark_ranges; + + void serialize(WriteBuffer & out) const; + void describe(WriteBuffer & out) const; + void deserialize(ReadBuffer & in); + + UInt64 getConsistentHash(size_t buckets) const; +}; + +struct PartitionReadResponse +{ + bool denied{false}; + MarkRanges mark_ranges{}; + + void serialize(WriteBuffer & out) const; + void deserialize(ReadBuffer & in); +}; + + +using MergeTreeReadTaskCallback = std::function(PartitionReadRequest)>; + + +} diff --git a/src/Storages/MergeTree/tests/gtest_coordinator.cpp b/src/Storages/MergeTree/tests/gtest_coordinator.cpp new file mode 100644 index 00000000000..7bcf3304c2b --- /dev/null +++ b/src/Storages/MergeTree/tests/gtest_coordinator.cpp @@ -0,0 +1,240 @@ +#include + +#include +#include +#include + +#include + +#include + +using namespace DB; + + +TEST(HalfIntervals, Simple) +{ + ASSERT_TRUE(( + HalfIntervals{{{1, 2}, {3, 4}}}.negate() == + HalfIntervals{{{0, 1}, {2, 3}, {4, 18446744073709551615UL}}} + )); + + { + auto left = HalfIntervals{{{0, 2}, {4, 6}}}.negate(); + ASSERT_TRUE(( + left == + HalfIntervals{{{2, 4}, {6, 18446744073709551615UL}}} + )); + } + + { + auto left = HalfIntervals{{{0, 2}, {4, 6}}}; + auto right = HalfIntervals{{{1, 5}}}.negate(); + auto intersection = left.intersect(right); + + ASSERT_TRUE(( + intersection == + HalfIntervals{{{0, 1}, {5, 6}}} + )); + } + + { + auto left = HalfIntervals{{{1, 2}, {2, 
3}}}; + auto right = HalfIntervals::initializeWithEntireSpace(); + auto intersection = right.intersect(left.negate()); + + ASSERT_TRUE(( + intersection == + HalfIntervals{{{0, 1}, {3, 18446744073709551615UL}}} + )); + } + + { + auto left = HalfIntervals{{{1, 2}, {2, 3}, {3, 4}, {4, 5}}}; + + ASSERT_EQ(getIntersection(left, HalfIntervals{{{1, 4}}}).convertToMarkRangesFinal().size(), 3); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{1, 5}}}).convertToMarkRangesFinal().size(), 4); + } + + { + auto left = HalfIntervals{{{1, 3}, {3, 5}, {5, 7}}}; + + ASSERT_EQ(getIntersection(left, HalfIntervals{{{3, 5}}}).convertToMarkRangesFinal().size(), 1); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{3, 7}}}).convertToMarkRangesFinal().size(), 2); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{4, 6}}}).convertToMarkRangesFinal().size(), 2); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{1, 7}}}).convertToMarkRangesFinal().size(), 3); + } + + { + auto left = HalfIntervals{{{1, 3}}}; + + ASSERT_EQ(getIntersection(left, HalfIntervals{{{3, 4}}}).convertToMarkRangesFinal().size(), 0); + } + + { + auto left = HalfIntervals{{{1, 2}, {3, 4}, {5, 6}}}; + + ASSERT_EQ(getIntersection(left, HalfIntervals{{{2, 3}}}).convertToMarkRangesFinal().size(), 0); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{4, 5}}}).convertToMarkRangesFinal().size(), 0); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{1, 6}}}).convertToMarkRangesFinal().size(), 3); + } +} + +TEST(HalfIntervals, TwoRequests) +{ + auto left = HalfIntervals{{{1, 2}, {2, 3}}}; + auto right = HalfIntervals{{{2, 3}, {3, 4}}}; + auto intersection = left.intersect(right); + + ASSERT_TRUE(( + intersection == + HalfIntervals{{{2, 3}}} + )); + + /// With negation + left = HalfIntervals{{{1, 2}, {2, 3}}}.negate(); + right = HalfIntervals{{{2, 3}, {3, 4}}}; + intersection = left.intersect(right); + + + ASSERT_TRUE(( + intersection == + HalfIntervals{{{3, 4}}} + )); +} + +TEST(HalfIntervals, SelfIntersection) +{ + auto left = HalfIntervals{{{1, 2}, {2, 3}, {4, 5}}}; + auto right = left; + auto intersection = left.intersect(right); + + ASSERT_TRUE(( + intersection == right + )); + + left = HalfIntervals{{{1, 2}, {2, 3}, {4, 5}}}; + right = left; + right.negate(); + intersection = left.intersect(right); + + ASSERT_TRUE(( + intersection == HalfIntervals{} + )); +} + + +TEST(Coordinator, Simple) +{ + PartitionReadRequest request; + request.partition_id = "a"; + request.part_name = "b"; + request.projection_name = "c"; + request.block_range = PartBlockRange{1, 2}; + request.mark_ranges = MarkRanges{{1, 2}, {3, 4}}; + + ParallelReplicasReadingCoordinator coordinator; + auto response = coordinator.handleRequest(request); + + ASSERT_FALSE(response.denied) << "Process request at first has to be accepted"; + + ASSERT_EQ(response.mark_ranges.size(), request.mark_ranges.size()); + + for (int i = 0; i < response.mark_ranges.size(); ++i) + EXPECT_EQ(response.mark_ranges[i], request.mark_ranges[i]); + + response = coordinator.handleRequest(request); + ASSERT_TRUE(response.denied) << "Process the same request second time"; +} + + +TEST(Coordinator, TwoRequests) +{ + PartitionReadRequest first; + first.partition_id = "a"; + first.part_name = "b"; + first.projection_name = "c"; + first.block_range = PartBlockRange{0, 0}; + first.mark_ranges = MarkRanges{{1, 2}, {2, 3}}; + + auto second = first; + second.mark_ranges = MarkRanges{{2, 3}, {3, 4}}; + + ParallelReplicasReadingCoordinator coordinator; + auto response = coordinator.handleRequest(first); + + 
ASSERT_FALSE(response.denied) << "First request must me accepted"; + + ASSERT_EQ(response.mark_ranges.size(), first.mark_ranges.size()); + for (int i = 0; i < response.mark_ranges.size(); ++i) + EXPECT_EQ(response.mark_ranges[i], first.mark_ranges[i]); + + response = coordinator.handleRequest(second); + ASSERT_FALSE(response.denied); + ASSERT_EQ(response.mark_ranges.size(), 1); + ASSERT_EQ(response.mark_ranges.front(), (MarkRange{3, 4})); +} + + +TEST(Coordinator, PartIntersections) +{ + { + PartSegments boundaries; + + boundaries.addPart(PartToRead{{1, 1}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{2, 2}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{3, 3}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{4, 4}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{1, 4}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{0, 5}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"ClickHouse", "AnotherProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 2}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + + boundaries.addPart(PartToRead{{5, 5}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{0, 0}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{0, 5}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"ClickHouse", "AnotherProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 2}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{0, 3}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + } + + { + PartSegments boundaries; + boundaries.addPart(PartToRead{{1, 3}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{4, 5}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{2, 4}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{0, 6}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + } + + { + PartSegments boundaries; + boundaries.addPart(PartToRead{{1, 3}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{4, 6}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{7, 9}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{2, 8}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{4, 6}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{3, 7}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{5, 7}, {"TestPart", "TestProjection"}}), 
PartSegments::IntersectionResult::REJECT); + } + + { + PartSegments boundaries; + + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::NO_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 3}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::NO_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{0, 100500}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::NO_INTERSECTION); + + boundaries.addPart(PartToRead{{1, 1}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{2, 2}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{3, 3}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 3}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{100, 100500}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::NO_INTERSECTION); + } +} diff --git a/src/Storages/MergeTree/tests/gtest_executor.cpp b/src/Storages/MergeTree/tests/gtest_executor.cpp index a208e7dc233..d2895215ebe 100644 --- a/src/Storages/MergeTree/tests/gtest_executor.cpp +++ b/src/Storages/MergeTree/tests/gtest_executor.cpp @@ -147,7 +147,7 @@ TEST(Executor, RemoveTasksStress) for (size_t j = 0; j < tasks_kinds; ++j) executor->removeTasksCorrespondingToStorage({"test", std::to_string(j)}); - ASSERT_EQ(CurrentMetrics::values[CurrentMetrics::BackgroundMergesAndMutationsPoolTask], 0); - executor->wait(); + + ASSERT_EQ(CurrentMetrics::values[CurrentMetrics::BackgroundMergesAndMutationsPoolTask], 0); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index ac60d748e36..c8f199d098e 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -20,7 +20,6 @@ namespace ErrorCodes } ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( - ChannelPtr consumer_channel_, RabbitMQHandler & event_handler_, std::vector & queues_, size_t channel_id_base_, @@ -30,7 +29,6 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( uint32_t queue_size_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) - , consumer_channel(std::move(consumer_channel_)) , event_handler(event_handler_) , queues(queues_) , channel_base(channel_base_) @@ -129,9 +127,6 @@ void ReadBufferFromRabbitMQConsumer::setupChannel() if (!consumer_channel) return; - /// We mark initialized only once. 
- initialized = true; - wait_subscription.store(true); consumer_channel->onReady([&]() diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 55d129856b8..8a527011a3c 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -20,7 +20,6 @@ class ReadBufferFromRabbitMQConsumer : public ReadBuffer public: ReadBufferFromRabbitMQConsumer( - ChannelPtr consumer_channel_, RabbitMQHandler & event_handler_, std::vector & queues_, size_t channel_id_base_, @@ -37,7 +36,7 @@ public: UInt64 delivery_tag; String channel_id; - AckTracker() : delivery_tag(0), channel_id("") {} + AckTracker() = default; AckTracker(UInt64 tag, String id) : delivery_tag(tag), channel_id(id) {} }; @@ -75,12 +74,6 @@ public: auto getMessageID() const { return current.message_id; } auto getTimestamp() const { return current.timestamp; } - void initialize() - { - if (!initialized) - setupChannel(); - } - private: bool nextImpl() override; @@ -105,9 +98,6 @@ private: AckTracker last_inserted_record_info; UInt64 prev_tag = 0, channel_id_counter = 0; - - /// Has initial setup after constructor been made? - bool initialized = false; }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 66772e7015b..ac299657ae6 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -577,7 +577,7 @@ bool StorageRabbitMQ::updateChannel(ChannelPtr & channel) try { channel = connection->createChannel(); - return channel->usable(); + return true; } catch (...) { @@ -587,6 +587,21 @@ bool StorageRabbitMQ::updateChannel(ChannelPtr & channel) } +void StorageRabbitMQ::prepareChannelForBuffer(ConsumerBufferPtr buffer) +{ + if (!buffer) + return; + + if (buffer->queuesCount() != queues.size()) + buffer->updateQueues(queues); + + buffer->updateAckTracker(); + + if (updateChannel(buffer->getChannel())) + buffer->setupChannel(); +} + + void StorageRabbitMQ::unbindExchange() { /* This is needed because with RabbitMQ (without special adjustments) can't, for example, properly make mv if there was insert query @@ -715,9 +730,9 @@ void StorageRabbitMQ::startup() } catch (...) { - tryLogCurrentException(log); if (!is_attach) throw; + tryLogCurrentException(log); } } else @@ -731,15 +746,14 @@ void StorageRabbitMQ::startup() try { auto buffer = createReadBuffer(); - if (rabbit_is_ready) - buffer->initialize(); pushReadBuffer(std::move(buffer)); ++num_created_consumers; } - catch (const AMQP::Exception & e) + catch (...) 
{ - LOG_ERROR(log, "Got AMQ exception {}", e.what()); - throw; + if (!is_attach) + throw; + tryLogCurrentException(log); } } @@ -871,9 +885,8 @@ ConsumerBufferPtr StorageRabbitMQ::popReadBuffer(std::chrono::milliseconds timeo ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() { - ChannelPtr consumer_channel = connection->createChannel(); return std::make_shared( - std::move(consumer_channel), connection->getHandler(), queues, ++consumer_id, + connection->getHandler(), queues, ++consumer_id, unique_strbase, log, row_delimiter, queue_size, shutdown_called); } @@ -921,7 +934,7 @@ void StorageRabbitMQ::initializeBuffers() if (!initialized) { for (const auto & buffer : buffers) - buffer->initialize(); + prepareChannelForBuffer(buffer); initialized = true; } } @@ -1086,19 +1099,7 @@ bool StorageRabbitMQ::streamToViews() if (source->needChannelUpdate()) { auto buffer = source->getBuffer(); - if (buffer) - { - if (buffer->queuesCount() != queues.size()) - buffer->updateQueues(queues); - - buffer->updateAckTracker(); - - if (updateChannel(buffer->getChannel())) - { - LOG_TRACE(log, "Connection is active, but channel update is needed"); - buffer->setupChannel(); - } - } + prepareChannelForBuffer(buffer); } /* false is returned by the sendAck function in only two cases: diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index a27a5bd59f1..9633326366d 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -66,6 +66,7 @@ public: bool updateChannel(ChannelPtr & channel); void updateQueues(std::vector & queues_) { queues_ = queues; } + void prepareChannelForBuffer(ConsumerBufferPtr buffer); void incrementReader(); void decrementReader(); diff --git a/src/Storages/SelectQueryDescription.cpp b/src/Storages/SelectQueryDescription.cpp index 018a9f0ea98..2cc8f769cf1 100644 --- a/src/Storages/SelectQueryDescription.cpp +++ b/src/Storages/SelectQueryDescription.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -12,7 +13,6 @@ namespace DB namespace ErrorCodes { extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW; -extern const int LOGICAL_ERROR; } SelectQueryDescription::SelectQueryDescription(const SelectQueryDescription & other) @@ -60,9 +60,9 @@ StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, ContextPt { auto * ast_select = subquery->as(); if (!ast_select) - throw Exception("Logical error while creating StorageMaterializedView. 
" - "Could not retrieve table name from select query.", - DB::ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, + "StorageMaterializedView cannot be created from table functions ({})", + serializeAST(*subquery)); if (ast_select->list_of_selects->children.size() != 1) throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a45afd847e6..e033d319fc8 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -110,6 +110,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_NUMBER_OF_COLUMNS; extern const int INFINITE_LOOP; + extern const int ILLEGAL_FINAL; extern const int TYPE_MISMATCH; extern const int TOO_MANY_ROWS; extern const int UNABLE_TO_SKIP_UNUSED_SHARDS; @@ -273,7 +274,7 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus { size_t num_local_shards = cluster->getLocalShardCount(); size_t num_remote_shards = cluster->getRemoteShardCount(); - return (num_remote_shards * settings.max_parallel_replicas) + num_local_shards; + return (num_remote_shards + num_local_shards) * settings.max_parallel_replicas; } } @@ -590,6 +591,10 @@ void StorageDistributed::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { + const auto * select_query = query_info.query->as(); + if (select_query->final() && local_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas) + throw Exception(ErrorCodes::ILLEGAL_FINAL, "Final modifier is not allowed together with parallel reading from replicas feature"); + const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table, remote_table_function_ptr); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 8a3d786532e..470a406dbe4 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -191,7 +191,14 @@ void StorageMergeTree::read( size_t max_block_size, unsigned num_streams) { - if (auto plan = reader.read(column_names, metadata_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage)) + /// If true, then we will ask initiator if we can read chosen ranges + bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; + + if (enable_parallel_reading) + LOG_TRACE(log, "Parallel reading from replicas enabled {}", enable_parallel_reading); + + if (auto plan = reader.read( + column_names, metadata_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage, nullptr, enable_parallel_reading)) query_plan = std::move(*plan); } diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 9b48f3fc3b3..fe0f9b8d4b4 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -67,9 +67,12 @@ void StorageMongoDB::connectIfNotConnected() if (!authenticated) { # if POCO_VERSION >= 0x01070800 - Poco::MongoDB::Database poco_db(database_name); - if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) - throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); + if (!username.empty() && !password.empty()) + { + Poco::MongoDB::Database poco_db(database_name); + if 
(!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) + throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); + } # else authenticate(*connection, database_name, username, password); # endif @@ -112,9 +115,7 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C for (const auto & [arg_name, arg_value] : storage_specific_args) { - if (arg_name == "collection") - configuration.collection = arg_value->as()->value.safeGet(); - else if (arg_name == "options") + if (arg_name == "options") configuration.options = arg_value->as()->value.safeGet(); else throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -139,7 +140,7 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C configuration.host = parsed_host_port.first; configuration.port = parsed_host_port.second; configuration.database = engine_args[1]->as().value.safeGet(); - configuration.collection = engine_args[2]->as().value.safeGet(); + configuration.table = engine_args[2]->as().value.safeGet(); configuration.username = engine_args[3]->as().value.safeGet(); configuration.password = engine_args[4]->as().value.safeGet(); @@ -163,7 +164,7 @@ void registerStorageMongoDB(StorageFactory & factory) configuration.host, configuration.port, configuration.database, - configuration.collection, + configuration.table, configuration.username, configuration.password, configuration.options, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7f600fc054c..ca877d8a72d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -47,8 +48,10 @@ #include #include +#include #include #include +#include #include #include @@ -60,6 +63,8 @@ #include #include #include +#include +#include #include @@ -1368,9 +1373,6 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil: const auto storage_settings_ptr = getSettings(); String part_path = fs::path(replica_path) / "parts" / part_name; - //ops.emplace_back(zkutil::makeCheckRequest( - // zookeeper_path + "/columns", expected_columns_version)); - if (storage_settings_ptr->use_minimalistic_part_header_in_zookeeper) { ops.emplace_back(zkutil::makeCreateRequest( @@ -1416,6 +1418,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd Coordination::Requests new_ops; for (const String & part_path : absent_part_paths_on_replicas) { + /// NOTE Create request may fail with ZNONODE if replica is being dropped, we will throw an exception new_ops.emplace_back(zkutil::makeCreateRequest(part_path, "", zkutil::CreateMode::Persistent)); new_ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1)); } @@ -3400,53 +3403,29 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n } -void StorageReplicatedMergeTree::enterLeaderElection() +void StorageReplicatedMergeTree::startBeingLeader() { - auto callback = [this]() + if (!getSettings()->replicated_can_become_leader) { - LOG_INFO(log, "Became leader"); - - is_leader = true; - merge_selecting_task->activateAndSchedule(); - }; - - try - { - leader_election = std::make_shared( - getContext()->getSchedulePool(), - fs::path(zookeeper_path) / "leader_election", - *current_zookeeper, /// current_zookeeper lives for the lifetime of leader_election, - /// since before changing 
`current_zookeeper`, `leader_election` object is destroyed in `partialShutdown` method. - callback, - replica_name); - } - catch (...) - { - leader_election = nullptr; - throw; + LOG_INFO(log, "Will not enter leader election because replicated_can_become_leader=0"); + return; } + + zkutil::checkNoOldLeaders(log, *current_zookeeper, fs::path(zookeeper_path) / "leader_election"); + + LOG_INFO(log, "Became leader"); + is_leader = true; + merge_selecting_task->activateAndSchedule(); } -void StorageReplicatedMergeTree::exitLeaderElection() +void StorageReplicatedMergeTree::stopBeingLeader() { - if (!leader_election) + if (!is_leader) return; - /// Shut down the leader election thread to avoid suddenly becoming the leader again after - /// we have stopped the merge_selecting_thread, but before we have deleted the leader_election object. - leader_election->shutdown(); - - if (is_leader) - { - LOG_INFO(log, "Stopped being leader"); - - is_leader = false; - merge_selecting_task->deactivate(); - } - - /// Delete the node in ZK only after we have stopped the merge_selecting_thread - so that only one - /// replica assigns merges at any given time. - leader_election = nullptr; + LOG_INFO(log, "Stopped being leader"); + is_leader = false; + merge_selecting_task->deactivate(); } ConnectionTimeouts StorageReplicatedMergeTree::getFetchPartHTTPTimeouts(ContextPtr local_context) @@ -4112,9 +4091,12 @@ void StorageReplicatedMergeTree::startup() /// In this thread replica will be activated. restarting_thread.start(); - /// Wait while restarting_thread initializes LeaderElection (and so on) or makes first attempt to do it + /// Wait while restarting_thread finishing initialization startup_event.wait(); + /// Restarting thread has initialized replication queue, replica can become leader now + startBeingLeader(); + startBackgroundMovesIfNeeded(); part_moves_between_shards_orchestrator.start(); @@ -4145,6 +4127,7 @@ void StorageReplicatedMergeTree::shutdown() fetcher.blocker.cancelForever(); merger_mutator.merges_blocker.cancelForever(); parts_mover.moves_blocker.cancelForever(); + stopBeingLeader(); restarting_thread.shutdown(); background_operations_assignee.finish(); @@ -4247,6 +4230,9 @@ void StorageReplicatedMergeTree::read( const size_t max_block_size, const unsigned num_streams) { + /// If true, then we will ask initiator if we can read chosen ranges + const bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; + /** The `select_sequential_consistency` setting has two meanings: * 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. * 2. Do not read parts that have not yet been written to the quorum of the replicas. 
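Returning to the getClusterQueriedNodes() change in StorageDistributed.cpp earlier in this patch: with parallel reading, local shards also fan out to max_parallel_replicas replicas, so they are now multiplied as well. A stand-alone check with assumed counts:

#include <cstddef>
#include <iostream>

int main()
{
    const size_t num_remote_shards     = 2; // assumed
    const size_t num_local_shards      = 1; // assumed
    const size_t max_parallel_replicas = 3; // assumed setting value

    // Old formula: the local shard contributed a single node regardless of the setting.
    std::cout << "old: " << num_remote_shards * max_parallel_replicas + num_local_shards << " nodes\n";  // 7
    // New formula: every shard, local or remote, contributes max_parallel_replicas nodes.
    std::cout << "new: " << (num_remote_shards + num_local_shards) * max_parallel_replicas << " nodes\n"; // 9
}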
@@ -4256,13 +4242,18 @@ void StorageReplicatedMergeTree::read( { auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); if (auto plan = reader.read( - column_names, metadata_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage, std::move(max_added_blocks))) + column_names, metadata_snapshot, query_info, local_context, + max_block_size, num_streams, processed_stage, std::move(max_added_blocks), enable_parallel_reading)) query_plan = std::move(*plan); return; } - if (auto plan = reader.read(column_names, metadata_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage)) + if (auto plan = reader.read( + column_names, metadata_snapshot, query_info, local_context, + max_block_size, num_streams, processed_stage, nullptr, enable_parallel_reading)) + { query_plan = std::move(*plan); + } } Pipe StorageReplicatedMergeTree::read( diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 65daf82a633..bcd364df30e 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -320,7 +319,6 @@ private: * It can be false only when old ClickHouse versions are working on the same cluster, because now we allow multiple leaders. */ std::atomic is_leader {false}; - zkutil::LeaderElectionPtr leader_election; InterserverIOEndpointPtr data_parts_exchange_endpoint; @@ -514,15 +512,10 @@ private: bool processQueueEntry(ReplicatedMergeTreeQueue::SelectedEntryPtr entry); - /// Postcondition: - /// either leader_election is fully initialized (node in ZK is created and the watching thread is launched) - /// or an exception is thrown and leader_election is destroyed. - void enterLeaderElection(); - - /// Postcondition: - /// is_leader is false, merge_selecting_thread is stopped, leader_election is nullptr. - /// leader_election node in ZK is either deleted, or the session is marked expired. - void exitLeaderElection(); + /// Start being leader (if not disabled by setting). + /// Since multi-leaders are allowed, it just sets is_leader flag. + void startBeingLeader(); + void stopBeingLeader(); /** Selects the parts to merge and writes to the log. 
*/ diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index f49fd35044d..659071b392d 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -126,7 +126,7 @@ Pipe StorageS3Cluster::read( scalars, Tables(), processed_stage, - callback); + RemoteQueryExecutor::Extension{.task_iterator = callback}); pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false)); } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 382466c32d4..fe05d168c31 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -230,8 +230,10 @@ StorageURLSink::StorageURLSink( const String & http_method) : SinkToStorage(sample_block) { + std::string content_type = FormatFactory::instance().getContentType(format, context, format_settings); + write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(Poco::URI(uri), http_method, timeouts), + std::make_unique(Poco::URI(uri), http_method, content_type, timeouts), compression_method, 3); writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, {} /* write callback */, format_settings); diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index c33fa6cad44..87bd266af96 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,48 +1,41 @@ // autogenerated by ./StorageSystemContributors.sh const char * auto_contributors[] { - "박현우", "0xflotus", "20018712", "243f6a88 85a308d3", "243f6a8885a308d313198a2e037", "3ldar-nasyrov", "821008736@qq.com", - "abdrakhmanov", - "abel-wang", - "abyss7", - "achimbab", - "achulkov2", - "adevyatova", - "ageraab", + "ANDREI STAROVEROV", "Ahmed Dardery", "Aimiyoo", - "akazz", "Akazz", - "akonyaev", - "akuzm", "Alain BERRIER", "Albert Kidrachev", "Alberto", - "Aleksandra (Ася)", "Aleksandr Karo", - "Aleksandrov Vladimir", "Aleksandr Shalimov", - "alekseik1", + "Aleksandra (Ася)", + "Aleksandrov Vladimir", "Aleksei Levushkin", "Aleksei Semiglazov", - "Aleksey", "Aleksey Akulovich", - "alesapin", + "Aleksey", + "Alex Bocharov", + "Alex Cao", + "Alex Karo", + "Alex Krash", + "Alex Ryndin", + "Alex Zatelepin", "Alex", "Alexander Avdonkin", "Alexander Bezpiatov", "Alexander Burmak", "Alexander Chashnikov", "Alexander Ermolaev", - "Alexander Gololobov", "Alexander GQ Gerasiov", + "Alexander Gololobov", "Alexander Kazakov", - "alexander kozhikhov", "Alexander Kozhikhov", "Alexander Krasheninnikov", "Alexander Kuranoff", @@ -59,63 +52,46 @@ const char * auto_contributors[] { "Alexander Sapin", "Alexander Tokmakov", "Alexander Tretiakov", - "Alexandra", - "Alexandra Latysheva", - "Alexandre Snarskii", "Alexandr Kondratev", "Alexandr Krasheninnikov", "Alexandr Orlov", - "Alex Bocharov", + "Alexandra Latysheva", + "Alexandra", + "Alexandre Snarskii", "Alexei Averchenko", - "Alexey", "Alexey Arno", "Alexey Boykov", "Alexey Dushechkin", "Alexey Elymanov", "Alexey Ilyukhov", - "alexey-milovidov", "Alexey Milovidov", "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", - "Alex Karo", - "Alex Krash", - "alex.lvxin", - "Alex Ryndin", + "Alexey", "Alexsey Shestakov", - "alex-zaitsev", - "Alex Zatelepin", - "alfredlu", + "Ali Demirci", "Aliaksandr Pliutau", "Aliaksandr Shylau", - "Ali Demirci", "Alina Terekhova", - "amesaru", "Amesaru", "Amir Vaza", "Amos Bird", - "amoschen", - "amudong", + "Amr Alaa", "Amy Krishnevsky", + 
"AnaUvarova", "Anastasiya Rodigina", "Anastasiya Tsarkova", "Anatoly Pugachev", - "ana-uvarova", - "AnaUvarova", "Andr0901", - "andrc1901", "Andreas Hunkeler", "AndreevDm", "Andrei Bodrov", "Andrei Ch", "Andrei Chulkov", - "andrei-karpliuk", "Andrei Nekrashevich", - "ANDREI STAROVEROV", "Andrew Grigorev", "Andrew Onyshchuk", - "andrewsg", - "Andrey", "Andrey Chulkov", "Andrey Dudin", "Andrey Kadochnikov", @@ -124,20 +100,16 @@ const char * auto_contributors[] { "Andrey M", "Andrey Mironov", "Andrey Skobtsov", + "Andrey Torsunov", "Andrey Urusov", "Andrey Z", + "Andrey", "Andy Liang", "Andy Yang", "Anmol Arora", - "Anna", "Anna Shakhova", - "anneji", - "anneji-dev", - "annvsh", - "anrodigina", + "Anna", "Anthony N. Simon", - "antikvist", - "anton", "Anton Ivashkin", "Anton Kobzev", "Anton Kvasha", @@ -149,151 +121,94 @@ const char * auto_contributors[] { "Anton Tikhonov", "Anton Yuzhaninov", "Anton Zhabolenko", - "ap11", - "a.palagashvili", - "aprudaev", "Ariel Robaldo", "Arsen Hakobyan", "Arslan G", "ArtCorp", "Artem Andreenko", - "Artemeey", "Artem Gavrilov", "Artem Hnilov", - "Artemkin Pavel", "Artem Konovalov", "Artem Streltsov", "Artem Zuikov", + "Artemeey", + "Artemkin Pavel", "Arthur Petukhovsky", "Arthur Tokarchuk", "Arthur Wong", - "artpaul", - "Artur", "Artur Beglaryan", "Artur Filatenkov", + "Artur", "AsiaKorushkina", - "asiana21", - "atereh", "Atri Sharma", - "avasiliev", - "avogar", "Avogar", - "avsharapov", - "awesomeleo", "Azat Khuzhin", + "BSD_Conqueror", "Babacar Diassé", "Bakhtiyor Ruziev", "BanyRule", "Baudouin Giard", "BayoNet", - "bbkas", - "benamazing", - "benbiti", "Benjamin Naecker", "Bertrand Junqua", - "bgranvea", "Bharat Nallan", - "bharatnc", "Big Elephant", "Bill", "BiteTheDDDDt", "BlahGeek", - "blazerer", - "bluebirddm", - "bobrovskij artemij", - "Bogdan", "Bogdan Voronin", + "Bogdan", "BohuTANG", "Bolinov", "BoloniniD", - "booknouse", "Boris Granveaud", "Bowen Masco", - "bo zeng", "Braulio Valdivielso", "Brett Hoerner", - "BSD_Conqueror", - "bseng", "Bulat Gaifullin", "Carbyn", - "caspian", "Caspian", - "cekc", - "centos7", - "champtar", - "chang.chen", - "changvvb", "Chao Ma", "Chao Wang", - "chasingegg", - "chengy8934", - "chenjian", - "chenqi", - "chenxing-xc", - "chenxing.xc", "Chen Yufei", - "chertus", "Chienlung Cheung", - "chou.fan", "Christian", - "christophe.kalenzaga", "Ciprian Hacman", "Clement Rodriguez", - "Clément Rodriguez", "ClickHouse Admin", - "cn-ds", + "Clément Rodriguez", "Cody Baker", "Colum", - "comunodi", "Constantin S. 
Pan", - "coraxster", + "Constantine Peresypkin", "CurtizJ", - "damozhaeva", + "DIAOZHAFENG", "Daniel Bershatsky", "Daniel Dao", "Daniel Qin", "Danila Kutenin", - "dankondr", "Dao Minh Thuc", - "daoready", "Daria Mozhaeva", "Dario", - "Darío", "DarkWanderer", - "dasmfm", - "davydovska", - "decaseal", + "Darío", "Denis Burlaka", "Denis Glazachev", "Denis Krivak", "Denis Zhuravlev", "Denny Crane", - "dependabot[bot]", - "dependabot-preview[bot]", "Derek Perkins", - "detailyang", - "dfenelonov", - "dgrr", - "DIAOZHAFENG", - "dimarub2000", "Ding Xiang Fei", - "dinosaur", - "divanorama", - "dkxiaohei", - "dmi-feo", "Dmitriev Mikhail", - "dmitrii", "Dmitrii Kovalkov", "Dmitrii Raev", - "dmitriiut", - "Dmitriy", + "Dmitriy Dorofeev", "Dmitriy Lushnikov", - "Dmitry", + "Dmitriy", "Dmitry Belyavtsev", "Dmitry Bilunov", "Dmitry Galuza", "Dmitry Krylov", - "dmitry kuzmin", "Dmitry Luhtionov", "Dmitry Moskowski", "Dmitry Muzyka", @@ -302,148 +217,89 @@ const char * auto_contributors[] { "Dmitry Rubashkin", "Dmitry S..ky / skype: dvska-at-skype", "Dmitry Ukolov", + "Dmitry", "Doge", "Dongdong Yang", "DoomzD", "Dr. Strange Looker", "DuckSoft", - "d.v.semenov", - "eaxdev", - "eejoin", - "egatov", "Egor O'Sten", "Egor Savin", "Ekaterina", - "elBroom", "Eldar Zaitov", "Elena Baskakova", - "elenaspb2019", - "elevankoff", "Elghazal Ahmed", "Elizaveta Mironyuk", - "emakarov", - "emhlbmc", - "emironyuk", + "Elykov Alexandr", "Emmanuel Donin de Rosière", - "Eric", "Eric Daniel", + "Eric", "Erixonich", - "ermaotech", "Ernest Poletaev", "Eugene Klimov", "Eugene Konkov", "Evgenia Sudarikova", - "Evgeniia Sudarikova", "Evgenii Pravda", + "Evgeniia Sudarikova", "Evgeniy Gatov", "Evgeniy Udodov", "Evgeny Konkov", "Evgeny Markov", - "evtan", + "Evgeny", "Ewout", - "exprmntr", - "ezhaka", - "f1yegor", - "Fabiano Francesconi", + "FArthur-cmd", "Fabian Stäber", + "Fabiano Francesconi", "Fadi Hadzh", "Fan()", - "fancno", - "FArthur-cmd", - "fastio", - "favstovol", "FawnD2", "Federico Ceratto", "FeehanG", - "feihengye", - "felixoid", - "felixxdu", - "feng lv", - "fenglv", - "fessmage", "FgoDt", - "fibersel", "Filatenkov Artur", - "filimonov", - "filipe", "Filipe Caixeta", - "flow", "Flowyi", - "flynn", - "foxxmary", "Francisco Barón", - "frank", - "franklee", + "Frank Chen", "Frank Zhao", - "fredchenbj", "Fruit of Eden", - "Fullstop000", - "fuqi", - "Fuwang Hu", - "fuwhu", "Fu Zhe", - "fuzhe1989", - "fuzzERot", + "Fullstop000", + "Fuwang Hu", "Gagan Arneja", "Gao Qiang", - "g-arslan", "Gary Dotzler", + "George G", "George", "George3d6", - "George G", "Georgy Ginzburg", "Gervasio Varela", - "ggerogery", - "giordyb", "Gleb Kanterov", "Gleb Novikov", "Gleb-Tretyakov", - "glockbender", - "glushkovds", "Gregory", - "Grigory", "Grigory Buteyko", "Grigory Pervakov", + "Grigory", "Guillaume Tassery", - "guoleiyi", "Guo Wei (William)", - "gyuton", "Haavard Kvaalen", "Habibullah Oladepo", "Hamoon", - "hao.he", "Hasitha Kanchana", "Hasnat", - "hchen9", - "hcz", - "heng zhao", - "hermano", - "hexiaoting", - "hhell", "Hiroaki Nakamura", - "hotid", - "huangzhaowei", "HuFuwang", "Hui Wang", - "hustnn", - "huzhichengdd", - "ice1x", - "idfer", - "igomac", - "igor", - "Igor", "Igor Hatarist", - "igor.lapko", "Igor Mineev", "Igor Strykhar", - "Igr", + "Igor", "Igr Mineev", - "ikarishinjieva", + "Igr", "Ikko Ashimine", - "ikopylov", "Ildar Musin", "Ildus Kurbangaliev", - "Ilya", "Ilya Breev", "Ilya Golshtein", "Ilya Khomutov", @@ -454,78 +310,56 @@ const char * auto_contributors[] { "Ilya Shipitsin", "Ilya Skrypitsa", "Ilya Yatsishin", + 
"Ilya", "ImgBotApp", - "imgbot[bot]", - "ip", - "Islam Israfilov", "Islam Israfilov (Islam93)", - "it1804", - "Ivan", + "Islam Israfilov", "Ivan A. Torgashov", "Ivan Babrou", "Ivan Blinkov", "Ivan He", - "ivan-kush", "Ivan Kush", "Ivan Kushnarenko", "Ivan Lezhankin", "Ivan Milov", "Ivan Remen", "Ivan Starkov", - "ivanzhukov", "Ivan Zhukov", - "ivoleg", + "Ivan", "Jack Song", "JackyWoo", "Jacob Hayes", - "jakalletti", "Jakub Kuklis", "JaosnHsieh", - "jasine", - "Jason", "Jason Keirstead", - "jasperzhu", - "javartisan", - "javi", - "javi santana", + "Jason", "Javi Santana", "Javi santana bot", "Jean Baptiste Favre", "Jeffrey Dang", - "jennyma", - "jetgm", "Jiading Guo", "Jiang Tao", - "jianmei zhang", - "jkuklis", - "João Figueiredo", "Jochen Schalanda", - "John", "John Hummel", "John Skopis", + "John", "Jonatas Freitas", + "João Figueiredo", "Julian Zhou", - "jyz0309", "Kang Liu", "Karl Pietrzak", - "karnevil13", - "keenwolf", "Keiji Yoshida", "Ken Chen", "Ken MacInnis", "Kevin Chiang", "Kevin Michel", - "kevin wan", "Kiran", "Kirill Danshin", "Kirill Ershov", - "kirillikoff", "Kirill Malev", "Kirill Shvakov", - "kmeaw", "Koblikov Mihail", "KochetovNicolai", - "kolsys", "Konstantin Grabar", "Konstantin Ilchenko", "Konstantin Lebedev", @@ -534,73 +368,36 @@ const char * auto_contributors[] { "Konstantin Rudenskii", "Korenevskiy Denis", "Korviakov Andrey", - "koshachy", "Kostiantyn Storozhuk", "Kozlov Ivan", - "kreuzerkrieg", "Kruglov Pavel", - "ks1322", "Kseniia Sumarokova", - "kshvakov", - "kssenii", "Ky Li", - "l", - "l1tsolaiki", - "lalex", + "LB", "Latysheva Alexandra", - "laurieliyang", - "lehasm", - "Léo Ercolanelli", "Leonardo Cecchi", "Leopold Schabel", - "leozhang", "Lev Borodin", - "levie", - "levushkin aleksej", - "levysh", "Lewinma", - "lhuang0928", - "lhuang09287750", - "liang.huang", - "liangqian", - "libenwang", - "lichengxiang", - "linceyou", - "listar", - "litao91", - "liu-bov", "Liu Cong", "LiuCong", - "liuyangkuan", "LiuYangkuan", - "liuyimin", - "liyang", - "lomberts", - "long2ice", "Lopatin Konstantin", "Loud_Scream", - "lthaooo", - "ltybc-coder", - "luc1ph3r", "Lucid Dreams", "Luis Bosque", - "lulichao", "Lv Feng", + "Léo Ercolanelli", "M0r64n", - "madianjun", "MagiaGroz", - "maiha", - "Maksim", + "Maks Skorokhod", "Maksim Fedotov", "Maksim Kita", - "Maks Skorokhod", - "malkfilipp", + "Maksim", "Malte", - "manmitya", - "maqroll", "Marat IDRISOV", - "Marek Vavruša", "Marek Vavrusa", + "Marek Vavruša", "Marek Vavruša", "Mariano Benítez Mulet", "Mark Andreev", @@ -609,21 +406,18 @@ const char * auto_contributors[] { "Maroun Maroun", "Marquitos", "Marsel Arduanov", - "Martijn Bakker", "Marti Raudsepp", + "Martijn Bakker", "Marvin Taschenberger", "Masha", - "mastertheknife", "Matthew Peveler", "Matwey V. 
Kornilov", - "Mátyás Jani", - "Max", "Max Akhmedov", "Max Bruce", - "maxim", + "Max Vetrov", + "Max", + "MaxWk", "Maxim Akhmedov", - "MaximAL", - "maxim-babenko", "Maxim Babenko", "Maxim Fedotov", "Maxim Fridental", @@ -634,141 +428,100 @@ const char * auto_contributors[] { "Maxim Serebryakov", "Maxim Smirnov", "Maxim Ulanovskiy", - "maxkuzn", - "maxulan", - "Max Vetrov", - "MaxWk", + "MaximAL", "Mc.Spring", - "mehanizm", "MeiK", - "melin", - "memo", "Memo", - "meo", - "meoww-bot", - "mergify[bot]", "Metehan Çetinkaya", "Metikov Vadim", - "mf5137", - "mfridental", "Michael Furmur", "Michael Kolupaev", "Michael Monashev", "Michael Razuvaev", "Michael Smitasin", "Michal Lisowski", - "michon470", "MicrochipQ", "Miguel Fernández", - "miha-g", "Mihail Fandyushin", - "mikael", "Mikahil Nacharov", - "Mike", "Mike F", "Mike Kot", - "mikepop7", - "Mikhail", + "Mike", "Mikhail Andreev", "Mikhail Cheshkov", "Mikhail Fandyushin", "Mikhail Filimonov", - "Mikhail f. Shiryaev", "Mikhail Gaidamaka", "Mikhail Korotov", "Mikhail Malafeev", "Mikhail Nacharov", "Mikhail Salosin", "Mikhail Surin", + "Mikhail f. Shiryaev", + "Mikhail", "MikuSugar", "Milad Arabi", - "millb", "Misko Lee", - "mnkonkova", - "mo-avatar", "Mohamad Fadhil", "Mohammad Hossein Sekhavat", - "morty", - "moscas", "Mostafa Dahab", "MovElb", "Mr.General", "Murat Kabilov", - "muzzlerator", - "m-ves", - "mwish", "MyroTk", - "myrrc", - "nagorny", + "Mátyás Jani", + "NIKITA MIKHAILOV", "Narek Galstyan", - "nauta", - "nautaa", + "Natasha Murashkina", + "NeZeD [Mac Pro]", "Neeke Gao", - "neng.liu", "Neng Liu", "NengLiu", - "never lee", - "NeZeD [Mac Pro]", - "nicelulu", - "Nickita", "Nickita Taranov", + "Nickita", "Nickolay Yastrebov", - "nickzhwang", - "Nicolae Vartolomei", "Nico Mandery", "Nico Piderman", + "Nicolae Vartolomei", "Nik", "Nikhil Nadig", "Nikhil Raman", "Nikita Lapkov", "Nikita Mikhailov", - "NIKITA MIKHAILOV", "Nikita Mikhalev", - "nikitamikhaylov", "Nikita Mikhaylov", "Nikita Orlov", "Nikita Tikhomirov", "Nikita Vasilev", "Nikolai Kochetov", "Nikolai Sorokin", - "Nikolay", "Nikolay Degterinsky", "Nikolay Kirsh", "Nikolay Semyachkin", "Nikolay Shcheglov", "Nikolay Vasiliev", "Nikolay Volosatov", + "Nikolay", "Niu Zhaojie", - "nonexistence", - "ns-vasilev", - "nvartolomei", - "oandrew", - "objatie_groba", - "ocadaruma", "Odin Hultgren Van Der Horst", - "ogorbacheva", "Okada Haruki", "Oleg Ershov", "Oleg Favstov", "Oleg Komarov", - "olegkv", "Oleg Matrokhin", "Oleg Obleukhov", "Oleg Strokachuk", "Olga Khvostikova", - "olgarev", "Olga Revyakina", "OmarBazaraa", - "Onehr7", "OnePiece", - "orantius", + "Onehr7", "Orivej Desh", "Oskar Wojciski", "OuO", - "palasonicq", + "PHO", "Paramtamtam", "Patrick Zippenfenig", - "Pavel", "Pavel Cheremushkin", "Pavel Kartaviy", "Pavel Kartavyy", @@ -778,87 +531,59 @@ const char * auto_contributors[] { "Pavel Medvedev", "Pavel Patrin", "Pavel Yakunin", + "Pavel", "Pavlo Bashynskiy", "Pawel Rog", - "pawelsz-rb", - "pdv-ru", + "Peignon Melvyn", "Peng Jian", "Persiyanov Dmitriy Andreevich", "Pervakov Grigorii", "Pervakov Grigory", - "peshkurov", - "philip.han", "Philippe Ombredanne", - "PHO", - "pingyu", - "potya", "Potya", "Pradeep Chhetri", - "presto53", - "proller", - "pufit", - "pyos", "Pysaoke", - "qianlixiang", - "qianmoQ", - "quid", "Quid37", - "quoctan132", - "r1j1k", "Rafael David Tinoco", - "rainbowsysu", "Ramazan Polat", - "Raúl Marín", "Ravengg", + "Raúl Marín", "Realist007", - "redclusive", "RedClusive", "RegulusZ", "Reilee", "Reto Kromer", "Ri", - "ritaank", - "robert", + "Rich Raposa", 
"Robert Hodges", - "robot-clickhouse", - "robot-metrika-test", - "rodrigargar", "Rohit Agarwal", "Romain Neutron", - "roman", "Roman Bug", + "Roman Chyrva", "Roman Lipovsky", "Roman Nikolaev", "Roman Nozdrin", "Roman Peshkurov", "Roman Tsisyk", - "romanzhukov", "Roman Zhukov", - "root", - "roverxu", - "ruct", - "Ruslan", + "Roman", "Ruslan Savchenko", + "Ruslan", "Russ Frank", "Ruzal Ibragimov", - "ryzuo", + "S.M.A. Djawadi", "Sabyanin Maxim", "SaltTan", "Sami Kerola", "Samuel Chou", - "santaux", - "satanson", "Saulius Valatka", - "sdk2", "Sean Haynes", - "Sébastien", - "Sébastien Launay", - "serebrserg", + "Serg Kulakov", + "Serge Rider", "Sergei Bocharov", "Sergei Semin", "Sergei Shtykov", "Sergei Tsetlin (rekub)", - "Serge Rider", "Sergey Demurin", "Sergey Elantsev", "Sergey Fedorov", @@ -872,144 +597,101 @@ const char * auto_contributors[] { "Sergey Zaikin", "Sergi Almacellas Abellana", "Sergi Vladykin", - "Serg Kulakov", - "sev7e0", "SevaCode", - "sevirov", "Seyed Mehrshad Hosseini", - "sfod", - "shangshujie", - "shedx", "Sherry Wang", "Shoh Jahon", "Silviu Caragea", "Simeon Emanuilov", "Simon Liu", "Simon Podlipsky", - "Šimon Podlipský", - "simon-says", "Sina", "Sjoerd Mulder", "Slach", - "S.M.A. Djawadi", "Snow", "Sofia Antipushina", - "songenjie", - "spff", - "spongedc", - "spyros87", "Stanislav Pavlovichev", "Stas Kelvich", "Stas Pavlovichev", - "stavrolia", "Stefan Thies", - "Stepan", "Stepan Herold", - "stepenhu", + "Stepan", "Steve-金勇", "Stig Bakken", "Storozhuk Kostiantyn", "Stupnikov Andrey", - "su-houzhen", - "sundy", - "sundy-li", - "sundyli", "SuperBot", - "svladykin", + "SuperDJY", + "Sébastien Launay", + "Sébastien", "TAC", + "TCeason", "Tagir Kuskarov", - "tai", - "taichong", "Tai White", - "taiyang-li", "Taleh Zaliyev", "Tangaev", - "tao jiang", - "Tatiana", "Tatiana Kirillova", - "tavplubix", - "TCeason", - "Teja", + "Tatiana", "Teja Srivastasa", + "Teja", "Tema Novikov", - "templarzq", "Tentoshka", - "terrylin", "The-Alchemist", - "Thomas Berdy", "Thom O'Connor", - "tianzhou", + "Thomas Berdy", "Tiaonmmn", - "tiger.yan", "Tigran Khudaverdyan", - "tison", + "Timur Magomedov", "TiunovNN", "Tobias Adamson", "Tobias Lins", "Tom Bombadil", - "topvisor", + "Tom Risse", + "Tomáš Hromada", "Tsarkova Anastasia", "TszkitLo40", - "turbo jason", - "tyrionhuang", - "ubuntu", "Ubuntu", "Ubus", "UnamedRus", - "unegare", - "unknown", - "urgordeadbeef", "V", - "Vadim", - "VadimPE", + "VDimir", "Vadim Plakhtinskiy", "Vadim Skipin", "Vadim Volodin", + "Vadim", + "VadimPE", "Val", "Valera Ryaboshapko", - "Vasilyev Nikita", "Vasily Kozhukhovskiy", "Vasily Morozov", "Vasily Nemkov", "Vasily Okunev", "Vasily Vasilkov", - "vdimir", - "VDimir", - "velom", + "Vasilyev Nikita", "Veloman Yunkan", "Veniamin Gvozdikov", "Veselkov Konstantin", - "vesslanjin", - "vgocoder", "Viachaslau Boben", - "vic", - "vicdashkov", - "vicgao", - "Victor", "Victor Tarnavsky", + "Victor", "Viktor Taranenko", - "vinity", "Vitalii S", "Vitaliy Fedorchenko", "Vitaliy Karnienko", "Vitaliy Kozlovskiy", "Vitaliy Lyudvichenko", "Vitaliy Zakaznikov", - "Vitaly", "Vitaly Baranov", "Vitaly Orlov", "Vitaly Samigullin", "Vitaly Stoyan", - "vitstn", - "vivarum", + "Vitaly", "Vivien Maisonneuve", "Vlad Arkhipov", - "Vladimir", "Vladimir Bunchuk", "Vladimir C", "Vladimir Ch", "Vladimir Chebotarev", - "vladimir golovchenko", "Vladimir Golovchenko", "Vladimir Goncharov", "Vladimir Klimontovich", @@ -1017,81 +699,437 @@ const char * auto_contributors[] { "Vladimir Kopysov", "Vladimir Kozbin", "Vladimir Smirnov", + "Vladimir", 
"Vladislav Rassokhin", "Vladislav Smirnov", "Vojtech Splichal", "Volodymyr Kuznetsov", "Vsevolod Orlov", - "vxider", "Vxider", "Vyacheslav Alipov", - "vzakaznikov", - "wangchao", "Wang Fenjin", "WangZengrui", - "weeds085490", "Weiqing Xu", "William Shallum", "Winter Zhang", - "wzl", "Xianda Ke", "Xiang Zhou", - "xiedeyantu", - "xPoSx", - "Yağızcan Değirmenci", - "yang", + "Y Lu", "Yangkuan Liu", - "yangshuai", "Yatsishin Ilya", - "yeer", + "Yağızcan Değirmenci", "Yegor Andreenko", "Yegor Levankov", - "ygrek", - "yhgcn", - "Yiğit Konur", - "yiguolei", "Yingchun Lai", "Yingfan Chen", + "Yiğit Konur", + "Yohann Jardin", + "Yuntao Wu", + "Yuri Dyachenko", + "Yurii Vlasenko", + "Yuriy Baranov", + "Yuriy Chernyshov", + "Yuriy Korzhenevskiy", + "Yuriy", + "Yury Karpovich", + "Yury Stankevich", + "ZhiYong Wang", + "Zhichang Yu", + "Zhichun Wu", + "Zhipeng", + "Zijie Lu", + "Zoran Pandovski", + "a.palagashvili", + "abdrakhmanov", + "abel-wang", + "abyss7", + "achimbab", + "achulkov2", + "adevyatova", + "ageraab", + "akazz", + "akonyaev", + "akuzm", + "alekseik1", + "alesapin", + "alex-zaitsev", + "alex.lvxin", + "alexander kozhikhov", + "alexey-milovidov", + "alfredlu", + "amesaru", + "amoschen", + "amudong", + "ana-uvarova", + "andrc1901", + "andrei-karpliuk", + "andrewsg", + "anneji", + "anneji-dev", + "annvsh", + "anrodigina", + "antikvist", + "anton", + "ap11", + "aprudaev", + "artpaul", + "asiana21", + "atereh", + "avasiliev", + "avogar", + "avsharapov", + "awesomeleo", + "bbkas", + "benamazing", + "benbiti", + "bgranvea", + "bharatnc", + "blazerer", + "bluebirddm", + "bo zeng", + "bobrovskij artemij", + "booknouse", + "bseng", + "caspian", + "cekc", + "centos7", + "cfcz48", + "cgp", + "champtar", + "chang.chen", + "changvvb", + "chasingegg", + "chengy8934", + "chenjian", + "chenqi", + "chenxing-xc", + "chenxing.xc", + "chertus", + "chou.fan", + "christophe.kalenzaga", + "cms", + "cmsxbc", + "cn-ds", + "comunodi", + "congbaoyangrou", + "coraxster", + "d.v.semenov", + "damozhaeva", + "dankondr", + "daoready", + "dasmfm", + "davydovska", + "decaseal", + "dependabot-preview[bot]", + "dependabot[bot]", + "detailyang", + "dfenelonov", + "dgrr", + "dimarub2000", + "dinosaur", + "divanorama", + "dkxiaohei", + "dmi-feo", + "dmitrii", + "dmitriiut", + "dmitry kuzmin", + "dongyifeng", + "eaxdev", + "eejoin", + "egatov", + "elBroom", + "elenaspb2019", + "elevankoff", + "emakarov", + "emhlbmc", + "emironyuk", + "ermaotech", + "evtan", + "exprmntr", + "ezhaka", + "f1yegor", + "fancno", + "fastio", + "favstovol", + "feihengye", + "felixoid", + "felixxdu", + "feng lv", + "fenglv", + "fessmage", + "fibersel", + "filimonov", + "filipe", + "flow", + "flynn", + "foxxmary", + "frank chen", + "frank", + "franklee", + "fredchenbj", + "freedomDR", + "fuqi", + "fuwhu", + "fuzhe1989", + "fuzzERot", + "g-arslan", + "ggerogery", + "giordyb", + "glockbender", + "glushkovds", + "guoleiyi", + "gyuton", + "hao.he", + "hchen9", + "hcz", + "heng zhao", + "hermano", + "hexiaoting", + "hhell", + "hotid", + "huangzhaowei", + "hustnn", + "huzhichengdd", + "ice1x", + "idfer", + "igomac", + "igor", + "igor.lapko", + "ikarishinjieva", + "ikopylov", + "imgbot[bot]", + "ip", + "it1804", + "ivan-kush", + "ivanzhukov", + "ivoleg", + "jakalletti", + "jasine", + "jasperzhu", + "javartisan", + "javi santana", + "javi", + "jennyma", + "jetgm", + "jianmei zhang", + "jkuklis", + "jus1096", + "jyz0309", + "karnevil13", + "keenwolf", + "kevin wan", + "khamadiev", + "kirillikoff", + "kmeaw", + "kolsys", + "koshachy", + "kreuzerkrieg", + 
"ks1322", + "kshvakov", + "kssenii", + "l", + "l1tsolaiki", + "lalex", + "laurieliyang", + "lehasm", + "leosunli", + "leozhang", + "levie", + "levushkin aleksej", + "levysh", + "lhuang0928", + "lhuang09287750", + "liang.huang", + "liangqian", + "libenwang", + "lichengxiang", + "linceyou", + "listar", + "litao91", + "liu-bov", + "liuyangkuan", + "liuyimin", + "liyang", + "liyang830", + "lomberts", + "loneylee", + "long2ice", + "lthaooo", + "ltybc-coder", + "luc1ph3r", + "lulichao", + "m-ves", + "madianjun", + "maiha", + "malkfilipp", + "manmitya", + "maqroll", + "mastertheknife", + "maxim", + "maxim-babenko", + "maxkuzn", + "maxulan", + "mehanizm", + "melin", + "memo", + "meo", + "meoww-bot", + "mergify[bot]", + "mf5137", + "mfridental", + "michael1589", + "michon470", + "miha-g", + "mikael", + "mikepop7", + "millb", + "mnkonkova", + "mo-avatar", + "morty", + "moscas", + "msaf1980", + "muzzlerator", + "mwish", + "myrrc", + "nagorny", + "nauta", + "nautaa", + "neng.liu", + "never lee", + "nicelulu", + "nickzhwang", + "nikitamikhaylov", + "nonexistence", + "ns-vasilev", + "nvartolomei", + "oandrew", + "objatie_groba", + "ocadaruma", + "ogorbacheva", + "olegkv", + "olgarev", + "orantius", + "p0ny", + "palasonicq", + "pawelsz-rb", + "pdv-ru", + "peshkurov", + "philip.han", + "pingyu", + "potya", + "presto53", + "proller", + "pufit", + "pyos", + "qianlixiang", + "qianmoQ", + "qieqieplus", + "quid", + "quoctan132", + "r1j1k", + "rainbowsysu", + "redclusive", + "ritaank", + "robert", + "robot-clickhouse", + "robot-metrika-test", + "rodrigargar", + "roman", + "romanzhukov", + "root", + "roverxu", + "ruct", + "ryzuo", + "santaux", + "satanson", + "sdk2", + "serebrserg", + "sev7e0", + "sevirov", + "sfod", + "shangshujie", + "shedx", + "simon-says", + "songenjie", + "spff", + "spongedc", + "spume", + "spyros87", + "stavrolia", + "stepenhu", + "su-houzhen", + "sundy", + "sundy-li", + "sundyli", + "sunlisheng", + "svladykin", + "tai", + "taichong", + "taiyang-li", + "tao jiang", + "tavplubix", + "templarzq", + "terrylin", + "tianzhou", + "tiger.yan", + "tison", + "topvisor", + "turbo jason", + "tyrionhuang", + "ubuntu", + "unegare", + "unknown", + "urgordeadbeef", + "vdimir", + "velom", + "vesslanjin", + "vgocoder", + "vic", + "vicdashkov", + "vicgao", + "vinity", + "vitstn", + "vivarum", + "vladimir golovchenko", + "vxider", + "vzakaznikov", + "wangchao", + "weeds085490", + "wzl", + "xPoSx", + "xiedeyantu", + "yandd", + "yang", + "yangshuai", + "yeer", + "ygrek", + "yhgcn", + "yiguolei", "yingjinghan", "ylchou", - "Y Lu", - "Yohann Jardin", "yonesko", "yuchuansun", "yuefoo", "yulu86", "yuluxu", - "Yuntao Wu", - "Yuri Dyachenko", - "Yurii Vlasenko", - "Yuriy", - "Yuriy Baranov", - "Yuriy Chernyshov", - "Yuriy Korzhenevskiy", - "Yury Karpovich", - "Yury Stankevich", "ywill3", "zamulla", "zhang2014", + "zhanghuajie", "zhanglistar", "zhangshengyu", "zhangxiao018", "zhangxiao871", "zhen ni", - "Zhichang Yu", - "Zhichun Wu", - "Zhipeng", - "ZhiYong Wang", "zhongyuankai", "zhukai", - "Zijie Lu", "zlx19950903", - "Zoran Pandovski", "zvonand", "zvrr", "zvvr", "zxc111", "zzsmdfj", + "Šimon Podlipský", "Артем Стрельцов", "Владислав Тихонов", "Георгий Кондратьев", @@ -1121,4 +1159,5 @@ const char * auto_contributors[] { "靳阳", "黄朝晖", "黄璞", + "박현우", nullptr}; diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 4aed5098bd1..c09279e65ac 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -7,6 +7,7 @@ 
#include #include + namespace DB { diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 33ec5c457f6..8dbd73628ca 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -206,7 +206,7 @@ void StorageSystemPartsColumns::processNextStorage( columns[res_index++]->insertDefault(); } - ColumnSize column_size = part->getColumnSize(column.name, *column.type); + ColumnSize column_size = part->getColumnSize(column.name); if (columns_mask[src_index++]) columns[res_index++]->insert(column_size.data_compressed + column_size.marks); if (columns_mask[src_index++]) diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index bdbe9a46846..f6490177014 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -237,7 +237,7 @@ void StorageSystemProjectionPartsColumns::processNextStorage( columns[res_index++]->insertDefault(); } - ColumnSize column_size = part->getColumnSize(column.name, *column.type); + ColumnSize column_size = part->getColumnSize(column.name); if (columns_mask[src_index++]) columns[res_index++]->insert(column_size.data_compressed + column_size.marks); if (columns_mask[src_index++]) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 510140c4f1d..272276c5164 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ namespace ErrorCodes namespace { - /// Fetch all window info and replace TUMPLE or HOP node names with WINDOW_ID + /// Fetch all window info and replace tumble or hop node names with windowID struct FetchQueryInfoMatcher { using Visitor = InDepthNodeVisitor; @@ -85,33 +85,38 @@ namespace { if (auto * t = ast->as()) { - if (t->name == "TUMBLE" || t->name == "HOP") + if (t->name == "tumble" || t->name == "hop") { - data.is_tumble = t->name == "TUMBLE"; - data.is_hop = t->name == "HOP"; + data.is_tumble = t->name == "tumble"; + data.is_hop = t->name == "hop"; + auto temp_node = t->clone(); + temp_node->setAlias(""); + if (startsWith(t->arguments->children[0]->getColumnName(), "toDateTime")) + throw Exception( + "The first argument of time window function should not be a constant value.", + ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); if (!data.window_function) { - t->name = "WINDOW_ID"; + data.serialized_window_function = serializeAST(*temp_node); + t->name = "windowID"; data.window_id_name = t->getColumnName(); data.window_id_alias = t->alias; data.window_function = t->clone(); data.window_function->setAlias(""); - data.serialized_window_function = serializeAST(*data.window_function); data.timestamp_column_name = t->arguments->children[0]->getColumnName(); } else { - auto temp_node = t->clone(); - temp_node->setAlias(""); if (serializeAST(*temp_node) != data.serialized_window_function) - throw Exception("WINDOW VIEW only support ONE WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + throw Exception("WINDOW VIEW only support ONE TIME WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + t->name = "windowID"; } } } } }; - /// Replace WINDOW_ID node name with either TUMBLE or HOP. 
+ /// Replace windowID node name with either tumble or hop struct ReplaceWindowIdMatcher { public: @@ -127,29 +132,40 @@ namespace { if (auto * t = ast->as()) { - if (t->name == "WINDOW_ID") + if (t->name == "windowID") t->name = data.window_name; } } }; - /// GROUP BY TUMBLE(now(), INTERVAL '5' SECOND) + /// GROUP BY tumble(now(), INTERVAL '5' SECOND) /// will become - /// GROUP BY TUMBLE(____timestamp, INTERVAL '5' SECOND) + /// GROUP BY tumble(____timestamp, INTERVAL '5' SECOND) struct ReplaceFunctionNowData { using TypeToVisit = ASTFunction; bool is_time_column_func_now = false; String window_id_name; + String now_timezone; void visit(ASTFunction & node, ASTPtr & node_ptr) { - if (node.name == "WINDOW_ID") + if (node.name == "windowID" || node.name == "tumble" || node.name == "hop") { if (const auto * t = node.arguments->children[0]->as(); t && t->name == "now") { + if (!t->children.empty()) + { + const auto & children = t->children[0]->as()->children; + if (!children.empty()) + { + const auto * timezone_ast = children[0]->as(); + if (timezone_ast) + now_timezone = timezone_ast->value.safeGet(); + } + } is_time_column_func_now = true; node_ptr->children[0]->children[0] = std::make_shared("____timestamp"); window_id_name = node.getColumnName(); @@ -172,8 +188,8 @@ namespace { if (auto * t = ast->as()) { - if (t->name == "HOP" || t->name == "TUMBLE") - t->name = "WINDOW_ID"; + if (t->name == "hop" || t->name == "tumble") + t->name = "windowID"; } } }; @@ -205,12 +221,12 @@ namespace { if (node.name == "tuple") { - /// tuple(WINDOW_ID(timestamp, toIntervalSecond('5'))) + /// tuple(windowID(timestamp, toIntervalSecond('5'))) return; } else { - /// WINDOW_ID(timestamp, toIntervalSecond('5')) -> identifier. + /// windowID(timestamp, toIntervalSecond('5')) -> identifier. /// and other... 
node_ptr = std::make_shared(node.getColumnName()); } @@ -291,10 +307,12 @@ namespace } } -static void extractDependentTable(ContextPtr context, ASTSelectQuery & query, String & select_database_name, String & select_table_name) +static void extractDependentTable(ContextPtr context, ASTPtr & query, String & select_database_name, String & select_table_name) { - auto db_and_table = getDatabaseAndTable(query, 0); - ASTPtr subquery = extractTableExpression(query, 0); + ASTSelectQuery & select_query = typeid_cast(*query); + + auto db_and_table = getDatabaseAndTable(select_query, 0); + ASTPtr subquery = extractTableExpression(select_query, 0); if (!db_and_table && !subquery) return; @@ -307,7 +325,7 @@ static void extractDependentTable(ContextPtr context, ASTSelectQuery & query, St { db_and_table->database = select_database_name; AddDefaultDatabaseVisitor visitor(context, select_database_name); - visitor.visit(query); + visitor.visit(select_query); } else select_database_name = db_and_table->database; @@ -319,7 +337,7 @@ static void extractDependentTable(ContextPtr context, ASTSelectQuery & query, St auto & inner_select_query = ast_select->list_of_selects->children.at(0); - extractDependentTable(context, inner_select_query->as(), select_database_name, select_table_name); + extractDependentTable(context, inner_select_query, select_database_name, select_table_name); } else throw Exception( @@ -333,14 +351,14 @@ static size_t getWindowIDColumnPosition(const Block & header) auto position = -1; for (const auto & column : header.getColumnsWithTypeAndName()) { - if (startsWith(column.name, "WINDOW_ID")) + if (startsWith(column.name, "windowID")) { position = header.getPositionByName(column.name); break; } } if (position < 0) - throw Exception("Not found column WINDOW_ID", ErrorCodes::LOGICAL_ERROR); + throw Exception("Not found column windowID", ErrorCodes::LOGICAL_ERROR); return position; } @@ -609,8 +627,11 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( auto node = ast->clone(); /// now() -> ____timestamp if (is_time_column_func_now) + { time_now_visitor.visit(node); - /// TUMBLE/HOP -> WINDOW_ID + function_now_timezone = time_now_data.now_timezone; + } + /// tumble/hop -> windowID func_window_visitor.visit(node); to_identifier_visitor.visit(node); new_storage->set(field, node); @@ -640,7 +661,10 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( ASTPtr order_by_ptr = order_by; if (is_time_column_func_now) + { time_now_visitor.visit(order_by_ptr); + function_now_timezone = time_now_data.now_timezone; + } to_identifier_visitor.visit(order_by_ptr); for (auto & child : order_by->arguments->children) @@ -829,7 +853,6 @@ void StorageWindowView::threadFuncFireProc() std::unique_lock lock(fire_signal_mutex); UInt32 timestamp_now = std::time(nullptr); - LOG_TRACE(log, "Processing time. Now: {}. 
Next fire time: {}", timestamp_now, next_fire_signal); while (next_fire_signal <= timestamp_now) { try @@ -862,7 +885,6 @@ void StorageWindowView::threadFuncFireEvent() while (!fire_signal.empty()) { - LOG_TRACE(log, "Fire signals: {}", fire_signal.size()); fire(fire_signal.front()); fire_signal.pop_front(); } @@ -923,10 +945,11 @@ StorageWindowView::StorageWindowView( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "UNION is not supported for {}", getName()); - ASTSelectQuery & select_query = typeid_cast(*query.select->list_of_selects->children.at(0)); + select_query = query.select->list_of_selects->children.at(0)->clone(); String select_database_name = getContext()->getCurrentDatabase(); String select_table_name; - extractDependentTable(getContext(), select_query, select_database_name, select_table_name); + auto select_query_tmp = select_query->clone(); + extractDependentTable(getContext(), select_query_tmp, select_database_name, select_table_name); /// If the table is not specified - use the table `system.one` if (select_table_name.empty()) @@ -937,8 +960,8 @@ StorageWindowView::StorageWindowView( select_table_id = StorageID(select_database_name, select_table_name); DatabaseCatalog::instance().addDependency(select_table_id, table_id_); - /// Extract all info from query; substitute Function_TUMPLE and Function_HOP with Function_WINDOW_ID. - auto inner_query = innerQueryParser(select_query); + /// Extract all info from query; substitute Function_tumble and Function_hop with Function_windowID. + auto inner_query = innerQueryParser(select_query->as()); // Parse mergeable query mergeable_query = inner_query->clone(); @@ -948,13 +971,13 @@ StorageWindowView::StorageWindowView( if (is_time_column_func_now) window_id_name = func_now_data.window_id_name; - // Parse final query (same as mergeable query but has TUMBLE/HOP instead of WINDOW_ID) + // Parse final query (same as mergeable query but has tumble/hop instead of windowID) final_query = mergeable_query->clone(); ReplaceWindowIdMatcher::Data final_query_data; if (is_tumble) - final_query_data.window_name = "TUMBLE"; + final_query_data.window_name = "tumble"; else - final_query_data.window_name = "HOP"; + final_query_data.window_name = "hop"; ReplaceWindowIdMatcher::Visitor(final_query_data).visit(final_query); is_watermark_strictly_ascending = query.is_watermark_strictly_ascending; @@ -966,19 +989,25 @@ StorageWindowView::StorageWindowView( eventTimeParser(query); if (is_tumble) - window_column_name = std::regex_replace(window_id_name, std::regex("WINDOW_ID"), "TUMBLE"); + window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "tumble"); else - window_column_name = std::regex_replace(window_id_name, std::regex("WINDOW_ID"), "HOP"); + window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "hop"); + + auto generate_inner_table_name = [](const StorageID & storage_id) + { + if (storage_id.hasUUID()) + return ".inner." + toString(storage_id.uuid); + return ".inner." + storage_id.table_name; + }; - auto generate_inner_table_name = [](const String & table_name) { return ".inner." 
+ table_name; }; if (attach_) { - inner_table_id = StorageID(table_id_.database_name, generate_inner_table_name(table_id_.table_name)); + inner_table_id = StorageID(table_id_.database_name, generate_inner_table_name(table_id_)); } else { auto inner_create_query - = getInnerTableCreateQuery(inner_query, query.storage, table_id_.database_name, generate_inner_table_name(table_id_.table_name)); + = getInnerTableCreateQuery(inner_query, query.storage, table_id_.database_name, generate_inner_table_name(table_id_)); auto create_context = Context::createCopy(context_); InterpreterCreateQuery create_interpreter(inner_create_query, create_context); @@ -1001,7 +1030,7 @@ StorageWindowView::StorageWindowView( } -ASTPtr StorageWindowView::innerQueryParser(ASTSelectQuery & query) +ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query) { if (!query.groupBy()) throw Exception(ErrorCodes::INCORRECT_QUERY, "GROUP BY query is required for {}", getName()); @@ -1013,14 +1042,14 @@ ASTPtr StorageWindowView::innerQueryParser(ASTSelectQuery & query) if (!query_info_data.is_tumble && !query_info_data.is_hop) throw Exception(ErrorCodes::INCORRECT_QUERY, - "WINDOW FUNCTION is not specified for {}", getName()); + "TIME WINDOW FUNCTION is not specified for {}", getName()); window_id_name = query_info_data.window_id_name; window_id_alias = query_info_data.window_id_alias; timestamp_column_name = query_info_data.timestamp_column_name; is_tumble = query_info_data.is_tumble; - // Parse window function + // Parse time window function ASTFunction & window_function = typeid_cast(*query_info_data.window_function); const auto & arguments = window_function.arguments->children; extractWindowArgument( @@ -1165,7 +1194,11 @@ void StorageWindowView::writeIntoWindowView( { ColumnWithTypeAndName column; column.name = "____timestamp"; - column.type = std::make_shared(); + const auto & timezone = window_view.function_now_timezone; + if (timezone.empty()) + column.type = std::make_shared(); + else + column.type = std::make_shared(timezone); column.column = column.type->createColumnConst(0, Field(std::time(nullptr))); auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); @@ -1319,7 +1352,7 @@ Block & StorageWindowView::getHeader() const if (!sample_block) { sample_block = InterpreterSelectQuery( - getFinalQuery(), window_view_context, getParentStorage(), nullptr, + select_query->clone(), window_view_context, getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)).getSampleBlock(); for (size_t i = 0; i < sample_block.columns(); ++i) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index f71e7a986d8..aaa9f7093e7 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -22,11 +22,11 @@ using ASTPtr = std::shared_ptr; * [ENGINE [db.]name] * [WATERMARK strategy] [ALLOWED_LATENESS interval_function] * AS SELECT ... 
- * GROUP BY [TUBLE/HOP(...)] + * GROUP BY [tumble/hop(...)] * * - only stores data that has not been triggered yet; * - fire_task checks if there is a window ready to be fired - * (each window result is fired in one output at the end of TUMBLE/HOP window interval); + * (each window result is fired in one output at the end of tumble/hop window interval); * - intermediate data is stored in inner table with * AggregatingMergeTree engine by default, but any other -MergeTree * engine might be used as inner table engine; @@ -35,24 +35,24 @@ using ASTPtr = std::shared_ptr; * Here function in GROUP BY clause results in a "window_id" * represented as Tuple(DateTime, DateTime) - lower and upper bounds of the window. * Function might be one of the following: - * 1. TUMBLE(time_attr, interval [, timezone]) + * 1. tumble(time_attr, interval [, timezone]) * - non-overlapping, continuous windows with a fixed duration (interval); * - example: - * SELECT TUMBLE(toDateTime('2021-01-01 00:01:45'), INTERVAL 10 SECOND) + * SELECT tumble(toDateTime('2021-01-01 00:01:45'), INTERVAL 10 SECOND) * results in ('2021-01-01 00:01:40','2021-01-01 00:01:50') - * 2. HOP(time_attr, hop_interval, window_interval [, timezone]) + * 2. hop(time_attr, hop_interval, window_interval [, timezone]) * - sliding window; * - has a fixed duration (window_interval parameter) and hops by a * specified hop interval (hop_interval parameter); * If the hop_interval is smaller than the window_interval, hopping windows * are overlapping. Thus, records can be assigned to multiple windows. * - example: - * SELECT HOP(toDateTime('2021-01-01 00:00:45'), INTERVAL 3 SECOND, INTERVAL 10 SECOND) + * SELECT hop(toDateTime('2021-01-01 00:00:45'), INTERVAL 3 SECOND, INTERVAL 10 SECOND) * results in ('2021-01-01 00:00:38','2021-01-01 00:00:48') * * DateTime value can be used with the following functions to find out start/end of the window: - * - TUMPLE_START(time_attr, interval [, timezone]), TUMPLE_END(time_attr, interval [, timezone]) - * - HOP_START(time_attr, hop_interval, window_interval [, timezone]), HOP_END(time_attr, hop_interval, window_interval [, timezone]) + * - tumbleStart(time_attr, interval [, timezone]), tumbleEnd(time_attr, interval [, timezone]) + * - hopStart(time_attr, hop_interval, window_interval [, timezone]), hopEnd(time_attr, hop_interval, window_interval [, timezone]) * * * Time processing options. @@ -61,8 +61,8 @@ using ASTPtr = std::shared_ptr; * - produces results based on the time of the local machine; * - example: * CREATE WINDOW VIEW test.wv TO test.dst - * AS SELECT count(number), TUMBLE_START(w_id) as w_start FROM test.mt - * GROUP BY TUMBLE(now(), INTERVAL '5' SECOND) as w_id + * AS SELECT count(number), tumbleStart(w_id) as w_start FROM test.mt + * GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id * * 2. 
event time * - produces results based on the time that is contained in every record; @@ -79,7 +79,7 @@ using ASTPtr = std::shared_ptr; * CREATE WINDOW VIEW test.wv TO test.dst * WATERMARK=STRICTLY_ASCENDING * AS SELECT count(number) FROM test.mt - * GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND); + * GROUP BY tumble(timestamp, INTERVAL '5' SECOND); * (where `timestamp` is a DateTime column in test.mt) * * @@ -90,8 +90,8 @@ using ASTPtr = std::shared_ptr; * - Can be enabled by using ALLOWED_LATENESS=INTERVAL, like this: * CREATE WINDOW VIEW test.wv TO test.dst * WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND - * AS SELECT count(a) AS count, TUMBLE_END(wid) AS w_end FROM test.mt - * GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND) AS wid; + * AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM test.mt + * GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid; * * - Instead of firing at the end of windows, WINDOW VIEW will fire * immediately when encountering late events; @@ -150,7 +150,11 @@ public: private: Poco::Logger * log; + /// Stored query, e.g. SELECT * FROM * GROUP BY tumble(now(), *) + ASTPtr select_query; + /// Used to generate the mergeable state of select_query, e.g. SELECT * FROM * GROUP BY windowID(____timestamp, *) ASTPtr mergeable_query; + /// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *) ASTPtr final_query; ContextMutablePtr window_view_context; @@ -206,7 +210,9 @@ private: BackgroundSchedulePool::TaskHolder clean_cache_task; BackgroundSchedulePool::TaskHolder fire_task; - ASTPtr innerQueryParser(ASTSelectQuery & inner_query); + String function_now_timezone; + + ASTPtr innerQueryParser(const ASTSelectQuery & query); void eventTimeParser(const ASTCreateQuery & query); std::shared_ptr getInnerTableCreateQuery( diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 656e9fdbe50..bbf822c3879 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -21,7 +21,7 @@ IMAGE_NAME = 'clickhouse/fuzzer' def get_run_command(pr_number, sha, download_url, workspace_path, image): return f'docker run --network=host --volume={workspace_path}:/workspace ' \ - '--cap-add syslog --cap-add sys_admin ' \ + '--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE ' \ f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\ f'{image}' diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 28974662745..36db7d596c9 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -14,6 +14,7 @@ from version_helper import get_version_from_repo, update_version_local from ccache_utils import get_ccache_if_not_exists, upload_ccache from ci_config import CI_CONFIG from docker_pull_helper import get_image_with_version +from tee_popen import TeePopen def get_build_config(build_check_name, build_name): @@ -77,8 +78,8 @@ def get_image_name(build_config): def build_clickhouse(packager_cmd, logs_path): build_log_path = os.path.join(logs_path, 'build_log.log') - with open(build_log_path, 'w') as log_file: - retcode = subprocess.Popen(packager_cmd, shell=True, stderr=log_file, stdout=log_file).wait() + with TeePopen(packager_cmd, build_log_path) as process: + retcode = process.wait() if retcode == 0: logging.info("Built successfully") else: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index f807dcc7cc2..3d97a973017 100644 --- a/tests/ci/build_report_check.py +++ 
b/tests/ci/build_report_check.py @@ -121,6 +121,9 @@ if __name__ == "__main__": build_logs += build_logs_url logging.info("Totally got %s results", len(build_results)) + if len(build_results) == 0: + logging.info("No builds, failing check") + sys.exit(1) s3_helper = S3Helper('https://s3.amazonaws.com') diff --git a/tests/ci/docker_pull_helper.py b/tests/ci/docker_pull_helper.py index f9804744820..50354da6801 100644 --- a/tests/ci/docker_pull_helper.py +++ b/tests/ci/docker_pull_helper.py @@ -25,6 +25,11 @@ def get_images_with_versions(reports_path, required_image, pull=True): images_path = os.path.join(root, 'changed_images.json') break + if not images_path: + logging.info("Images file not found") + else: + logging.info("Images file path %s", images_path) + if images_path is not None and os.path.exists(images_path): logging.info("Images file exists") with open(images_path, 'r', encoding='utf-8') as images_fd: diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index 0430d566b38..87c327f2776 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -13,6 +13,7 @@ from commit_status_helper import post_commit_status, get_commit from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch from rerun_helper import RerunHelper +from tee_popen import TeePopen NAME = "Docs Check (actions)" @@ -55,17 +56,16 @@ if __name__ == "__main__": run_log_path = os.path.join(test_output, 'runlog.log') - with open(run_log_path, 'w', encoding='utf-8') as log: - with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - status = "success" - description = "Docs check passed" - else: - description = "Docs check failed (non zero exit code)" - status = "failure" - logging.info("Run failed") + with TeePopen(cmd, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + status = "success" + description = "Docs check passed" + else: + description = "Docs check failed (non zero exit code)" + status = "failure" + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) files = os.listdir(test_output) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 7953dcdf5d3..30eabe87dce 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -16,6 +16,8 @@ from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch from rerun_helper import RerunHelper +from tee_popen import TeePopen +from ccache_utils import get_ccache_if_not_exists, upload_ccache NAME = 'Fast test (actions)' @@ -86,7 +88,12 @@ if __name__ == "__main__": os.makedirs(output_path) cache_path = os.path.join(caches_path, "fasttest") + + logging.info("Will try to fetch cache for our build") + get_ccache_if_not_exists(cache_path, s3_helper, pr_info.number, temp_path) + if not os.path.exists(cache_path): + logging.info("cache was not fetched, will create empty dir") os.makedirs(cache_path) repo_path = os.path.join(temp_path, "fasttest-repo") @@ -101,8 +108,8 @@ if __name__ == "__main__": os.makedirs(logs_path) run_log_path = os.path.join(logs_path, 'runlog.log') - with open(run_log_path, 'w') as log: - retcode = subprocess.Popen(run_cmd, shell=True, stderr=log, stdout=log).wait() + with TeePopen(run_cmd, run_log_path) as process: + 
retcode = process.wait() if retcode == 0: logging.info("Run successfully") else: @@ -137,6 +144,9 @@ if __name__ == "__main__": else: state, description, test_results, additional_logs = process_results(output_path) + logging.info("Will upload cache") + upload_ccache(cache_path, s3_helper, pr_info.number, temp_path) + ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, NAME, test_results) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 8ab4fc3c9c4..fb157db31ba 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -18,16 +18,22 @@ from commit_status_helper import post_commit_status, get_commit from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch from rerun_helper import RerunHelper +from tee_popen import TeePopen -def get_additional_envs(check_name): +def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): + result = [] if 'DatabaseReplicated' in check_name: - return ["USE_DATABASE_REPLICATED=1"] + result.append("USE_DATABASE_REPLICATED=1") if 'DatabaseOrdinary' in check_name: - return ["USE_DATABASE_ORDINARY=1"] + result.append("USE_DATABASE_ORDINARY=1") if 'wide parts enabled' in check_name: - return ["USE_POLYMORPHIC_PARTS=1"] + result.append("USE_POLYMORPHIC_PARTS=1") - return [] + if run_by_hash_total != 0: + result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}") + result.append(f"RUN_BY_HASH_TOTAL={run_by_hash_total}") + + return result def get_image_name(check_name): if 'stateless' in check_name.lower(): @@ -88,7 +94,12 @@ def process_results(result_folder, server_log_path): additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files] status_path = os.path.join(result_folder, "check_status.tsv") - logging.info("Found test_results.tsv") + if os.path.exists(status_path): + logging.info("Found check_status.tsv") + else: + logging.info("Files in result folder %s", os.listdir(result_folder)) + raise Exception("File check_status.tsv not found") + with open(status_path, 'r', encoding='utf-8') as status_file: status = list(csv.reader(status_file, delimiter='\t')) @@ -97,6 +108,13 @@ def process_results(result_folder, server_log_path): state, description = status[0][0], status[0][1] results_path = os.path.join(result_folder, "test_results.tsv") + + if os.path.exists(results_path): + logging.info("Found test_results.tsv") + else: + logging.info("Files in result folder %s", os.listdir(result_folder)) + raise Exception("File test_results.tsv not found") + with open(results_path, 'r', encoding='utf-8') as results_file: test_results = list(csv.reader(results_file, delimiter='\t')) if len(test_results) == 0: @@ -116,12 +134,22 @@ if __name__ == "__main__": check_name = sys.argv[1] kill_timeout = int(sys.argv[2]) + flaky_check = 'flaky' in check_name.lower() gh = Github(get_best_robot_token()) pr_info = PRInfo(get_event(), need_changed_files=flaky_check) - rerun_helper = RerunHelper(gh, pr_info, check_name) + if 'RUN_BY_HASH_NUM' in os.environ: + run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) + run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) + check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' + else: + run_by_hash_num = 0 + run_by_hash_total = 0 + check_name_with_group = check_name + + rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished 
according to github status, exiting") sys.exit(0) @@ -134,7 +162,7 @@ if __name__ == "__main__": tests_to_run = get_tests_to_run(pr_info) if not tests_to_run: commit = get_commit(gh, pr_info.sha) - commit.create_status(context=check_name, description='Not found changed stateless tests', state='success') + commit.create_status(context=check_name_with_group, description='Not found changed stateless tests', state='success') sys.exit(0) image_name = get_image_name(check_name) @@ -156,17 +184,16 @@ if __name__ == "__main__": run_log_path = os.path.join(result_path, "runlog.log") - additional_envs = get_additional_envs(check_name) + additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total) run_command = get_run_command(packages_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run) logging.info("Going to run func tests: %s", run_command) - with open(run_log_path, 'w', encoding='utf-8') as log: - with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) @@ -176,12 +203,12 @@ if __name__ == "__main__": ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) if state != 'success': diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index c4950c93422..01799447184 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -19,23 +19,22 @@ from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch from rerun_helper import RerunHelper +from tee_popen import TeePopen -DOWNLOAD_RETRIES_COUNT = 5 - IMAGES = [ - "yandex/clickhouse-integration-tests-runner", - "yandex/clickhouse-mysql-golang-client", - "yandex/clickhouse-mysql-java-client", - "yandex/clickhouse-mysql-js-client", - "yandex/clickhouse-mysql-php-client", - "yandex/clickhouse-postgresql-java-client", - "yandex/clickhouse-integration-test", - "yandex/clickhouse-kerberos-kdc", - "yandex/clickhouse-integration-helper", + "clickhouse/integration-tests-runner", + "clickhouse/mysql-golang-client", + "clickhouse/mysql-java-client", + "clickhouse/mysql-js-client", + 
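Note on the RUN_BY_HASH change: functional_test_check.py above now reads RUN_BY_HASH_NUM and RUN_BY_HASH_TOTAL from the environment, appends a " [N/TOTAL]" suffix to the check name, and forwards both values through get_additional_envs; integration_test_check.py just below does the same via get_json_params_dict. The patch only plumbs these values through to the test runners. A minimal sketch of how a runner might use them to pick its shard of tests follows; the crc32-based bucketing is an illustrative assumption, not the runners' actual scheme.

# Hypothetical sketch: splitting a test suite across shards using
# RUN_BY_HASH_NUM / RUN_BY_HASH_TOTAL. The real selection logic lives in
# the test runners, which are outside this patch; the bucketing is assumed.
import os
import zlib

def select_tests_for_this_shard(all_tests):
    num = int(os.getenv("RUN_BY_HASH_NUM", "0"))
    total = int(os.getenv("RUN_BY_HASH_TOTAL", "0"))
    if total == 0:  # splitting disabled -> run the full suite
        return list(all_tests)
    # A stable hash of the test name decides which shard runs it.
    return [t for t in all_tests if zlib.crc32(t.encode()) % total == num]

print(select_tests_for_this_shard(["00001_select", "00002_insert", "00003_alter"]))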
"clickhouse/mysql-php-client", + "clickhouse/postgresql-java-client", + "clickhouse/integration-test", + "clickhouse/kerberos-kdc", + "clickhouse/integration-helper", ] -def get_json_params_dict(check_name, pr_info, docker_images): +def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num): return { 'context_name': check_name, 'commit': pr_info.sha, @@ -45,6 +44,8 @@ def get_json_params_dict(check_name, pr_info, docker_images): 'shuffle_test_groups': False, 'use_tmpfs': False, 'disable_net_host': True, + 'run_by_hash_total': run_by_hash_total, + 'run_by_hash_num': run_by_hash_num, } def get_env_for_runner(build_path, repo_path, result_path, work_path): @@ -106,6 +107,15 @@ if __name__ == "__main__": check_name = sys.argv[1] + if 'RUN_BY_HASH_NUM' in os.environ: + run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) + run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) + check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' + else: + run_by_hash_num = 0 + run_by_hash_total = 0 + check_name_with_group = check_name + if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -114,12 +124,12 @@ if __name__ == "__main__": gh = Github(get_best_robot_token()) - rerun_helper = RerunHelper(gh, pr_info, check_name) + rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - images = get_images_with_versions(temp_path, IMAGES) + images = get_images_with_versions(reports_path, IMAGES) images_with_versions = {i.name: i.version for i in images} result_path = os.path.join(temp_path, "output_dir") if not os.path.exists(result_path): @@ -139,20 +149,19 @@ if __name__ == "__main__": json_path = os.path.join(work_path, 'params.json') with open(json_path, 'w', encoding='utf-8') as json_params: - json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions))) + json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions, run_by_hash_total, run_by_hash_num))) output_path_log = os.path.join(result_path, "main_script_log.txt") runner_path = os.path.join(repo_path, "tests/integration", "ci-runner.py") run_command = f"sudo -E {runner_path} | tee {output_path_log}" - with open(output_path_log, 'w', encoding='utf-8') as log: - with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log, env=my_env) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run tests successfully") - else: - logging.info("Some tests failed") + with TeePopen(run_command, output_path_log, my_env) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run tests successfully") + else: + logging.info("Some tests failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) @@ -162,9 +171,9 @@ if __name__ == "__main__": mark_flaky_tests(ch_helper, check_name, test_results) s3_helper = S3Helper('https://s3.amazonaws.com') - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name, False) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name_with_group, False) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name_with_group, 
description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index a26b19f4bc9..88d4595bc66 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -145,6 +145,47 @@ class PRInfo: return True return False + def can_skip_builds_and_use_version_from_master(self): + if 'force tests' in self.labels: + return False + + if self.changed_files is None or not self.changed_files: + return False + + for f in self.changed_files: + if (not f.startswith('tests/queries') + or not f.startswith('tests/integration') + or not f.startswith('tests/performance')): + return False + + return True + + def can_skip_integration_tests(self): + if 'force tests' in self.labels: + return False + + if self.changed_files is None or not self.changed_files: + return False + + for f in self.changed_files: + if not f.startswith('tests/queries') or not f.startswith('tests/performance'): + return False + + return True + + def can_skip_functional_tests(self): + if 'force tests' in self.labels: + return False + + if self.changed_files is None or not self.changed_files: + return False + + for f in self.changed_files: + if not f.startswith('tests/integration') or not f.startswith('tests/performance'): + return False + + return True + class FakePRInfo: def __init__(self): diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py index 1ce1c5839f4..aa4a130902b 100644 --- a/tests/ci/pvs_check.py +++ b/tests/ci/pvs_check.py @@ -2,7 +2,6 @@ # pylint: disable=line-too-long -import subprocess import os import json import logging @@ -16,6 +15,7 @@ from commit_status_helper import get_commit from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch from rerun_helper import RerunHelper +from tee_popen import TeePopen NAME = 'PVS Studio (actions)' LICENCE_NAME = 'Free license: ClickHouse, Yandex' @@ -73,9 +73,16 @@ if __name__ == "__main__": cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" commit = get_commit(gh, pr_info.sha) - try: - subprocess.check_output(cmd, shell=True) - except: + run_log_path = os.path.join(temp_path, 'run_log.log') + + with TeePopen(cmd, run_log_path) as process: + retcode = process.wait() + if retcode != 0: + logging.info("Run failed") + else: + logging.info("Run Ok") + + if retcode != 0: commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") sys.exit(1) diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 254f0f6c199..911d370a594 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -18,10 +18,11 @@ from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch from rerun_helper import RerunHelper +from tee_popen import TeePopen def get_run_command(build_path, 
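Note on the new PRInfo helpers: can_skip_builds_and_use_version_from_master, can_skip_integration_tests and can_skip_functional_tests decide, from the PR's changed files and the "force tests" label, whether parts of CI can be skipped. Assuming the intent is to skip only when every changed file sits under the listed tests/ directories, a compact version of that check could look like the sketch below (written against a plain list of paths rather than the PRInfo class):

# Hedged sketch of the skip decision, assuming the intent is: never skip
# when "force tests" is set or when nothing changed, and skip only if
# every changed file is under one of the allowed test directories.
def can_skip(changed_files, labels, allowed_prefixes):
    if "force tests" in labels:
        return False
    if not changed_files:
        return False
    return all(f.startswith(tuple(allowed_prefixes)) for f in changed_files)

# e.g. a stateless-test-only change would allow skipping integration runs
print(can_skip(["tests/queries/0_stateless/02100_example.sql"],
               labels=set(),
               allowed_prefixes=("tests/queries", "tests/performance")))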
result_folder, server_log_folder, image): - cmd = "docker run -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' " + \ + cmd = "docker run --cap-add=SYS_PTRACE -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' " + \ f"--volume={build_path}:/package_folder " \ f"--volume={result_folder}:/test_output " \ f"--volume={server_log_folder}:/var/log/clickhouse-server {image}" @@ -107,13 +108,12 @@ if __name__ == "__main__": run_command = get_run_command(packages_path, result_path, server_log_path, docker_image) logging.info("Going to run func tests: %s", run_command) - with open(run_log_path, 'w', encoding='utf-8') as log: - with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py new file mode 100644 index 00000000000..cbb915e6de7 --- /dev/null +++ b/tests/ci/tee_popen.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 + +from subprocess import Popen, PIPE, STDOUT +import sys +import os + + +# Very simple tee logic implementation. You can specify shell command, output +# logfile and env variables. After TeePopen is created you can only wait until +# it finishes. stderr and stdout will be redirected both to specified file and +# stdout. +class TeePopen: + # pylint: disable=W0102 + def __init__(self, command, log_file, env=os.environ.copy()): + self.command = command + self.log_file = log_file + self.env = env + + def __enter__(self): + # pylint: disable=W0201 + self.process = Popen(self.command, shell=True, universal_newlines=True, env=self.env, stderr=STDOUT, stdout=PIPE, bufsize=1) + self.log_file = open(self.log_file, 'w', encoding='utf-8') + return self + + def __exit__(self, t, value, traceback): + for line in self.process.stdout: + sys.stdout.write(line) + self.log_file.write(line) + + self.process.wait() + self.log_file.close() + + def wait(self): + for line in self.process.stdout: + sys.stdout.write(line) + self.log_file.write(line) + + return self.process.wait() diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 0c2cff083d5..abccbcd4512 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -17,6 +17,7 @@ from commit_status_helper import post_commit_status from clickhouse_helper import ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse from stopwatch import Stopwatch from rerun_helper import RerunHelper +from tee_popen import TeePopen IMAGE_NAME = 'clickhouse/unit-test' @@ -128,13 +129,12 @@ if __name__ == "__main__": logging.info("Going to run func tests: %s", run_command) - with open(run_log_path, 'w', encoding='utf-8') as log: - with subprocess.Popen(run_command, shell=True, stderr=log, stdout=log) as process: - retcode = process.wait() - if retcode == 0: - logging.info("Run successfully") - else: - logging.info("Run failed") + with TeePopen(run_command, run_log_path) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Run successfully") + else: + logging.info("Run failed") subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) diff --git a/tests/ci/approve_lambda/Dockerfile 
b/tests/ci/workflow_approve_rerun_lambda/Dockerfile similarity index 100% rename from tests/ci/approve_lambda/Dockerfile rename to tests/ci/workflow_approve_rerun_lambda/Dockerfile diff --git a/tests/ci/approve_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py similarity index 82% rename from tests/ci/approve_lambda/app.py rename to tests/ci/workflow_approve_rerun_lambda/app.py index 619c80ce299..436e9b06ede 100644 --- a/tests/ci/approve_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -25,8 +25,8 @@ SUSPICIOUS_PATTERNS = [ MAX_RETRY = 5 WorkflowDescription = namedtuple('WorkflowDescription', - ['name', 'action', 'run_id', 'event', 'workflow_id', - 'fork_owner_login', 'fork_branch']) + ['name', 'action', 'run_id', 'event', 'workflow_id', 'conclusion', 'status', + 'fork_owner_login', 'fork_branch', 'rerun_url', 'jobs_url', 'attempt', 'url']) TRUSTED_WORKFLOW_IDS = { 14586616, # Cancel workflows, always trusted @@ -38,6 +38,12 @@ TRUSTED_ORG_IDS = { 54801242, # clickhouse } +NEED_RERUN_WORKFLOWS = { + 13241696, # PR + 15834118, # Docs + 15522500, # MasterCI +} + # Individual trusted contributors who are not in any trusted organization. # Can be changed in runtime: we will append users that we learned to be in # a trusted org, to save GitHub API calls. @@ -180,6 +186,12 @@ def get_workflow_description_from_event(event): fork_branch = event['workflow_run']['head_branch'] name = event['workflow_run']['name'] workflow_id = event['workflow_run']['workflow_id'] + conclusion = event['workflow_run']['conclusion'] + attempt = event['workflow_run']['run_attempt'] + status = event['workflow_run']['status'] + jobs_url = event['workflow_run']['jobs_url'] + rerun_url = event['workflow_run']['rerun_url'] + url = event['workflow_run']['html_url'] return WorkflowDescription( name=name, action=action, @@ -188,6 +200,12 @@ def get_workflow_description_from_event(event): fork_owner_login=fork_owner, fork_branch=fork_branch, workflow_id=workflow_id, + conclusion=conclusion, + attempt=attempt, + status=status, + jobs_url=jobs_url, + rerun_url=rerun_url, + url=url ) def get_pr_author_and_orgs(pull_request): @@ -255,12 +273,49 @@ def get_token_from_aws(): installation_id = get_installation_id(encoded_jwt) return get_access_token(encoded_jwt, installation_id) +def check_need_to_rerun(workflow_description): + if workflow_description.attempt >= 2: + print("Not going to rerun workflow because it has already been tried two or more times") + return False + print("Going to check jobs") + + jobs = _exec_get_with_retry(workflow_description.jobs_url + "?per_page=100") + print("Got jobs", len(jobs['jobs'])) + for job in jobs['jobs']: + if job['conclusion'] not in ('success', 'skipped'): + print("Job", job['name'], "failed, checking steps") + for step in job['steps']: + # 'Complete job' is always the last step + if step['name'] == 'Complete job': + print("Found Complete job step for job", job['name']) + break + else: + print("Checked all steps and didn't find a Complete job step, going to rerun") + return True + + return False + +def rerun_workflow(workflow_description, token): + print("Going to rerun workflow") + _exec_post_with_retry(workflow_description.rerun_url, token) + def main(event): token = get_token_from_aws() event_data = json.loads(event['body']) workflow_description = get_workflow_description_from_event(event_data) print("Got workflow description", workflow_description) + if workflow_description.action == 'completed' and workflow_description.conclusion == 'failure': + print("Workflow",
workflow_description.url, "completed and failed, let's check for rerun") + + if workflow_description.workflow_id not in NEED_RERUN_WORKFLOWS: + print("Workflow", workflow_description.workflow_id, "not in list of rerunable workflows") + return + + if check_need_to_rerun(workflow_description): + rerun_workflow(workflow_description, token) + return + if workflow_description.action != "requested": print("Exiting, event action is", workflow_description.action) return diff --git a/tests/ci/approve_lambda/requirements.txt b/tests/ci/workflow_approve_rerun_lambda/requirements.txt similarity index 100% rename from tests/ci/approve_lambda/requirements.txt rename to tests/ci/workflow_approve_rerun_lambda/requirements.txt diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 01d632a1f50..a75e0a2b4ea 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -17,6 +17,8 @@ import math import http.client import urllib.parse import json +# for crc32 +import zlib from argparse import ArgumentParser from typing import Tuple, Union, Optional, Dict, Set, List @@ -57,6 +59,13 @@ MAX_RETRIES = 3 TEST_FILE_EXTENSIONS = ['.sql', '.sql.j2', '.sh', '.py', '.expect'] + +def stringhash(s): + # default hash() function consistent + # only during process invocation https://stackoverflow.com/a/42089311 + return zlib.crc32(s.encode('utf-8')) + + class HTTPError(Exception): def __init__(self, message=None, code=None): self.message = message @@ -205,26 +214,31 @@ def get_stacktraces_from_gdb(server_pid): # collect server stacktraces from system.stack_trace table # it does not work in Sandbox -def get_stacktraces_from_clickhouse(client, replicated_database=False): +def get_stacktraces_from_clickhouse(args): + settings_str = ' '.join([ + get_additional_client_options(args), + '--allow_introspection_functions=1', + '--skip_unavailable_shards=1', + ]) replicated_msg = \ - "{} --allow_introspection_functions=1 --skip_unavailable_shards=1 --query \ + "{} {} --query \ \"SELECT materialize((hostName(), tcpPort())) as host, thread_id, \ arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), \ arrayMap(x -> addressToLine(x), trace), \ arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace \ FROM clusterAllReplicas('test_cluster_database_replicated', 'system.stack_trace') \ - ORDER BY host, thread_id FORMAT Vertical\"".format(client) + ORDER BY host, thread_id FORMAT Vertical\"".format(args.client, settings_str) msg = \ - "{} --allow_introspection_functions=1 --query \ + "{} {} --query \ \"SELECT arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), \ arrayMap(x -> addressToLine(x), trace), \ arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace \ - FROM system.stack_trace FORMAT Vertical\"".format(client) + FROM system.stack_trace FORMAT Vertical\"".format(args.client, settings_str) try: return subprocess.check_output( - replicated_msg if replicated_database else msg, + replicated_msg if args.replicated_database else msg, shell=True, stderr=subprocess.STDOUT).decode('utf-8') except Exception as e: print(f"Error occurred while receiving stack traces from client: {e}") @@ -250,8 +264,7 @@ def print_stacktraces() -> None: if bt is None: print("\nCollecting stacktraces from system.stacktraces table:") - bt = get_stacktraces_from_clickhouse( - args.client, args.replicated_database) + bt = get_stacktraces_from_clickhouse(args) if bt is not None: print(bt) @@ -560,7 +573,7 @@ class TestCase: database = args.testcase_database # This is for .sh tests - os.environ["CLICKHOUSE_LOG_COMMENT"] = 
self.case_file + os.environ["CLICKHOUSE_LOG_COMMENT"] = args.testcase_basename params = { 'client': client + ' --database=' + database, @@ -752,7 +765,15 @@ class TestSuite: self.suite_tmp_path: str = suite_tmp_path self.suite: str = suite - self.all_tests: List[str] = self.get_tests_list(self.tests_in_suite_key_func) + filter_func = lambda x: True + + if args.run_by_hash_num is not None and args.run_by_hash_total is not None: + if args.run_by_hash_num > args.run_by_hash_total: + raise Exception(f"Incorrect run by hash, value {args.run_by_hash_num} bigger than total {args.run_by_hash_total}") + + filter_func = lambda x: stringhash(x) % args.run_by_hash_total == args.run_by_hash_num + + self.all_tests: List[str] = self.get_tests_list(self.tests_in_suite_key_func, filter_func) self.all_tags: Dict[str, Set[str]] = self.read_test_tags(self.suite_path, self.all_tests) self.sequential_tests = [] @@ -773,17 +794,17 @@ class TestSuite: return ('no-parallel' in self.all_tags[test_name]) or ('sequential' in self.all_tags[test_name]) - def get_tests_list(self, sort_key): + def get_tests_list(self, sort_key, filter_func): """ Return list of tests file names to run """ - all_tests = list(self.get_selected_tests()) + all_tests = list(self.get_selected_tests(filter_func)) all_tests = all_tests * self.args.test_runs all_tests.sort(key=sort_key) return all_tests - def get_selected_tests(self): + def get_selected_tests(self, filter_func): """ Find all files with tests, filter, render templates """ @@ -800,11 +821,13 @@ class TestSuite: continue if USE_JINJA and test_name.endswith(".gen.sql"): continue + if not filter_func(test_name): + continue test_name = self.render_test_template(j2env, self.suite_path, test_name) yield test_name @staticmethod - def readTestSuite(args, suite_dir_name: str): + def read_test_suite(args, suite_dir_name: str): def is_data_present(): return int(clickhouse_execute(args, 'EXISTS TABLE test.hits')) @@ -1188,7 +1211,7 @@ def main(args): if server_died.is_set(): break - test_suite = TestSuite.readTestSuite(args, suite) + test_suite = TestSuite.read_test_suite(args, suite) if test_suite is None: continue @@ -1321,6 +1344,9 @@ if __name__ == '__main__': parser.add_argument('--print-time', action='store_true', dest='print_time', help='Print test time') parser.add_argument('--check-zookeeper-session', action='store_true', help='Check ZooKeeper session uptime to determine if failed test should be retried') + parser.add_argument('--run-by-hash-num', type=int, help='Run tests matching crc32(test_name) % run_by_hash_total == run_by_hash_num') + parser.add_argument('--run-by-hash-total', type=int, help='Total test groups for crc32(test_name) % run_by_hash_total == run_by_hash_num') + group = parser.add_mutually_exclusive_group(required=False) group.add_argument('--zookeeper', action='store_true', default=None, dest='zookeeper', help='Run zookeeper related tests') group.add_argument('--no-zookeeper', action='store_false', default=None, dest='zookeeper', help='Do not run zookeeper related tests') diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 25d09a8c4c5..c8745294c5b 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -10,6 +10,8 @@ from collections import defaultdict import random import json import csv +# for crc32 +import zlib MAX_RETRY = 3 @@ -26,6 +28,9 @@ MAX_TIME_SECONDS = 3600 MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes TASK_TIMEOUT = 8 * 60 * 60 # 8 hours +def stringhash(s): + return zlib.crc32(s.encode('utf-8')) + 
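The run_by_hash_num / run_by_hash_total parameters threaded through clickhouse-test and ci-runner.py above shard the test set by crc32 of the test (or test-file) name, so each CI job runs a disjoint slice of the suite. A minimal sketch of that scheme with hypothetical test names; only the crc32 hashing and the modulo filter mirror the scripts in this diff:

#!/usr/bin/env python3
# Sketch of crc32-based test sharding (hypothetical test names).
import zlib

def stringhash(s):
    # crc32 is stable across processes, unlike Python's built-in hash().
    return zlib.crc32(s.encode('utf-8'))

def shard(tests, run_by_hash_num, run_by_hash_total):
    # Keep only the tests that fall into this job's bucket.
    return [t for t in tests if stringhash(t) % run_by_hash_total == run_by_hash_num]

tests = ['00001_select.sql', '00002_insert.sql', '00003_join.sql']  # hypothetical
for num in range(3):
    print(num, shard(tests, num, 3))
# Each test lands in exactly one bucket, so jobs with RUN_BY_HASH_NUM=0..2 and
# RUN_BY_HASH_TOTAL=3 together cover the whole suite without overlap.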
def get_tests_to_run(pr_info): result = set([]) changed_files = pr_info['changed_files'] @@ -183,6 +188,13 @@ class ClickhouseIntegrationTestsRunner: self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) + if 'run_by_hash_total' in self.params: + self.run_by_hash_total = self.params['run_by_hash_total'] + self.run_by_hash_num = self.params['run_by_hash_num'] + else: + self.run_by_hash_total = 0 + self.run_by_hash_num = 0 + def path(self): return self.result_path @@ -576,6 +588,15 @@ class ClickhouseIntegrationTestsRunner: self._install_clickhouse(build_path) logging.info("Dump iptables before run %s", subprocess.check_output("sudo iptables -L", shell=True)) all_tests = self._get_all_tests(repo_path) + + if self.run_by_hash_total != 0: + grouped_tests = self.group_test_by_file(all_tests) + all_filtered_by_hash_tests = [] + for group, tests_in_group in grouped_tests.items(): + if stringhash(group) % self.run_by_hash_total == self.run_by_hash_num: + all_filtered_by_hash_tests += tests_in_group + all_tests = all_filtered_by_hash_tests + parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path) logging.info("Found %s tests first 3 %s", len(all_tests), ' '.join(all_tests[:3])) filtered_sequential_tests = list(filter(lambda test: test in all_tests, parallel_skip_tests)) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0817cc882b4..66bc8a0ab09 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -725,6 +725,8 @@ class ClickHouseCluster: env_variables['MONGO_HOST'] = self.mongo_host env_variables['MONGO_EXTERNAL_PORT'] = str(self.mongo_port) env_variables['MONGO_INTERNAL_PORT'] = "27017" + env_variables['MONGO_EXTERNAL_PORT_2'] = "27018" + env_variables['MONGO_INTERNAL_PORT_2'] = "27017" self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]) self.base_mongo_cmd = ['docker-compose', '--env-file', instance.env_file, '--project-name', self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')] @@ -2107,7 +2109,7 @@ class ClickHouseInstance: except Exception as e: logging.warning(f"Current start attempt failed. 
Will kill {pid} just in case.") self.exec_in_container(["bash", "-c", f"kill -9 {pid}"], user='root', nothrow=True) - time.sleep(time_to_sleep) + time.sleep(time_to_sleep) raise Exception("Cannot start ClickHouse, see additional info in logs") diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index 71aedb78e5b..a8f4968956c 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -11,13 +11,14 @@ node2 = cluster.add_instance('node2', main_configs=['configs/wide_parts_only.xml def start_cluster(): try: cluster.start() - for i, node in enumerate([node1, node2]): - node.query_with_retry( - '''CREATE TABLE t(date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/t', '{}') - PARTITION BY toYYYYMM(date) - ORDER BY id'''.format(i)) - + create_query = '''CREATE TABLE t(date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/t', '{}') + PARTITION BY toYYYYMM(date) + ORDER BY id''' + node1.query(create_query.format(1)) + node1.query("DETACH TABLE t") # stop being leader + node2.query(create_query.format(2)) + node1.query("ATTACH TABLE t") yield cluster finally: diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/node.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/node.xml new file mode 100644 index 00000000000..2cd957a3720 --- /dev/null +++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/node.xml @@ -0,0 +1,25 @@ + + + node + + + localhost + 9000 + default + + system + select dummy, toString(dummy) from system.one + + + 0 + + + key + + name + String + + + + + diff --git a/tests/integration/test_dictionaries_dependency_xml/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py index 1b3ea32d09c..13635c7b969 100644 --- a/tests/integration/test_dictionaries_dependency_xml/test.py +++ b/tests/integration/test_dictionaries_dependency_xml/test.py @@ -3,7 +3,7 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry DICTIONARY_FILES = ['configs/dictionaries/dep_x.xml', 'configs/dictionaries/dep_y.xml', - 'configs/dictionaries/dep_z.xml'] + 'configs/dictionaries/dep_z.xml', 'configs/dictionaries/node.xml'] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', dictionaries=DICTIONARY_FILES, stay_alive=True) @@ -117,3 +117,10 @@ def test_dependent_tables(started_cluster): query("drop table system.join") query("drop database a") query("drop database lazy") + + +def test_xml_dict_same_name(started_cluster): + instance.query("create table default.node ( key UInt64, name String ) Engine=Dictionary(node);") + instance.restart_clickhouse() + assert "node" in instance.query("show tables from default") + instance.query("drop table default.node") diff --git a/tests/integration/test_dictionaries_update_field/test.py b/tests/integration/test_dictionaries_update_field/test.py index 2e46403c63b..8fb0d67e8b8 100644 --- a/tests/integration/test_dictionaries_update_field/test.py +++ b/tests/integration/test_dictionaries_update_field/test.py @@ -34,7 +34,7 @@ def started_cluster(): @pytest.mark.parametrize("dictionary_name,dictionary_type", [ ("flat_update_field_dictionary", "FLAT"), ("simple_key_hashed_update_field_dictionary", "HASHED"), - ("complex_key_hashed_update_field_dictionary", "HASHED") + ("complex_key_hashed_update_field_dictionary", "COMPLEX_KEY_HASHED") 
]) def test_update_field(started_cluster, dictionary_name, dictionary_type): create_dictionary_query = """ diff --git a/tests/integration/test_executable_table_function/test.py b/tests/integration/test_executable_table_function/test.py index 1473212552a..f5537e26b94 100644 --- a/tests/integration/test_executable_table_function/test.py +++ b/tests/integration/test_executable_table_function/test.py @@ -63,8 +63,8 @@ def test_executable_storage_no_input(started_cluster): def test_executable_storage_input(started_cluster): skip_test_msan(node) node.query("DROP TABLE IF EXISTS test_table") - node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_no_input.sh', 'TabSeparated', (SELECT 1))") - assert node.query("SELECT * FROM test_table") == '1\n' + node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_input.sh', 'TabSeparated', (SELECT 1))") + assert node.query("SELECT * FROM test_table") == 'Key 1\n' node.query("DROP TABLE test_table") def test_executable_storage_input_multiple_pipes(started_cluster): diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py index 0dd36d64516..cba9e93c056 100644 --- a/tests/integration/test_postgresql_replica_database_engine_1/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py @@ -985,18 +985,29 @@ def test_abrupt_server_restart_while_heavy_replication(started_cluster): cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) -def test_quoting(started_cluster): - table_name = 'user' - conn = get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) +def test_quoting_1(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() + table_name = 'user' create_postgres_table(cursor, table_name); - instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(50)".format(table_name)) + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)") create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) check_tables_are_synchronized(table_name); - drop_postgres_table(cursor, table_name) drop_materialized_db() + drop_postgres_table(cursor, table_name) + + +def test_quoting_2(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) + cursor = conn.cursor() + table_name = 'user' + create_postgres_table(cursor, table_name); + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)") + create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=[f"materialized_postgresql_tables_list = '{table_name}'"]) + check_tables_are_synchronized(table_name); + drop_materialized_db() + drop_postgres_table(cursor, table_name) def test_user_managed_slots(started_cluster): diff --git a/tests/integration/test_prometheus_endpoint/test.py b/tests/integration/test_prometheus_endpoint/test.py index 06276803c3d..60d9164acd2 100644 --- a/tests/integration/test_prometheus_endpoint/test.py +++ b/tests/integration/test_prometheus_endpoint/test.py @@ -30,7 +30,7 @@ def parse_response_line(line): if line.startswith("#"): return {} - match = re.match('^([a-zA-Z_:][a-zA-Z0-9_:]+)(\{.*\})? 
(\d)', line) + match = re.match('^([a-zA-Z_:][a-zA-Z0-9_:]+)(\{.*\})? -?(\d)', line) assert match, line name, _, val = match.groups() return {name: int(val)} diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index fda4d89805a..ede1dafefb1 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -310,6 +310,18 @@ def test_seekable_formats(started_cluster): result = node1.query(f"SELECT count() FROM {table_function}") assert(int(result) == 5000000) +def test_read_table_with_default(started_cluster): + hdfs_api = started_cluster.hdfs_api + + data = "n\n100\n" + hdfs_api.write_data("/simple_table_function", data) + assert hdfs_api.read_data("/simple_table_function") == data + + output = "n\tm\n100\t200\n" + assert node1.query( + "select * from hdfs('hdfs://hdfs1:9000/simple_table_function', 'TSVWithNames', 'n UInt32, m UInt32 DEFAULT n * 2') FORMAT TSVWithNames") == output + + if __name__ == '__main__': cluster.start() diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto new file mode 100644 index 00000000000..791a5086866 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto @@ -0,0 +1,19 @@ +syntax = "proto3"; +option optimize_for = SPEED; +message Message { + uint32 tnow = 1; + string server = 2; + string clien = 3; + uint32 sPort = 4; + uint32 cPort = 5; + repeated dd r = 6; + string method = 7; +} + +message dd { + string name = 1; + uint32 class = 2; + uint32 type = 3; + uint64 ttl = 4; + bytes data = 5; +} \ No newline at end of file diff --git a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py new file mode 100644 index 00000000000..69702307e7f --- /dev/null +++ b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py @@ -0,0 +1,180 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: clickhouse_path/format_schemas/message_with_repeated.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='clickhouse_path/format_schemas/message_with_repeated.proto', + package='', + syntax='proto3', + serialized_options=_b('H\001'), + serialized_pb=_b('\n:clickhouse_path/format_schemas/message_with_repeated.proto\"t\n\x07Message\x12\x0c\n\x04tnow\x18\x01 \x01(\r\x12\x0e\n\x06server\x18\x02 \x01(\t\x12\r\n\x05\x63lien\x18\x03 \x01(\t\x12\r\n\x05sPort\x18\x04 \x01(\r\x12\r\n\x05\x63Port\x18\x05 \x01(\r\x12\x0e\n\x01r\x18\x06 \x03(\x0b\x32\x03.dd\x12\x0e\n\x06method\x18\x07 \x01(\t\"J\n\x02\x64\x64\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05\x63lass\x18\x02 \x01(\r\x12\x0c\n\x04type\x18\x03 \x01(\r\x12\x0b\n\x03ttl\x18\x04 \x01(\x04\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\x42\x02H\x01\x62\x06proto3') +) + + + + +_MESSAGE = _descriptor.Descriptor( + name='Message', + full_name='Message', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='tnow', full_name='Message.tnow', index=0, + number=1, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='server', full_name='Message.server', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='clien', full_name='Message.clien', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='sPort', full_name='Message.sPort', index=3, + number=4, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='cPort', full_name='Message.cPort', index=4, + number=5, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='r', full_name='Message.r', index=5, + number=6, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='method', full_name='Message.method', index=6, + number=7, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, 
containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=62, + serialized_end=178, +) + + +_DD = _descriptor.Descriptor( + name='dd', + full_name='dd', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='dd.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='class', full_name='dd.class', index=1, + number=2, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='type', full_name='dd.type', index=2, + number=3, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='ttl', full_name='dd.ttl', index=3, + number=4, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='data', full_name='dd.data', index=4, + number=5, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=180, + serialized_end=254, +) + +_MESSAGE.fields_by_name['r'].message_type = _DD +DESCRIPTOR.message_types_by_name['Message'] = _MESSAGE +DESCRIPTOR.message_types_by_name['dd'] = _DD +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Message = _reflection.GeneratedProtocolMessageType('Message', (_message.Message,), dict( + DESCRIPTOR = _MESSAGE, + __module__ = 'clickhouse_path.format_schemas.message_with_repeated_pb2' + # @@protoc_insertion_point(class_scope:Message) + )) +_sym_db.RegisterMessage(Message) + +dd = _reflection.GeneratedProtocolMessageType('dd', (_message.Message,), dict( + DESCRIPTOR = _DD, + __module__ = 'clickhouse_path.format_schemas.message_with_repeated_pb2' + # @@protoc_insertion_point(class_scope:dd) + )) +_sym_db.RegisterMessage(dd) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index c909926d8f0..1ee7f3cf125 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -36,6 +36,7 @@ from kafka.admin import NewTopic from . import kafka_pb2 from . import social_pb2 +from . import message_with_repeated_pb2 # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. 
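The test_issue26643 hunk below builds its Kafka payloads by hand: each serialized Protobuf message is preceded by a varint of its length, and several framed messages can be concatenated into one payload. A minimal sketch of that framing, assuming google.protobuf is installed; the two-byte payloads are hypothetical stand-ins for real message_with_repeated_pb2.Message.SerializeToString() output:

#!/usr/bin/env python3
# Sketch of varint length-prefixed framing (the layout the test builds by hand).
from google.protobuf.internal.encoder import _VarintBytes

def frame(serialized_messages):
    # Concatenate varint(len) + bytes for each serialized message.
    data = b''
    for raw in serialized_messages:
        data += _VarintBytes(len(raw)) + raw
    return data

# Hypothetical 2-byte payloads standing in for SerializeToString() results.
payload = frame([b'\x08\x01', b'\x08\x02'])
print(payload.hex())  # '020801020802': each message preceded by its length byte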
@@ -3219,6 +3220,124 @@ def test_kafka_predefined_configuration(kafka_cluster): kafka_check_result(result, True) +# https://github.com/ClickHouse/ClickHouse/issues/26643 +def test_issue26643(kafka_cluster): + + # for backporting: + # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + admin_client = KafkaAdminClient(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)) + producer = KafkaProducer(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer) + + topic_list = [] + topic_list.append(NewTopic(name="test_issue26643", num_partitions=4, replication_factor=1)) + admin_client.create_topics(new_topics=topic_list, validate_only=False) + + msg = message_with_repeated_pb2.Message( + tnow=1629000000, + server='server1', + clien='host1', + sPort=443, + cPort=50000, + r=[ + message_with_repeated_pb2.dd(name='1', type=444, ttl=123123, data=b'adsfasd'), + message_with_repeated_pb2.dd(name='2') + ], + method='GET' + ) + + data = b'' + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + msg = message_with_repeated_pb2.Message( + tnow=1629000002 + ) + + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + producer.send(topic="test_issue26643", value=data) + + data = _VarintBytes(len(serialized_msg)) + serialized_msg + producer.send(topic="test_issue26643", value=data) + producer.flush() + + instance.query(''' + CREATE TABLE IF NOT EXISTS test.test_queue + ( + `tnow` UInt32, + `server` String, + `client` String, + `sPort` UInt16, + `cPort` UInt16, + `r.name` Array(String), + `r.class` Array(UInt16), + `r.type` Array(UInt16), + `r.ttl` Array(UInt32), + `r.data` Array(String), + `method` String + ) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'test_issue26643', + kafka_group_name = 'test_issue26643_group', + kafka_format = 'Protobuf', + kafka_schema = 'message_with_repeated.proto:Message', + kafka_num_consumers = 4, + kafka_skip_broken_messages = 10000; + + SET allow_suspicious_low_cardinality_types=1; + + CREATE TABLE test.log + ( + `tnow` DateTime CODEC(DoubleDelta, LZ4), + `server` LowCardinality(String), + `client` LowCardinality(String), + `sPort` LowCardinality(UInt16), + `cPort` UInt16 CODEC(T64, LZ4), + `r.name` Array(String), + `r.class` Array(LowCardinality(UInt16)), + `r.type` Array(LowCardinality(UInt16)), + `r.ttl` Array(LowCardinality(UInt32)), + `r.data` Array(String), + `method` LowCardinality(String) + ) + ENGINE = MergeTree + PARTITION BY toYYYYMMDD(tnow) + ORDER BY (tnow, server) + TTL toDate(tnow) + toIntervalMonth(1000) + SETTINGS index_granularity = 16384, merge_with_ttl_timeout = 7200; + + CREATE MATERIALIZED VIEW test.test_consumer TO test.log AS + SELECT + toDateTime(a.tnow) AS tnow, + a.server AS server, + a.client AS client, + a.sPort AS sPort, + a.cPort AS cPort, + a.`r.name` AS `r.name`, + a.`r.class` AS `r.class`, + a.`r.type` AS `r.type`, + a.`r.ttl` AS `r.ttl`, + a.`r.data` AS `r.data`, + a.method AS method + FROM test.test_queue AS a; + ''') + + instance.wait_for_log_line("Committed offset") + result = instance.query('SELECT * FROM test.log') + + expected = '''\ +2021-08-15 07:00:00 server1 443 50000 ['1','2'] [0,0] [444,0] [123123,0] ['adsfasd',''] GET +2021-08-15 07:00:02 0 0 [] [] [] [] [] +2021-08-15 07:00:02 0 0 [] [] [] [] [] +''' + assert TSV(result) == TSV(expected) + + # kafka_cluster.open_bash_shell('instance') + + if 
__name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_mongodb/configs/named_collections.xml b/tests/integration/test_storage_mongodb/configs/named_collections.xml index feb6b55af02..5f7db390982 100644 --- a/tests/integration/test_storage_mongodb/configs/named_collections.xml +++ b/tests/integration/test_storage_mongodb/configs/named_collections.xml @@ -6,7 +6,7 @@ mongo1 27017 test - simple_table
+ simple_table diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 1a5de353d7d..2d27ec18018 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -20,8 +20,12 @@ def started_cluster(request): cluster.shutdown() -def get_mongo_connection(started_cluster, secure=False): - connection_str = 'mongodb://root:clickhouse@localhost:{}'.format(started_cluster.mongo_port) +def get_mongo_connection(started_cluster, secure=False, with_credentials=True): + connection_str = '' + if with_credentials: + connection_str = 'mongodb://root:clickhouse@localhost:{}'.format(started_cluster.mongo_port) + else: + connection_str = 'mongodb://localhost:27018' if secure: connection_str += '/?tls=true&tlsAllowInvalidCertificates=true' return pymongo.MongoClient(connection_str) @@ -138,4 +142,20 @@ def test_predefined_connection_configuration(started_cluster): node = started_cluster.instances['node'] node.query("create table simple_mongo_table(key UInt64, data String) engine = MongoDB(mongo1)") + assert node.query("SELECT count() FROM simple_mongo_table") == '100\n' + simple_mongo_table.drop() + +@pytest.mark.parametrize('started_cluster', [False], indirect=['started_cluster']) +def test_no_credentials(started_cluster): + mongo_connection = get_mongo_connection(started_cluster, with_credentials=False) + db = mongo_connection['test'] + simple_mongo_table = db['simple_table'] + data = [] + for i in range(0, 100): + data.append({'key': i, 'data': hex(i * i)}) + simple_mongo_table.insert_many(data) + + node = started_cluster.instances['node'] + node.query("create table simple_mongo_table_2(key UInt64, data String) engine = MongoDB('mongo2:27017', 'test', 'simple_table', '', '')") + assert node.query("SELECT count() FROM simple_mongo_table_2") == '100\n' simple_mongo_table.drop() diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 8d38234ccdd..3c22f2ed380 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -36,6 +36,8 @@ def test_mutate_and_upgrade(start_cluster): node1.query("ALTER TABLE mt DELETE WHERE id = 2", settings={"mutations_sync": "2"}) node2.query("SYSTEM SYNC REPLICA mt", timeout=15) + node2.query("DETACH TABLE mt") # stop being leader + node1.query("DETACH TABLE mt") # stop being leader node1.restart_with_latest_version(signal=9) node2.restart_with_latest_version(signal=9) @@ -83,6 +85,7 @@ def test_upgrade_while_mutation(start_cluster): node3.query("SYSTEM STOP MERGES mt1") node3.query("ALTER TABLE mt1 DELETE WHERE id % 2 == 0") + node3.query("DETACH TABLE mt1") # stop being leader node3.restart_with_latest_version(signal=9) # checks for readonly diff --git a/tests/performance/explain_ast.xml b/tests/performance/explain_ast.xml new file mode 100644 index 00000000000..0daa748de83 --- /dev/null +++ b/tests/performance/explain_ast.xml @@ -0,0 +1,5911 @@ + + + + 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) 
AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 
2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, 
c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, 
+datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, 
+max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, 
toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, 
+assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] 
AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, 
+avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS 
c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS 
c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 
+GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, 
+joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), 
arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 
+WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), 
c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, 
+avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 
+FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM 
database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE 
c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, 
+c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= 
(parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS 
c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / 
t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS 
c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 
+)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS 
table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL 
LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) 
AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL 
AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +FORMAT Null + ]]> + + diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml index 79ce167a363..dbf6df160ed 100644 --- a/tests/performance/reinterpret_as.xml +++ b/tests/performance/reinterpret_as.xml @@ -191,7 +191,7 @@ toInt256(number) as d, toString(number) as f, toFixedString(f, 20) as g - FROM numbers_mt(200000000) + FROM numbers_mt(100000000) SETTINGS max_threads = 8 FORMAT Null
diff --git a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql index 6bc5fe268d6..f9cbf92db41 100644 --- a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql +++ b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql @@ -1,7 +1,6 @@ -- Tags: replica, distributed SET max_parallel_replicas = 2; - DROP TABLE IF EXISTS report; CREATE TABLE report(id UInt32, event_date Date, priority UInt32, description String) ENGINE = MergeTree(event_date, intHash32(id), (id, event_date, intHash32(id)), 8192); diff --git a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference index 1cc42544311..f757a86aeee 100644 --- a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference +++ b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference @@ -35,6 +35,8 @@ slice [2,NULL,4,5] ['b','c','d'] ['b',NULL,'d'] +[] 1 +[] 1 push back \N [1,1] diff --git a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql index 8f2f0811193..c87d52d2478 100644 --- a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql +++ b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql @@ -36,6 +36,7 @@ select arraySlice([1, 2, 3, 4, 5, 6], 10, 1); select arraySlice([1, 2, Null, 4, 5, 6], 2, 4); select arraySlice(['a', 'b', 'c', 'd', 'e'], 2, 3); select arraySlice([Null, 'b', Null, 'd', 'e'], 2, 3); +select arraySlice([], materialize(NULL), NULL), 1 from numbers(2); select 'push back'; select arrayPushBack(Null, 1); diff --git a/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.reference b/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.reference index 8b1acc12b63..e69de29bb2d 100644 --- a/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.reference +++ b/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.reference @@ -1,10 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 diff --git a/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.sql b/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.sql index de3000533a1..1f4cb2a36b2 100644 --- a/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.sql +++ b/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.sql @@ -1,4 +1,4 @@ -- Tags: distributed SET max_execution_time = 1, timeout_overflow_mode = 'break'; -SELECT DISTINCT * FROM remote('127.0.0.{2,3}', system.numbers) WHERE number < 10; +SELECT * FROM remote('127.0.0.{2,3}', system.numbers) WHERE number < 10 FORMAT Null; diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference new file mode 100644 index 00000000000..12550ffbf28 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference @@ -0,0 +1,30 @@ +e4048ead-30a2-45e5-90be-2af1c7137523 dummy [1] [50639] [58114] [[5393]] [[1]] [[3411]] [[17811]] [[(10,20)]] + +Binary representation: +00000000 44 0a 24 65 34 30 34 38 65 61 64 2d 33 30 61 32 |D.$e4048ead-30a2| +00000010 2d 34 35 65 35 2d 39 30 62 65 2d 32 61 66 31 63 |-45e5-90be-2af1c| +00000020 37 31 33 37 35 32 33 62 1c 10 01 
18 cf 8b 03 20 |7137523b....... | +00000030 82 c6 03 5a 10 28 01 30 91 2a 40 93 8b 01 52 05 |...Z.(.0.*@...R.| +00000040 4d 00 00 a0 41 |M...A| +00000045 + +MESSAGE #1 AT 0x00000001 +identifier: "e4048ead-30a2-45e5-90be-2af1c7137523" +modules { + module_id: 1 + supply: 50639 + temp: 58114 + nodes { + node_id: 1 + opening_time: 5393 + current: 17811 + coords { + y: 20 + } + } +} + +Binary representation is as expected + +e4048ead-30a2-45e5-90be-2af1c7137523 [1] [50639] [58114] [[5393]] [[1]] [[]] [[17811]] [[(0,20)]] +e4048ead-30a2-45e5-90be-2af1c7137523 dummy [1] [50639] [58114] [[5393]] [[1]] [[3411]] [[17811]] [[(10,20)]] diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh new file mode 100755 index 00000000000..ed35df5e98b --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +# https://github.com/ClickHouse/ClickHouse/issues/31160 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO table_skipped_column_in_nested_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM table_skipped_column_in_nested_00825 ORDER BY unused1" + +rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE table_skipped_column_in_nested_00825" diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference b/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference index 184a6c5ba51..77f48f2832c 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference @@ -1,22 +1,22 @@ ---TUMBLE--- ||---WINDOW COLUMN NAME--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, 
toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE default.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---PARTITION--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(____timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 
8192 ||---IDENTIFIER--- -CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE default.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---PARTITION--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql index 6c607ba689e..777c5ae2a5a 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql @@ -1,71 +1,75 @@ +-- Tags: no-parallel + SET allow_experimental_window_view = 1; +DROP DATABASE IF EXISTS test_01047; +CREATE DATABASE test_01047 ENGINE=Ordinary; -DROP TABLE IF EXISTS mt; +DROP TABLE IF EXISTS test_01047.mt; -CREATE TABLE mt(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE test_01047.mt(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY 
TUMBLE(timestamp, INTERVAL '1' SECOND) AS SELECT count(a), TUMBLE_END(wid) AS count FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND) as wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a), tumbleEnd(wid) AS count FROM test_01047.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid FROM mt GROUP BY wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---IDENTIFIER---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY (TUMBLE(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY TUMBLE(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM mt GROUP BY b, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---FUNCTION---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY (TUMBLE(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY TUMBLE(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM mt GROUP BY plus(a, b) as _type, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---PARTITION---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, TUMBLE(now(), INTERVAL '1' SECOND) AS wid FROM mt GROUP BY wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY HOP(timestamp, INTERVAL '1' 
SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count, HOP_END(wid) FROM mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM mt GROUP BY wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---IDENTIFIER---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY (HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM mt GROUP BY b, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---FUNCTION---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY (HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM mt GROUP BY plus(a, b) as _type, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---PARTITION---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, HOP_END(wid) FROM mt GROUP BY HOP(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01047.wv; +DROP TABLE IF EXISTS test_01047.`.inner.wv`; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, 
hopEnd(wid) FROM test_01047.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +SHOW CREATE TABLE test_01047.`.inner.wv`; -DROP TABLE wv; -DROP TABLE mt; \ No newline at end of file +DROP TABLE test_01047.wv; +DROP TABLE test_01047.mt; diff --git a/tests/queries/0_stateless/01048_window_view_parser.reference b/tests/queries/0_stateless/01048_window_view_parser.reference index 84ede9be463..6625313f572 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.reference +++ b/tests/queries/0_stateless/01048_window_view_parser.reference @@ -1,26 +1,26 @@ ---TUMBLE--- ||---WINDOW COLUMN NAME--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(1))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(1))`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(1))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1))`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(1))`)\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE default.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, 
Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---TimeZone--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`)\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`)\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = 
AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE default.`.inner.wv`\n(\n `b` Int32,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE default.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---TimeZone--- -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`)\nSETTINGS index_granularity = 8192 -CREATE TABLE default.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, 
toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql index eebb36aefdf..3f57f6fbd91 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.sql +++ b/tests/queries/0_stateless/01048_window_view_parser.sql @@ -1,93 +1,82 @@ +-- Tags: no-parallel + SET allow_experimental_window_view = 1; +DROP DATABASE IF EXISTS test_01048; +CREATE DATABASE test_01048 ENGINE=Ordinary; -DROP TABLE IF EXISTS mt; -DROP TABLE IF EXISTS `.inner.wv`; +DROP TABLE IF EXISTS test_01048.mt; -CREATE TABLE mt(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE test_01048.mt(a Int32, b Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count, TUMBLE_END(wid) as wend FROM mt GROUP BY TUMBLE(timestamp, INTERVAL 1 SECOND) as wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumbleEnd(wid) as wend FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL 1 SECOND) as wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid FROM mt GROUP BY wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01048.mt GROUP BY wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---IDENTIFIER---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count FROM mt GROUP BY b, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid, b; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS 
SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, b; +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---FUNCTION---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count FROM mt GROUP BY plus(a, b) as _type, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid, plus(a, b); -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, plus(a, b); +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---TimeZone---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count, TUMBLE(timestamp, INTERVAL '1' SECOND, 'Asia/Shanghai') AS wid FROM mt GROUP BY wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND, 'Asia/Shanghai') AS wid FROM test_01048.mt GROUP BY wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count, HOP_END(wid) as wend FROM mt GROUP BY HOP(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND) as wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND) as wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM mt GROUP BY wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01048.mt GROUP BY wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---IDENTIFIER---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count FROM mt GROUP BY b, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count FROM mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, b; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' 
SECOND) AS wid, b; +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---FUNCTION---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count FROM mt GROUP BY plus(a, b) as _type, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---TimeZone---'; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count, HOP_END(wid) as wend FROM mt GROUP BY HOP(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'Asia/Shanghai') as wid; -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'Asia/Shanghai') as wid; +SHOW CREATE TABLE test_01048.`.inner.wv`; -DROP TABLE IF EXISTS wv; -DROP TABLE IF EXISTS `.inner.wv`; -CREATE WINDOW VIEW wv AS SELECT count(a) AS count FROM mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, plus(a, b); -SHOW CREATE TABLE `.inner.wv`; +DROP TABLE IF EXISTS test_01048.wv; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, plus(a, b); +SHOW CREATE TABLE test_01048.`.inner.wv`; -DROP TABLE wv; -DROP TABLE mt; \ No newline at end of file +DROP TABLE test_01048.wv; +DROP TABLE test_01048.mt; diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.reference b/tests/queries/0_stateless/01049_window_view_window_functions.reference index e8813db5a7d..2d49664b280 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.reference +++ b/tests/queries/0_stateless/01049_window_view_window_functions.reference @@ -1,69 +1,69 @@ -- { echo } -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, 'US/Samoa'); ('2020-01-09 12:00:01','2020-01-09 12:00:02') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, 'US/Samoa'); ('2020-01-09 12:00:00','2020-01-09 12:01:00') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' HOUR, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' HOUR, 'US/Samoa'); ('2020-01-09 12:00:00','2020-01-09 13:00:00') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); ('2020-01-09 00:00:00','2020-01-10 00:00:00') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, 'US/Samoa'); ('2020-01-06','2020-01-13') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' MONTH, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' MONTH, 'US/Samoa'); ('2020-01-01','2020-02-01') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), 
INTERVAL '1' QUARTER, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' QUARTER, 'US/Samoa'); ('2020-01-01','2020-04-01') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' YEAR, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' YEAR, 'US/Samoa'); ('2020-01-01','2021-01-01') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); ('2020-01-09 00:00:00','2020-01-10 00:00:00') -SELECT TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); 2020-01-09 00:00:00 -SELECT toDateTime(TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-09 00:00:00 -SELECT toDateTime(TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-09 00:00:00 -SELECT TUMBLE_START(TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); +SELECT tumbleStart(tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); 2020-01-09 00:00:00 -SELECT TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); 2020-01-10 00:00:00 -SELECT toDateTime(TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-10 00:00:00 -SELECT toDateTime(TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-10 00:00:00 -SELECT TUMBLE_END(TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); +SELECT tumbleEnd(tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); 2020-01-10 00:00:00 -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'US/Samoa'); ('2020-01-09 11:59:59','2020-01-09 12:00:02') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, INTERVAL 3 MINUTE, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, INTERVAL 3 MINUTE, 'US/Samoa'); ('2020-01-09 11:58:00','2020-01-09 12:01:00') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 HOUR, INTERVAL 3 HOUR, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 HOUR, INTERVAL 3 HOUR, 'US/Samoa'); ('2020-01-09 10:00:00','2020-01-09 13:00:00') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 DAY, INTERVAL 3 DAY, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 DAY, INTERVAL 3 DAY, 'US/Samoa'); 
('2020-01-07 00:00:00','2020-01-10 00:00:00') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, INTERVAL 3 WEEK, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, INTERVAL 3 WEEK, 'US/Samoa'); ('2019-12-23','2020-01-13') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MONTH, INTERVAL 3 MONTH, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MONTH, INTERVAL 3 MONTH, 'US/Samoa'); ('2019-11-01','2020-02-01') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 QUARTER, INTERVAL 3 QUARTER, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 QUARTER, INTERVAL 3 QUARTER, 'US/Samoa'); ('2019-07-01','2020-04-01') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 YEAR, INTERVAL 3 YEAR, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 YEAR, INTERVAL 3 YEAR, 'US/Samoa'); ('2018-01-01','2021-01-01') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); ('2020-01-07 00:00:00','2020-01-10 00:00:00') -SELECT HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); 2020-01-07 00:00:00 -SELECT toDateTime(HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-07 00:00:00 -SELECT toDateTime(HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-07 00:00:00 -SELECT HOP_START(HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); +SELECT hopStart(hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); 2020-01-07 00:00:00 -SELECT HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); 2020-01-10 00:00:00 -SELECT toDateTime(HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-10 00:00:00 -SELECT toDateTime(HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-10 00:00:00 -SELECT HOP_END(HOP(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); +SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); 2019-01-10 00:00:00 diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.sql 
b/tests/queries/0_stateless/01049_window_view_window_functions.sql index 4c98f9445e1..617019bd2c6 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.sql +++ b/tests/queries/0_stateless/01049_window_view_window_functions.sql @@ -1,38 +1,38 @@ -- { echo } -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' HOUR, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' MONTH, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' QUARTER, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' YEAR, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' HOUR, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' MONTH, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' QUARTER, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' YEAR, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); -SELECT TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); -SELECT toDateTime(TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT toDateTime(TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT TUMBLE_START(TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); -SELECT TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); -SELECT toDateTime(TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT toDateTime(TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT TUMBLE_END(TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT toDateTime(tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT tumbleStart(tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); +SELECT tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT toDateTime(tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT 
toDateTime(tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT tumbleEnd(tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, INTERVAL 3 MINUTE, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 HOUR, INTERVAL 3 HOUR, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 DAY, INTERVAL 3 DAY, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, INTERVAL 3 WEEK, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MONTH, INTERVAL 3 MONTH, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 QUARTER, INTERVAL 3 QUARTER, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 YEAR, INTERVAL 3 YEAR, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, INTERVAL 3 MINUTE, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 HOUR, INTERVAL 3 HOUR, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 DAY, INTERVAL 3 DAY, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, INTERVAL 3 WEEK, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MONTH, INTERVAL 3 MONTH, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 QUARTER, INTERVAL 3 QUARTER, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 YEAR, INTERVAL 3 YEAR, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); -SELECT HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); -SELECT toDateTime(HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT toDateTime(HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT HOP_START(HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); -SELECT HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); -SELECT toDateTime(HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT toDateTime(HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT HOP_END(HOP(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT toDateTime(hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 
+SELECT hopStart(hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); +SELECT hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.reference b/tests/queries/0_stateless/01050_window_view_parser_tumble.reference index 75cd8e28af5..6375c151906 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.reference +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.reference @@ -4,3 +4,4 @@ ---WITH--- ---WHERE--- ---ORDER_BY--- +---With now--- diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index 12f67a68237..54f9ed00cbe 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -6,24 +6,28 @@ CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), TUMBLE_START(wid) AS w_start, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(tumble(timestamp, INTERVAL '3' SECOND)) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WithOut w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WITH---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), TUMBLE_START(wid) AS w_start, TUMBLE_END(wid) AS w_end, date_time FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end, date_time FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WHERE---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE 
WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; + +SELECT '---With now---'; +DROP TABLE IF EXISTS wv NO DELAY; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(tumble(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY tumble(now(), INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.reference b/tests/queries/0_stateless/01051_window_view_parser_hop.reference index 75cd8e28af5..6375c151906 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.reference +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.reference @@ -4,3 +4,4 @@ ---WITH--- ---WHERE--- ---ORDER_BY--- +---With now--- diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index 3c1e3d16320..0f705d5c911 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -6,24 +6,28 @@ CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), HOP_START(wid) AS w_start, HOP_END(wid) AS w_end FROM mt GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start, HOP_END(wid) AS w_end FROM mt GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WithOut w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start FROM mt GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WITH---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), HOP_START(wid) AS w_start, HOP_END(wid) AS w_end, date_time FROM mt GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end, date_time FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WHERE---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---ORDER_BY---'; DROP TABLE IF 
EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; + +SELECT '---With now---'; +DROP TABLE IF EXISTS wv NO DELAY; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.reference b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.reference index 0d66ea1aee9..d00491fd7e5 100644 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.reference +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.reference @@ -1,2 +1 @@ -0 1 diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh new file mode 100755 index 00000000000..033568b6077 --- /dev/null +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery < -0., ['foo']))]) + ) AS all_metrics +SELECT + (finalizeAggregation(arrayReduce('sumMapMergeState', [all_metrics])) AS metrics_tuple).1 AS metric_names, + metrics_tuple.2 AS metric_values +FROM system.one; diff --git a/tests/queries/0_stateless/01950_kill_large_group_by_query.sh b/tests/queries/0_stateless/01950_kill_large_group_by_query.sh index 0b369c7257e..aba9d2d2467 100755 --- a/tests/queries/0_stateless/01950_kill_large_group_by_query.sh +++ b/tests/queries/0_stateless/01950_kill_large_group_by_query.sh @@ -12,9 +12,11 @@ function wait_for_query_to_start() } +MAX_TIMEOUT=30 + # TCP CLIENT -$CLICKHOUSE_CLIENT --max_execution_time 10 --query_id "test_01948_tcp_$CLICKHOUSE_DATABASE" -q \ +$CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT --query_id "test_01948_tcp_$CLICKHOUSE_DATABASE" -q \ "SELECT * FROM ( SELECT a.name as n @@ -30,12 +32,12 @@ $CLICKHOUSE_CLIENT --max_execution_time 10 --query_id "test_01948_tcp_$CLICKHOUS LIMIT 20 FORMAT Null" > /dev/null 2>&1 & wait_for_query_to_start "test_01948_tcp_$CLICKHOUSE_DATABASE" -$CLICKHOUSE_CLIENT --max_execution_time 10 -q "KILL QUERY WHERE query_id = 'test_01948_tcp_$CLICKHOUSE_DATABASE' SYNC" +$CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT -q "KILL QUERY WHERE query_id = 'test_01948_tcp_$CLICKHOUSE_DATABASE' SYNC" # HTTP CLIENT -${CLICKHOUSE_CURL_COMMAND} -q --max-time 10 -sS "$CLICKHOUSE_URL&query_id=test_01948_http_$CLICKHOUSE_DATABASE" -d \ +${CLICKHOUSE_CURL_COMMAND} -q --max-time $MAX_TIMEOUT -sS "$CLICKHOUSE_URL&query_id=test_01948_http_$CLICKHOUSE_DATABASE" -d \ "SELECT * FROM ( SELECT a.name as n @@ -51,4 +53,4 @@ ${CLICKHOUSE_CURL_COMMAND} -q --max-time 10 -sS "$CLICKHOUSE_URL&query_id=test_0 LIMIT 20 FORMAT Null" > /dev/null 2>&1 & wait_for_query_to_start "test_01948_http_$CLICKHOUSE_DATABASE" -$CLICKHOUSE_CURL --max-time 10 -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 'test_01948_http_$CLICKHOUSE_DATABASE' SYNC" +$CLICKHOUSE_CURL --max-time $MAX_TIMEOUT -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 
'test_01948_http_$CLICKHOUSE_DATABASE' SYNC" diff --git a/tests/queries/0_stateless/02008_materialize_column.sql b/tests/queries/0_stateless/02008_materialize_column.sql index 4136a04568e..8a8eb2afe83 100644 --- a/tests/queries/0_stateless/02008_materialize_column.sql +++ b/tests/queries/0_stateless/02008_materialize_column.sql @@ -5,6 +5,8 @@ SET mutations_sync = 2; CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple(); INSERT INTO tmp SELECT * FROM system.numbers LIMIT 20; +ALTER TABLE tmp MATERIALIZE COLUMN x; -- { serverError 36 } + ALTER TABLE tmp ADD COLUMN s String DEFAULT toString(x); SELECT groupArray(x), groupArray(s) FROM tmp; diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.reference b/tests/queries/0_stateless/02122_join_group_by_timeout.reference new file mode 100644 index 00000000000..f314e22e519 --- /dev/null +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.reference @@ -0,0 +1,4 @@ +Code: 159 +0 +Code: 159 +0 diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh new file mode 100755 index 00000000000..4116453b69a --- /dev/null +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +MAX_PROCESS_WAIT=5 + +# TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor +### Should be cancelled after 1 second and return a 159 exception (timeout) +timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ + "SELECT * FROM + ( + SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + GROUP BY n + ) + LIMIT 20 + FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + +### Should stop pulling data and return what has been generated already (return code 0) +timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \ + "SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + FORMAT Null + SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' + " +echo $? + + +# HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor +### Should be cancelled after 1 second and return a 159 exception (timeout) +${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ + "SELECT * FROM + ( + SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + GROUP BY n + ) + LIMIT 20 + FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq + + +### Should stop pulling data and return what has been generated already (return code 0) +${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ + "SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + FORMAT Null + SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' + " +echo $? 
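The 02122_join_group_by_timeout.sh script above checks two query-timeout behaviours: with the default overflow mode a slow cross join with GROUP BY is cancelled and the client reports Code: 159 (TIMEOUT_EXCEEDED), while with timeout_overflow_mode = 'break' the server stops pulling input once max_execution_time is reached and returns whatever it has produced, so the command exits with code 0. The SQL below is a minimal standalone sketch of those two modes, not part of the patch; it mirrors the test's query shape, with numbers(2000000) standing in for system.numbers LIMIT 2000000, and the sizes are only meant to be large enough that one second is not sufficient.

-- Sketch only (not part of the patch): default overflow mode.
-- The cross join cannot finish within max_execution_time, so the query is
-- cancelled and the client receives Code: 159 (TIMEOUT_EXCEEDED).
SELECT a.name AS n, count()
FROM
    (SELECT 'Name' AS name, number FROM numbers(2000000)) AS a,
    (SELECT 'Name' AS name2, number FROM numbers(2000000)) AS b
GROUP BY n
FORMAT Null
SETTINGS max_execution_time = 1;

-- Sketch only: with timeout_overflow_mode = 'break' the server stops reading
-- further input when the limit is hit and returns the partial result, so the
-- query succeeds and the client exits with code 0.
SELECT a.name AS n
FROM
    (SELECT 'Name' AS name, number FROM numbers(2000000)) AS a,
    (SELECT 'Name' AS name2, number FROM numbers(2000000)) AS b
FORMAT Null
SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break';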
diff --git a/tests/queries/0_stateless/02122_parallel_formatting.sh b/tests/queries/0_stateless/02122_parallel_formatting.sh index 8061cbe58b2..f0c24344329 100755 --- a/tests/queries/0_stateless/02122_parallel_formatting.sh +++ b/tests/queries/0_stateless/02122_parallel_formatting.sh @@ -11,14 +11,14 @@ formats="RowBinary RowBinaryWithNames RowBinaryWithNamesAndTypes XML Markdown Ve for format in ${formats}; do echo $format-1 - $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -v "elapsed" > $non_parallel_file - $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -v "elapsed" > $parallel_file + $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file + $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file diff $non_parallel_file $parallel_file echo $format-2 - $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -v "elapsed" > $non_parallel_file - $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -v "elapsed" > $parallel_file + $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file + $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file diff $non_parallel_file $parallel_file done diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference index 67f2590a0c6..a7903610a42 100644 --- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference @@ -6,3 +6,7 @@ 42 42 42 +SELECT + x, + concat(x, \'_\') +FROM test diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql index ad3d417bc26..d2041a612a6 100644 --- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql +++ 
b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql @@ -11,4 +11,7 @@ select if(toUInt8(1), 42, y) from test; select if(toInt8(1), 42, y) from test; select if(toUInt8(toUInt8(0)), y, 42) from test; select if(cast(cast(0, 'UInt8'), 'UInt8'), y, 42) from test; + +explain syntax select x, if((select hasColumnInTable(currentDatabase(), 'test', 'y')), y, x || '_') from test; + drop table if exists t; diff --git a/tests/queries/0_stateless/02125_many_mutations.reference b/tests/queries/0_stateless/02125_many_mutations.reference new file mode 100644 index 00000000000..c98d8221c7f --- /dev/null +++ b/tests/queries/0_stateless/02125_many_mutations.reference @@ -0,0 +1,6 @@ +0 0 +1 1 +20000 +0 +0 20000 +1 20001 diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh new file mode 100755 index 00000000000..7a89e5f7c4f --- /dev/null +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x" +$CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" +$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" + +$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" + +job() +{ + for _ in {1..1000} + do + $CLICKHOUSE_CLIENT -q "alter table many_mutations update y = y + 1 where 1" + done +} + +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & + +wait + +$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" +$CLICKHOUSE_CLIENT -q "system start merges many_mutations" +$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" +$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" +$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" diff --git a/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.reference b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.reference new file mode 100644 index 00000000000..4792e70f333 --- /dev/null +++ b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.reference @@ -0,0 +1,2 @@ +2 +3 diff --git a/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql new file mode 100644 index 00000000000..1870521c255 --- /dev/null +++ b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql @@ -0,0 +1,23 @@ +-- Tags: no-parallel + +DROP FUNCTION IF EXISTS 02125_function; +CREATE FUNCTION 02125_function AS x -> 02125_function(x); +SELECT 02125_function(1); --{serverError 1}; +DROP FUNCTION 02125_function; + +DROP FUNCTION IF EXISTS 02125_function_1; +CREATE FUNCTION 02125_function_1 AS x -> 02125_function_2(x); + +DROP FUNCTION IF EXISTS 02125_function_2; +CREATE FUNCTION 02125_function_2 AS x -> 02125_function_1(x); + +SELECT 02125_function_1(1); --{serverError 1}; +SELECT 02125_function_2(2); --{serverError 1}; + +CREATE OR REPLACE FUNCTION 02125_function_2 AS x -> x + 1; + +SELECT 02125_function_1(1); +SELECT 
02125_function_2(2); + +DROP FUNCTION 02125_function_1; +DROP FUNCTION 02125_function_2; diff --git a/tests/queries/0_stateless/02126_identity_user_defined_function.reference b/tests/queries/0_stateless/02126_identity_user_defined_function.reference new file mode 100644 index 00000000000..26b37d07fac --- /dev/null +++ b/tests/queries/0_stateless/02126_identity_user_defined_function.reference @@ -0,0 +1,2 @@ +1 +5 diff --git a/tests/queries/0_stateless/02126_identity_user_defined_function.sql b/tests/queries/0_stateless/02126_identity_user_defined_function.sql new file mode 100644 index 00000000000..a53c6e28a48 --- /dev/null +++ b/tests/queries/0_stateless/02126_identity_user_defined_function.sql @@ -0,0 +1,14 @@ +-- Tags: no-parallel + +DROP FUNCTION IF EXISTS 02126_function; +CREATE FUNCTION 02126_function AS x -> x; +SELECT 02126_function(1); +DROP FUNCTION 02126_function; + +CREATE FUNCTION 02126_function AS () -> x; +SELECT 02126_function(); --{ serverError 47 } +DROP FUNCTION 02126_function; + +CREATE FUNCTION 02126_function AS () -> 5; +SELECT 02126_function(); +DROP FUNCTION 02126_function; diff --git a/tests/queries/0_stateless/02126_lc_window_functions.reference b/tests/queries/0_stateless/02126_lc_window_functions.reference new file mode 100644 index 00000000000..bb2c453139e --- /dev/null +++ b/tests/queries/0_stateless/02126_lc_window_functions.reference @@ -0,0 +1,13 @@ +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +a\0aa 1 +a\0aa 1 +a\0aa 1 diff --git a/tests/queries/0_stateless/02126_lc_window_functions.sql b/tests/queries/0_stateless/02126_lc_window_functions.sql new file mode 100644 index 00000000000..6a1fb691a37 --- /dev/null +++ b/tests/queries/0_stateless/02126_lc_window_functions.sql @@ -0,0 +1,38 @@ +SELECT max(id) OVER () AS aid +FROM +( + SELECT materialize(toLowCardinality('aaaa')) AS id + FROM numbers_mt(1000000) +) +FORMAT `Null`; + +SELECT max(id) OVER (PARTITION BY id) AS id +FROM +( + SELECT materialize('aaaa') AS id + FROM numbers_mt(1000000) +) +FORMAT `Null`; + +SELECT countIf(sym = 'Red') OVER () AS res +FROM +( + SELECT CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym + FROM numbers(10) +); + +SELECT materialize(toLowCardinality('a\0aa')), countIf(toLowCardinality('aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0'), sym = 'Red') OVER (Range BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS res FROM (SELECT CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym FROM numbers(3)); + +SELECT + NULL, + id, + max(id) OVER (Rows BETWEEN 10 PRECEDING AND UNBOUNDED FOLLOWING) AS aid +FROM +( + SELECT + NULL, + max(id) OVER (), + materialize(toLowCardinality('')) AS id + FROM numbers_mt(0, 1) +) +FORMAT `Null`; diff --git a/tests/queries/0_stateless/02128_apply_lambda_parsing.reference b/tests/queries/0_stateless/02128_apply_lambda_parsing.reference new file mode 100644 index 00000000000..120eec989de --- /dev/null +++ b/tests/queries/0_stateless/02128_apply_lambda_parsing.reference @@ -0,0 +1,10 @@ +1 +1 +1 +1 +1 +1 +2 +3 +4 +5 diff --git a/tests/queries/0_stateless/02128_apply_lambda_parsing.sql b/tests/queries/0_stateless/02128_apply_lambda_parsing.sql new file mode 100644 index 00000000000..5fc809ca75d --- /dev/null +++ b/tests/queries/0_stateless/02128_apply_lambda_parsing.sql @@ -0,0 +1,13 @@ +WITH * APPLY 
lambda(e); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(1); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(x); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(range(1)); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(range(x)); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(1, 2); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(x, y); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda((x, y), 2); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda((x, y), x + y); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(tuple(1), 1); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(tuple(x), 1) FROM numbers(5); +SELECT * APPLY lambda(tuple(x), x + 1) FROM numbers(5); diff --git a/tests/queries/0_stateless/02128_cast_nullable.reference b/tests/queries/0_stateless/02128_cast_nullable.reference new file mode 100644 index 00000000000..26d85f14a3e --- /dev/null +++ b/tests/queries/0_stateless/02128_cast_nullable.reference @@ -0,0 +1,9 @@ +-- { echo } +SELECT toUInt32OrDefault(toNullable(toUInt32(1))) SETTINGS cast_keep_nullable=1; +1 +SELECT toUInt32OrDefault(toNullable(toUInt32(1)), toNullable(toUInt32(2))) SETTINGS cast_keep_nullable=1; +1 +SELECT toUInt32OrDefault(toUInt32(1)) SETTINGS cast_keep_nullable=1; +1 +SELECT toUInt32OrDefault(toUInt32(1), toUInt32(2)) SETTINGS cast_keep_nullable=1; +1 diff --git a/tests/queries/0_stateless/02128_cast_nullable.sql b/tests/queries/0_stateless/02128_cast_nullable.sql new file mode 100644 index 00000000000..fec686d791b --- /dev/null +++ b/tests/queries/0_stateless/02128_cast_nullable.sql @@ -0,0 +1,5 @@ +-- { echo } +SELECT toUInt32OrDefault(toNullable(toUInt32(1))) SETTINGS cast_keep_nullable=1; +SELECT toUInt32OrDefault(toNullable(toUInt32(1)), toNullable(toUInt32(2))) SETTINGS cast_keep_nullable=1; +SELECT toUInt32OrDefault(toUInt32(1)) SETTINGS cast_keep_nullable=1; +SELECT toUInt32OrDefault(toUInt32(1), toUInt32(2)) SETTINGS cast_keep_nullable=1; diff --git a/tests/queries/0_stateless/02128_wait_end_of_query_fix.reference b/tests/queries/0_stateless/02128_wait_end_of_query_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02128_wait_end_of_query_fix.sh b/tests/queries/0_stateless/02128_wait_end_of_query_fix.sh new file mode 100755 index 00000000000..c98f9980fb1 --- /dev/null +++ b/tests/queries/0_stateless/02128_wait_end_of_query_fix.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -eu + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# https://github.com/ClickHouse/ClickHouse/issues/32186 + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}/?&query=SELECT+cluster%2C+host_address%2C+port+FROM+system.clusters+FORMAT+JSON&max_result_bytes=104857600&log_queries=1&optimize_throw_if_noop=1&output_format_json_quote_64bit_integers=0&lock_acquire_timeout=10&max_execution_time=10&wait_end_of_query=1" >/dev/null diff --git a/tests/queries/0_stateless/02129_skip_quoted_fields.reference b/tests/queries/0_stateless/02129_skip_quoted_fields.reference new file mode 100644 index 00000000000..312f526ca28 --- /dev/null +++ b/tests/queries/0_stateless/02129_skip_quoted_fields.reference @@ -0,0 +1,26 @@ +1 42 +2 42 +3 42 +4 42 +5 42 +6 42 +7 42 +8 42 +9 42 +10 42 +11 42 +12 42 +13 42 +14 42 +15 42 +16 42 +17 42 +18 42 +19 42 +20 42 +21 42 +22 42 +23 42 +24 42 +25 42 +26 42 diff --git a/tests/queries/0_stateless/02129_skip_quoted_fields.sh b/tests/queries/0_stateless/02129_skip_quoted_fields.sh new file mode 100755 index 00000000000..c1baeb5b8f2 --- /dev/null +++ b/tests/queries/0_stateless/02129_skip_quoted_fields.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_02129" +$CLICKHOUSE_CLIENT -q "create table test_02129 (x UInt64, y UInt64) engine=Memory()" + +QUERY="insert into test_02129 format CustomSeparatedWithNames settings input_format_skip_unknown_fields=1, format_custom_escaping_rule='Quoted'" + +# Skip string +echo -e "'x'\t'trash'\t'y'\n1\t'Some string'\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip number +echo -e "'x'\t'trash'\t'y'\n2\t42\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n3\t4242.4242\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n4\t-42\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n5\t+42\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n6\t-4242.424242\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n7\t+4242.424242\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n8\tnan\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n9\tinf\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n10\t+nan\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n11\t+inf\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n12\t-nan\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n13\t-inf\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n14\t44444444444444444444444444.444444444444444444444444\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n15\t30e30\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n16\t-30e-30\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip NULL +echo -e "'x'\t'trash'\t'y'\n17\tNULL\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip an array +echo -e "'x'\t'trash'\t'y'\n18\t[1,2,3,4]\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n19\t['some string ]][[][][]', 'one more string (){}][[{[[[[[[']\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n20\t[[(1,2), (3,4)], [(5,6), (7,8)]]\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip a tuple +echo -e "'x'\t'trash'\t'y'\n21\t(1,2,3,4)\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n22\t('some string ()))))(()(())', 'one more string (){}][[{[)))))')\t42" | $CLICKHOUSE_CLIENT -q 
"$QUERY" +echo -e "'x'\t'trash'\t'y'\n23\t(([1,2], (3,4)), ([5,6], (7,8)))\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip a map +echo -e "'x'\t'trash'\t'y'\n24\t{1:2,2:3,3:4,4:5}\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n25\t{'some string }}}}}}{{{{':123, 'one more string (){}][[{[{{{{{':123}\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n26\t{'key':{1:(1,2), 2:(3,4)}, 'foo':{1:(5,6), 2:(7,8)}}\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +$CLICKHOUSE_CLIENT -q "select * from test_02129 order by x" +$CLICKHOUSE_CLIENT -q "drop table test_02129" + diff --git a/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference new file mode 100644 index 00000000000..f66c81021c9 --- /dev/null +++ b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference @@ -0,0 +1,36 @@ +1 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 0.5 30 15 +1 1 30 45 +2 0.5 30 60 +3 1 30 90 +4 0.5 30 105 +5 1 30 135 +6 0.5 30 150 +7 1 30 180 +8 0.5 30 195 +9 1 30 225 +0 0 0 +1 1 1 +2 0 1 +3 0 1 +4 0 1 +5 0 1 +6 0 1 +7 0 1 +8 0 1 +9 0 1 +5772761.230862 +5773916.014064 +5775070.797267 +5776226.273617 +5777381.749967 +5778537.226317 diff --git a/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql new file mode 100644 index 00000000000..cfe9f20d378 --- /dev/null +++ b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql @@ -0,0 +1,27 @@ +SET optimize_rewrite_sum_if_to_count_if = 1; + +SELECT if(number % 10 = 0, 1, 0) AS dummy, +sum(dummy) OVER w +FROM numbers(10) +WINDOW w AS (ORDER BY number ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW); + +SET optimize_arithmetic_operations_in_aggregate_functions=1; +SELECT + *, + if((number % 2) = 0, 0.5, 1) AS a, + 30 AS b, + sum(a * b) OVER (ORDER BY number ASC) AS s +FROM numbers(10); + +SET optimize_aggregators_of_group_by_keys=1; + +SELECT + *, + if(number = 1, 1, 0) as a, + max(a) OVER (ORDER BY number ASC) AS s +FROM numbers(10); + +SET optimize_group_by_function_keys = 1; +SELECT round(sum(log(2) * number), 6) AS k FROM numbers(10000) +GROUP BY (number % 2) * (number % 3), number % 3, number % 2 +HAVING sum(log(2) * number) > 346.57353 ORDER BY k; diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.reference b/tests/queries/0_stateless/02130_parse_quoted_null.reference new file mode 100644 index 00000000000..1f7989bd2ba --- /dev/null +++ b/tests/queries/0_stateless/02130_parse_quoted_null.reference @@ -0,0 +1,12 @@ +\N 1 +nan 2 +42.42 3 +\N 4 +\N 5 +\N 6 +\N 7 +nan 8 +nan 9 +nan 10 +nan 11 +OK diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.sh b/tests/queries/0_stateless/02130_parse_quoted_null.sh new file mode 100755 index 00000000000..9cb6cb73e6c --- /dev/null +++ b/tests/queries/0_stateless/02130_parse_quoted_null.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +DATA_FILE=$USER_FILES_PATH/test_02130.data +SELECT_QUERY="select * from file('test_02130.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'" + + +$CLICKHOUSE_CLIENT -q "drop table if exists test_02130" +$CLICKHOUSE_CLIENT -q "create table test_02130 (x Nullable(Float64), y Nullable(UInt64)) engine=Memory()" + +echo -e "null\t1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" + +echo -e "nan\t2" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" + +echo -e "42.42\t3" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" + +echo -e "null\t4" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 + +echo -e "null\t5" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 + +echo -e "null\t6" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 + +echo -e "null\t7" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 + +echo -e "nan\t8" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 + +echo -e "nan\t9" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 + +echo -e "nan\t10" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 + +echo -e "nan\t11" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 + +echo -e "42\tnan" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' + +$CLICKHOUSE_CLIENT -q "select * from test_02130 order by y" +$CLICKHOUSE_CLIENT -q "drop table test_02130" + +rm $DATA_FILE diff --git a/tests/queries/0_stateless/02131_materialize_column_cast.reference b/tests/queries/0_stateless/02131_materialize_column_cast.reference new file mode 100644 index 00000000000..8c94b2ade9c --- /dev/null +++ b/tests/queries/0_stateless/02131_materialize_column_cast.reference @@ -0,0 +1,14 @@ +1_1_1_0_2 i Int32 +1_1_1_0_2 s LowCardinality(String) +=========== +1_1_1_0_2 i Int32 +1_1_1_0_2 s LowCardinality(String) +2_3_3_0 i Int32 +2_3_3_0 s LowCardinality(String) +=========== +1_1_1_0_4 i Int32 +1_1_1_0_4 s LowCardinality(String) +2_3_3_0_4 i Int32 +2_3_3_0_4 s LowCardinality(String) +1 1 +2 2 diff --git a/tests/queries/0_stateless/02131_materialize_column_cast.sql b/tests/queries/0_stateless/02131_materialize_column_cast.sql new file mode 100644 index 00000000000..3bfeaf5baeb --- /dev/null +++ b/tests/queries/0_stateless/02131_materialize_column_cast.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS t_materialize_column; + +CREATE TABLE t_materialize_column (i Int32) +ENGINE = MergeTree ORDER BY i PARTITION BY i +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_materialize_column VALUES (1); + +ALTER TABLE t_materialize_column ADD COLUMN s LowCardinality(String) DEFAULT toString(i); +ALTER TABLE t_materialize_column MATERIALIZE COLUMN s SETTINGS mutations_sync = 2; + +SELECT name, column, type FROM system.parts_columns +WHERE table = 't_materialize_column' AND database = currentDatabase() AND active +ORDER BY name, column; + +SELECT '==========='; + +INSERT INTO t_materialize_column (i) VALUES (2); + +SELECT name, column, type FROM system.parts_columns +WHERE table = 't_materialize_column' AND 
database = currentDatabase() AND active +ORDER BY name, column; + +SELECT '==========='; + +ALTER TABLE t_materialize_column ADD INDEX s_bf (s) TYPE bloom_filter(0.01) GRANULARITY 1; +ALTER TABLE t_materialize_column MATERIALIZE INDEX s_bf SETTINGS mutations_sync = 2; + +SELECT name, column, type FROM system.parts_columns +WHERE table = 't_materialize_column' AND database = currentDatabase() AND active +ORDER BY name, column; + +SELECT * FROM t_materialize_column ORDER BY i; + +DROP TABLE t_materialize_column; diff --git a/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference b/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql b/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql new file mode 100644 index 00000000000..f9ca2269aad --- /dev/null +++ b/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql @@ -0,0 +1 @@ +select count(1) from (SELECT 1 AS a, count(1) FROM numbers(5)) diff --git a/tests/queries/0_stateless/02132_client_history_navigation.expect b/tests/queries/0_stateless/02132_client_history_navigation.expect new file mode 100755 index 00000000000..129a65e0a0a --- /dev/null +++ b/tests/queries/0_stateless/02132_client_history_navigation.expect @@ -0,0 +1,33 @@ +#!/usr/bin/expect -f +# Tags: no-fasttest + +log_user 0 +set timeout 3 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +# useful debugging configuration +# exp_internal 1 + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --highlight 0" +expect ":) " + +# Make a query +send -- "SELECT 1\r" +expect "1" +expect ":) " +send -- "SELECT 2" +send -- "\033\[A" +expect "SELECT 1" +send -- "\033\[B" +expect "SELECT 2" +send -- "\r" +expect "2" +send -- "exit\r" +expect eof diff --git a/tests/queries/0_stateless/02132_client_history_navigation.reference b/tests/queries/0_stateless/02132_client_history_navigation.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02132_empty_mutation_livelock.reference b/tests/queries/0_stateless/02132_empty_mutation_livelock.reference new file mode 100644 index 00000000000..e58e9764b39 --- /dev/null +++ b/tests/queries/0_stateless/02132_empty_mutation_livelock.reference @@ -0,0 +1,2 @@ +100 +100 diff --git a/tests/queries/0_stateless/02132_empty_mutation_livelock.sql b/tests/queries/0_stateless/02132_empty_mutation_livelock.sql new file mode 100644 index 00000000000..186199d4e13 --- /dev/null +++ b/tests/queries/0_stateless/02132_empty_mutation_livelock.sql @@ -0,0 +1,12 @@ +drop table if exists a8x; + +set empty_result_for_aggregation_by_empty_set=1; +create table a8x ENGINE = MergeTree ORDER BY tuple() settings min_bytes_for_wide_part=0 as SELECT number FROM system.numbers limit 100; + +select count() from a8x; + +set mutations_sync=1; +alter table a8x update number=0 WHERE number=-3; + +select count() from a8x; +drop table if exists a8x; diff --git a/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.reference b/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.reference new file mode 100644 index 
00000000000..0b7680a594f --- /dev/null +++ b/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.reference @@ -0,0 +1,2 @@ +LowCardinality(String) +LowCardinality(String) diff --git a/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.sql b/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.sql new file mode 100644 index 00000000000..a801fe08614 --- /dev/null +++ b/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS errors_local; + +CREATE TABLE errors_local (level LowCardinality(String)) ENGINE=ReplacingMergeTree ORDER BY level settings min_bytes_for_wide_part = '10000000'; +insert into errors_local select toString(number) from numbers(10000); + +SELECT toTypeName(level) FROM errors_local FINAL PREWHERE isNotNull(level) WHERE isNotNull(level) LIMIT 1; + +DROP TABLE errors_local; + +CREATE TABLE errors_local(level LowCardinality(String)) ENGINE=ReplacingMergeTree ORDER BY level; +insert into errors_local select toString(number) from numbers(10000); + +SELECT toTypeName(level) FROM errors_local FINAL PREWHERE isNotNull(level) WHERE isNotNull(level) LIMIT 1; + +DROP TABLE errors_local; diff --git a/tests/queries/0_stateless/02133_issue_32458.reference b/tests/queries/0_stateless/02133_issue_32458.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02133_issue_32458.sql b/tests/queries/0_stateless/02133_issue_32458.sql new file mode 100644 index 00000000000..16af361db7a --- /dev/null +++ b/tests/queries/0_stateless/02133_issue_32458.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (`id` Int32, `key` String) ENGINE = Memory; +CREATE TABLE t2 (`id` Int32, `key` String) ENGINE = Memory; + +INSERT INTO t1 VALUES (0, ''); +INSERT INTO t2 VALUES (0, ''); + +SELECT * FROM t1 ANY INNER JOIN t2 ON ((NULL = t1.key) = t2.id) AND (('' = t1.key) = t2.id); + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02135_local_create_db.reference b/tests/queries/0_stateless/02135_local_create_db.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02135_local_create_db.sh b/tests/queries/0_stateless/02135_local_create_db.sh new file mode 100755 index 00000000000..2a0105e554e --- /dev/null +++ b/tests/queries/0_stateless/02135_local_create_db.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +for Engine in Atomic Ordinary; do + $CLICKHOUSE_LOCAL --query """ + CREATE DATABASE foo_$Engine Engine=$Engine; + DROP DATABASE foo_$Engine; + """ +done diff --git a/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.reference b/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.sh b/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.sh new file mode 100755 index 00000000000..377cbb13688 --- /dev/null +++ b/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --queries-file <(echo 'select 1') --queries-file <(echo 'select 2') --format Null diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference new file mode 100644 index 00000000000..e4c93e9e1c5 --- /dev/null +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference @@ -0,0 +1 @@ +CREATE TABLE _local.table\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh new file mode 100755 index 00000000000..fc71f779fa1 --- /dev/null +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' diff --git a/tests/queries/0_stateless/02144_avg_ubsan.reference b/tests/queries/0_stateless/02144_avg_ubsan.reference new file mode 100644 index 00000000000..09f03e40e59 --- /dev/null +++ b/tests/queries/0_stateless/02144_avg_ubsan.reference @@ -0,0 +1,14 @@ +-- { echo } + +-- Aggregate function 'avg' allows overflow with two's complement arithmetics. +-- This contradicts the standard SQL semantic and we are totally fine with it. + +-- AggregateFunctionAvg::add +SELECT avg(-8000000000000000000) FROM (SELECT *, 1 AS k FROM numbers(65535*2)) GROUP BY k; +63121857572613.94 +-- AggregateFunctionAvg::addBatchSinglePlace +SELECT avg(-8000000000000000000) FROM numbers(65535 * 2); +63121857572613.94 +-- AggregateFunctionAvg::addBatchSinglePlaceNotNull +SELECT avg(toNullable(-8000000000000000000)) FROM numbers(65535 * 2); +63121857572613.94 diff --git a/tests/queries/0_stateless/02144_avg_ubsan.sql b/tests/queries/0_stateless/02144_avg_ubsan.sql new file mode 100644 index 00000000000..7c51963333e --- /dev/null +++ b/tests/queries/0_stateless/02144_avg_ubsan.sql @@ -0,0 +1,11 @@ +-- { echo } + +-- Aggregate function 'avg' allows overflow with two's complement arithmetics. +-- This contradicts the standard SQL semantic and we are totally fine with it. 
+ +-- AggregateFunctionAvg::add +SELECT avg(-8000000000000000000) FROM (SELECT *, 1 AS k FROM numbers(65535*2)) GROUP BY k; +-- AggregateFunctionAvg::addBatchSinglePlace +SELECT avg(-8000000000000000000) FROM numbers(65535 * 2); +-- AggregateFunctionAvg::addBatchSinglePlaceNotNull +SELECT avg(toNullable(-8000000000000000000)) FROM numbers(65535 * 2); diff --git a/tests/queries/0_stateless/02146_mv_non_phys.reference b/tests/queries/0_stateless/02146_mv_non_phys.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02146_mv_non_phys.sql b/tests/queries/0_stateless/02146_mv_non_phys.sql new file mode 100644 index 00000000000..4b15900fe76 --- /dev/null +++ b/tests/queries/0_stateless/02146_mv_non_phys.sql @@ -0,0 +1,2 @@ +drop table if exists mv_02146; +create materialized view mv_02146 engine=MergeTree() order by number as select * from numbers(10); -- { serverError QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW } diff --git a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto new file mode 100644 index 00000000000..054de349e24 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto @@ -0,0 +1,29 @@ +syntax = "proto3"; + +message UpdateMessage { + string identifier = 1; + //string unused1 = 100; + + message Module { + uint32 module_id = 2; + uint32 supply = 3; + uint32 temp = 4; + + message ModuleNode { + uint32 node_id = 5; + uint32 opening_time = 6; + uint32 closing_time = 7; // The column in the table is named `closing_time_time` + uint32 current = 8; + + message Coords { + //float x = 8; + float y = 9; + } + Coords coords = 10; + } + + repeated ModuleNode nodes = 11; + } + + repeated Module modules = 12; +} diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference new file mode 100644 index 00000000000..2675904dea0 --- /dev/null +++ b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference @@ -0,0 +1,110 @@ +Testing 00001_count_hits.sql ----> Ok! ✅ +Testing 00002_count_visits.sql ----> Ok! ✅ +Testing 00004_top_counters.sql ----> Ok! ✅ +Testing 00005_filtering.sql ----> Ok! ✅ +Testing 00006_agregates.sql ----> Ok! ✅ +Testing 00007_uniq.sql ----> Ok! ✅ +Testing 00008_uniq.sql ----> Ok! ✅ +Testing 00009_uniq_distributed.sql ----> Ok! ✅ +Testing 00010_quantiles_segfault.sql ----> Ok! ✅ +Testing 00011_sorting.sql ----> Ok! ✅ +Testing 00012_sorting_distributed.sql ----> Ok! ✅ +Skipping 00013_sorting_of_nested.sql +Testing 00014_filtering_arrays.sql ----> Ok! ✅ +Testing 00015_totals_and_no_aggregate_functions.sql ----> Ok! ✅ +Testing 00016_any_if_distributed_cond_always_false.sql ----> Ok! ✅ +Testing 00017_aggregation_uninitialized_memory.sql ----> Ok! ✅ +Testing 00020_distinct_order_by_distributed.sql ----> Ok! ✅ +Testing 00021_1_select_with_in.sql ----> Ok! ✅ +Testing 00021_2_select_with_in.sql ----> Ok! ✅ +Testing 00021_3_select_with_in.sql ----> Ok! ✅ +Testing 00022_merge_prewhere.sql ----> Ok! ✅ +Testing 00023_totals_limit.sql ----> Ok! ✅ +Testing 00024_random_counters.sql ----> Ok! ✅ +Testing 00030_array_enumerate_uniq.sql ----> Ok! ✅ +Testing 00031_array_enumerate_uniq.sql ----> Ok! ✅ +Testing 00032_aggregate_key64.sql ----> Ok! ✅ +Testing 00033_aggregate_key_string.sql ----> Ok! 
✅ +Testing 00034_aggregate_key_fixed_string.sql ----> Ok! ✅ +Testing 00035_aggregate_keys128.sql ----> Ok! ✅ +Testing 00036_aggregate_hashed.sql ----> Ok! ✅ +Testing 00037_uniq_state_merge1.sql ----> Ok! ✅ +Testing 00038_uniq_state_merge2.sql ----> Ok! ✅ +Testing 00039_primary_key.sql ----> Ok! ✅ +Testing 00040_aggregating_materialized_view.sql ----> Ok! ✅ +Testing 00041_aggregating_materialized_view.sql ----> Ok! ✅ +Testing 00042_any_left_join.sql ----> Ok! ✅ +Testing 00043_any_left_join.sql ----> Ok! ✅ +Testing 00044_any_left_join_string.sql ----> Ok! ✅ +Testing 00045_uniq_upto.sql ----> Ok! ✅ +Testing 00046_uniq_upto_distributed.sql ----> Ok! ✅ +Testing 00047_bar.sql ----> Ok! ✅ +Testing 00048_min_max.sql ----> Ok! ✅ +Testing 00049_max_string_if.sql ----> Ok! ✅ +Testing 00050_min_max.sql ----> Ok! ✅ +Testing 00051_min_max_array.sql ----> Ok! ✅ +Testing 00052_group_by_in.sql ----> Ok! ✅ +Testing 00053_replicate_segfault.sql ----> Ok! ✅ +Testing 00054_merge_tree_partitions.sql ----> Ok! ✅ +Testing 00055_index_and_not.sql ----> Ok! ✅ +Testing 00056_view.sql ----> Ok! ✅ +Testing 00059_merge_sorting_empty_array_joined.sql ----> Ok! ✅ +Testing 00060_move_to_prewhere_and_sets.sql ----> Ok! ✅ +Skipping 00061_storage_buffer.sql +Testing 00062_loyalty.sql ----> Ok! ✅ +Testing 00063_loyalty_joins.sql ----> Ok! ✅ +Testing 00065_loyalty_with_storage_join.sql ----> Ok! ✅ +Testing 00066_sorting_distributed_many_replicas.sql ----> Ok! ✅ +Testing 00067_union_all.sql ----> Ok! ✅ +Testing 00068_subquery_in_prewhere.sql ----> Ok! ✅ +Testing 00069_duplicate_aggregation_keys.sql ----> Ok! ✅ +Testing 00071_merge_tree_optimize_aio.sql ----> Ok! ✅ +Testing 00072_compare_date_and_string_index.sql ----> Ok! ✅ +Testing 00073_uniq_array.sql ----> Ok! ✅ +Testing 00074_full_join.sql ----> Ok! ✅ +Testing 00075_left_array_join.sql ----> Ok! ✅ +Testing 00076_system_columns_bytes.sql ----> Ok! ✅ +Testing 00077_log_tinylog_stripelog.sql ----> Ok! ✅ +Testing 00078_group_by_arrays.sql ----> Ok! ✅ +Testing 00079_array_join_not_used_joined_column.sql ----> Ok! ✅ +Testing 00080_array_join_and_union.sql ----> Ok! ✅ +Testing 00081_group_by_without_key_and_totals.sql ----> Ok! ✅ +Testing 00082_quantiles.sql ----> Ok! ✅ +Testing 00083_array_filter.sql ----> Ok! ✅ +Testing 00084_external_aggregation.sql ----> Ok! ✅ +Testing 00085_monotonic_evaluation_segfault.sql ----> Ok! ✅ +Testing 00086_array_reduce.sql ----> Ok! ✅ +Testing 00087_where_0.sql ----> Ok! ✅ +Testing 00088_global_in_one_shard_and_rows_before_limit.sql ----> Ok! ✅ +Testing 00089_position_functions_with_non_constant_arg.sql ----> Ok! ✅ +Testing 00091_prewhere_two_conditions.sql ----> Ok! ✅ +Testing 00093_prewhere_array_join.sql ----> Ok! ✅ +Testing 00094_order_by_array_join_limit.sql ----> Ok! ✅ +Skipping 00095_hyperscan_profiler.sql +Testing 00139_like.sql ----> Ok! ✅ +Skipping 00140_rename.sql +Testing 00141_transform.sql ----> Ok! ✅ +Testing 00142_system_columns.sql ----> Ok! ✅ +Testing 00143_transform_non_const_default.sql ----> Ok! ✅ +Testing 00144_functions_of_aggregation_states.sql ----> Ok! ✅ +Testing 00145_aggregate_functions_statistics.sql ----> Ok! ✅ +Testing 00146_aggregate_function_uniq.sql ----> Ok! ✅ +Testing 00147_global_in_aggregate_function.sql ----> Ok! ✅ +Testing 00148_monotonic_functions_and_index.sql ----> Ok! ✅ +Testing 00149_quantiles_timing_distributed.sql ----> Ok! ✅ +Testing 00150_quantiles_timing_precision.sql ----> Ok! ✅ +Testing 00151_order_by_read_in_order.sql ----> Ok! 
✅ +Skipping 00151_replace_partition_with_different_granularity.sql +Skipping 00152_insert_different_granularity.sql +Testing 00153_aggregate_arena_race.sql ----> Ok! ✅ +Skipping 00154_avro.sql +Testing 00156_max_execution_speed_sample_merge.sql ----> Ok! ✅ +Skipping 00157_cache_dictionary.sql +Skipping 00158_cache_dictionary_has.sql +Testing 00160_decode_xml_component.sql ----> Ok! ✅ +Testing 00162_mmap_compression_none.sql ----> Ok! ✅ +Testing 00164_quantileBfloat16.sql ----> Ok! ✅ +Testing 00165_jit_aggregate_functions.sql ----> Ok! ✅ +Skipping 00166_explain_estimate.sql +Testing 00167_read_bytes_from_fs.sql ----> Ok! ✅ +Total failed tests: diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh new file mode 100755 index 00000000000..ba1245d9679 --- /dev/null +++ b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# Tags: no-tsan + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# set -e + +# All replicas are localhost; disable the `prefer_localhost_replica` option to test the network interface +# Currently this feature does not work with hedged requests +# Enabling the `enable_sample_offset_parallel_processing` feature could lead to intersecting marks; some of them would be thrown away, producing an incorrect SELECT result +SETTINGS="--max_parallel_replicas=3 --prefer_localhost_replica=false --use_hedged_requests=false --async_socket_for_remote=false --allow_experimental_parallel_reading_from_replicas=true" + +# Prepare tables +$CLICKHOUSE_CLIENT $SETTINGS -nm -q ''' + drop table if exists test.dist_hits SYNC; + drop table if exists test.dist_visits SYNC; + + create table test.dist_hits as test.hits engine = Distributed("test_cluster_one_shard_three_replicas_localhost", test, hits, rand()); + create table test.dist_visits as test.visits engine = Distributed("test_cluster_one_shard_three_replicas_localhost", test, visits, rand()); +'''; + +FAILED=() + +# PreviouslyFailed=( +# ) + +SkipList=( + "00013_sorting_of_nested.sql" # It contains FINAL, which is not allowed together with parallel reading + + "00061_storage_buffer.sql" + "00095_hyperscan_profiler.sql" # too long in debug (there is a --no-debug tag inside a test) + + "00140_rename.sql" # Multiple renames are not allowed with DatabaseReplicated and tags are not forwarded through this test + + "00154_avro.sql" # Plain select * with limit with Distributed table is not deterministic + "00151_replace_partition_with_different_granularity.sql" # Replace partition from Distributed is not allowed + "00152_insert_different_granularity.sql" # The same as above + + "00157_cache_dictionary.sql" # Too long in debug mode, but result is correct + "00158_cache_dictionary_has.sql" # The same as above + + "00166_explain_estimate.sql" # Distributed table returns nothing +) + +# for TESTPATH in "${PreviouslyFailed[@]}" +for TESTPATH in "$CURDIR"/*.sql; +do + TESTNAME=$(basename $TESTPATH) + + if [[ " ${SkipList[*]} " =~ ${TESTNAME} ]]; then + echo "Skipping $TESTNAME " + continue + fi + + echo -n "Testing $TESTNAME ----> " + + # prepare test + NEW_TESTNAME="/tmp/dist_$TESTNAME" + # Added the g flag to the sed command to replace all tables, not just the first one + cat $TESTPATH | sed -e 's/test.hits/test.dist_hits/g' | sed -e 's/test.visits/test.dist_visits/g' > $NEW_TESTNAME + + TESTNAME_RESULT="/tmp/result_$TESTNAME" +
NEW_TESTNAME_RESULT="/tmp/result_dist_$TESTNAME" + + $CLICKHOUSE_CLIENT $SETTINGS -nm --testmode < $TESTPATH > $TESTNAME_RESULT + $CLICKHOUSE_CLIENT $SETTINGS -nm --testmode < $NEW_TESTNAME > $NEW_TESTNAME_RESULT + + expected=$(cat $TESTNAME_RESULT | md5sum) + actual=$(cat $NEW_TESTNAME_RESULT | md5sum) + + if [[ "$expected" != "$actual" ]]; then + FAILED+=("$TESTNAME") + echo "Failed! ❌ " + echo "Plain:" + cat $TESTNAME_RESULT + echo "Distributed:" + cat $NEW_TESTNAME_RESULT + else + echo "Ok! ✅" + fi +done + + +echo "Total failed tests: " +# Iterate the loop to read and print each array element +for value in "${FAILED[@]}" +do + echo "🔺 $value" +done + +# Drop tables + +$CLICKHOUSE_CLIENT $SETTINGS -nm -q ''' + drop table if exists test.dist_hits SYNC; + drop table if exists test.dist_visits SYNC; +'''; diff --git a/utils/ci/install-libraries.sh b/utils/ci/install-libraries.sh index 7615375fbc1..3c26e3b09b1 100755 --- a/utils/ci/install-libraries.sh +++ b/utils/ci/install-libraries.sh @@ -4,4 +4,3 @@ set -e -x source default-config ./install-os-packages.sh libicu-dev -./install-os-packages.sh libreadline-dev diff --git a/utils/ci/install-os-packages.sh b/utils/ci/install-os-packages.sh index 38fa6dbba15..b4b0c74f30c 100755 --- a/utils/ci/install-os-packages.sh +++ b/utils/ci/install-os-packages.sh @@ -46,9 +46,6 @@ case $PACKAGE_MANAGER in libicu-dev) $SUDO apt-get install -y libicu-dev ;; - libreadline-dev) - $SUDO apt-get install -y libreadline-dev - ;; llvm-libs*) $SUDO apt-get install -y ${WHAT/llvm-libs/liblld}-dev ${WHAT/llvm-libs/libclang}-dev ;; @@ -91,9 +88,6 @@ case $PACKAGE_MANAGER in libicu-dev) $SUDO yum install -y libicu-devel ;; - libreadline-dev) - $SUDO yum install -y readline-devel - ;; *) echo "Unknown package"; exit 1; ;; @@ -130,9 +124,6 @@ case $PACKAGE_MANAGER in libicu-dev) $SUDO pkg install -y icu ;; - libreadline-dev) - $SUDO pkg install -y readline - ;; *) echo "Unknown package"; exit 1; ;; diff --git a/utils/github/backport.py b/utils/github/backport.py index a28a1510694..9227dbf4108 100644 --- a/utils/github/backport.py +++ b/utils/github/backport.py @@ -74,7 +74,7 @@ class Backport: # First pass. 
Find all must-backports for label in pr['labels']['nodes']: - if label['name'] == 'pr-bugfix' or label['name'] == 'pr-must-backport': + if label['name'] == 'pr-must-backport': backport_map[pr['number']] = branch_set.copy() continue matched = RE_MUST_BACKPORT.match(label['name']) diff --git a/website/css/main.css b/website/css/main.css index a505beb20bb..7f388ffeab6 100644 --- a/website/css/main.css +++ b/website/css/main.css @@ -1 +1 @@ -@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and 
(min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 
16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar 
.navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and 
(min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file +@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 
24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:grid;-moz-column-gap:24px;column-gap:24px;row-gap:16px;grid-auto-flow:column;justify-content:center}@media screen and (max-width:767.98px){.btns{grid-auto-flow:row}}.btns.btns-lg{-moz-column-gap:40px;column-gap:40px}.btns.is-2{grid-template-columns:1fr 1fr}@media screen and (max-width:767.98px){.btns.is-2{grid-template-columns:1fr}}.btns.is-3{grid-template-columns:1fr 1fr 1fr}@media screen and (max-width:767.98px){.btns.is-3{grid-template-columns:1fr}}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 
40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(50%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(70%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3{left:-60%;position:relative;transition:left .4s;transition-delay:.6s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-3{flex:0 0 250px;max-width:250px;width:250px}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{left:0;transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s;transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{opacity:1;transform:none;transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-group{position:relative}form .form-group.is-select:before{border-left:6px solid transparent;border-right:6px solid transparent;border-top:8px solid #6c757d;content:"";display:block;position:absolute;right:33px;top:calc(50% - 4px);z-index:10}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none;padding-right:24px;white-space:pre-wrap}form 
select.form-control[name=priority]{height:84px}@media screen and (max-width:767.98px){form select.form-control[name=priority]{height:104px}}@media screen and (max-width:499.98px){form select.form-control[name=priority]{height:124px}}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.logo{display:block;height:36px;max-width:220px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;width:100%}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar 
.navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px 
-9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.overflow-auto{overflow:auto}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase}
\ No newline at end of file
diff --git a/website/src/scss/components/_form.scss b/website/src/scss/components/_form.scss
index b37b7044ce1..144b7e7301e 100644
--- a/website/src/scss/components/_form.scss
+++ b/website/src/scss/components/_form.scss
@@ -1,4 +1,20 @@
 form {
+    .form-group {
+        position: relative;
+    }
+
+    .form-group.is-select:before {
+        border-left: 6px solid transparent;
+        border-right: 6px solid transparent;
+        border-top: 8px solid $gray-700;
+        content: '';
+        display: block;
+        position: absolute;
+        right: 33px;
+        top: calc(50% - 4px);
+        z-index: 10;
+    }
+
     .form-control {
         border: 1px solid $gray-700;
         border-radius: 6px;
@@ -23,6 +39,20 @@ form {
 
     select.form-control {
         appearance: none;
+        padding-right: 24px;
+        white-space: pre-wrap;
+
+        &[name=priority] {
+            height: 84px;
+
+            @media screen and (max-width: 767.98px) {
+                height: 104px;
+            }
+
+            @media screen and (max-width: 499.98px) {
+                height: 124px;
+            }
+        }
     }
 
     select.form-control:not([data-chosen]) {
diff --git a/website/support/case/index.html b/website/support/case/index.html
new file mode 100644
index 00000000000..07f77d65d46
--- /dev/null
+++ b/website/support/case/index.html
@@ -0,0 +1,27 @@
+{% set prefetch_items = [
+    ('/docs/en/', 'document')
+] %}
+
+{% extends "templates/base.html" %}
+
+{% block extra_meta %}
+
+{% include "templates/common_fonts.html" %}
+{% endblock %}
+
+{% block nav %}
+
+{% include "templates/global/nav.html" %}
+
+{% endblock %}
+
+{% block content %}
+
+{% include "templates/support/hero.html" %}
+{% include "templates/support/overview.html" %}
+{% include "templates/support/form.html" %}
+
+{% include "templates/global/newsletter.html" %}
+{% include "templates/global/github_stars.html" %}
+
+{% endblock %}
diff --git a/website/templates/support/form.html b/website/templates/support/form.html
new file mode 100644
index 00000000000..14c153c7fde
--- /dev/null
+++ b/website/templates/support/form.html
@@ -0,0 +1,50 @@
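The new .form-group.is-select rule draws a CSS border triangle over the select to stand in for the native dropdown arrow that appearance: none removes, and the fixed heights on select.form-control[name=priority] appear to leave room for long option labels that wrap under white-space: pre-wrap at the narrower breakpoints. A minimal sketch of markup these styles assume follows; the field names, option labels, and comments are hypothetical illustrations, not the contents of website/templates/support/form.html.

<!-- Hypothetical markup for illustration only; not the shipped form.html. -->
<form method="post">
    <div class="form-group">
        <input class="form-control" name="company" placeholder="Company" type="text" required>
    </div>
    <div class="form-group is-select">
        <!-- .form-group.is-select:before paints the arrow; appearance: none hides the native one. -->
        <select class="form-control" name="priority" required>
            <option value="">Select a priority</option>
            <option value="sev-1">Severity 1: production system unavailable</option>
            <option value="sev-2">Severity 2: production system degraded</option>
            <option value="sev-3">Severity 3: question or minor issue</option>
        </select>
    </div>
    <div class="form-group">
        <textarea class="form-control" name="details" placeholder="Describe the problem you're having" rows="6"></textarea>
    </div>
    <button class="btn btn-secondary" type="submit">Submit</button>
</form>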
diff --git a/website/templates/support/hero.html b/website/templates/support/hero.html
new file mode 100644
index 00000000000..22b69256471
--- /dev/null
+++ b/website/templates/support/hero.html
@@ -0,0 +1,10 @@
+    {{ _('Support Services') }}
diff --git a/website/templates/support/overview.html b/website/templates/support/overview.html
new file mode 100644
index 00000000000..429b8d23c5b
--- /dev/null
+++ b/website/templates/support/overview.html
@@ -0,0 +1,13 @@
+    Enter Your Support Case Details
+
+    Describe the problem you're having. Once you hit submit, we'll log a case for you and kick off an email thread to collaborate on your issue. If you have an attachment (e.g. log files) to share, respond to the case email thread and include it as an attachment.