diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 417284f14d5..44fe082b04d 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -9,6 +9,18 @@ on: # yamllint disable-line rule:truthy branches: - 'backport/**' jobs: + PythonUnitTests: + runs-on: [self-hosted, style-checker] + steps: + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Python unit tests + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: @@ -143,8 +155,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -184,8 +196,8 @@ jobs: - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -229,8 +241,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -274,8 +286,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -319,8 +331,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index eab7ce36eb7..efaf1c64c05 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -219,8 +219,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -260,8 +260,8 @@ jobs: - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -305,8 +305,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -350,8 +350,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME 
}} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -395,8 +395,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -440,8 +440,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -485,8 +485,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -530,8 +530,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -575,8 +575,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -620,8 +620,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -668,8 +668,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -713,8 +713,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -758,8 +758,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -803,8 +803,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -848,8 +848,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() 
run: | @@ -893,8 +893,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -938,8 +938,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 836421f34dd..bd54fd975c0 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -112,7 +112,7 @@ jobs: run: | curl --form token="${COVERITY_TOKEN}" \ --form email='security+coverity@clickhouse.com' \ - --form file="@$TEMP_PATH/$BUILD_NAME/clickhouse-scan.tgz" \ + --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tgz" \ --form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \ --form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \ https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 8942cca391e..8072f816cb8 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -272,8 +272,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -317,8 +317,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -362,8 +362,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -404,8 +404,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -446,8 +446,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -491,8 +491,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -536,8 +536,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: 
${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -581,8 +581,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -626,8 +626,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -671,8 +671,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -719,8 +719,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -764,8 +764,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -809,8 +809,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -854,8 +854,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -899,8 +899,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -944,8 +944,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -989,8 +989,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 29e3d0c4358..ea2e1ed33fb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -52,8 +52,8 @@ jobs: - name: Check docker clickhouse/clickhouse-server building run: | cd "$GITHUB_WORKSPACE/tests/ci" - python3 docker_server.py --release-type auto - python3 docker_server.py --release-type auto 
--no-ubuntu \ + python3 docker_server.py --release-type auto --version "${{ github.ref }}" + python3 docker_server.py --release-type auto --version "${{ github.ref }}" --no-ubuntu \ --image-repo clickhouse/clickhouse-keeper --image-path docker/keeper - name: Cleanup if: always() diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index b2af465142b..91e1a224204 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -146,8 +146,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -187,8 +187,8 @@ jobs: - name: Upload build URLs to artifacts uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ runner.temp }}/build_check/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -232,8 +232,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -277,8 +277,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -322,8 +322,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -367,8 +367,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | @@ -412,8 +412,8 @@ jobs: if: ${{ success() || failure() }} uses: actions/upload-artifact@v2 with: - name: ${{ env.BUILD_NAME }} - path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index d893ba773cc..dad9a25ab26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,6 +222,12 @@ else () set(NO_WHOLE_ARCHIVE --no-whole-archive) endif () +option(ENABLE_CURL_BUILD "Enable curl, azure, sentry build. On by default except on MacOS." ON) +if (OS_DARWIN) + # Disable the curl, azure, sentry build on MacOS + set (ENABLE_CURL_BUILD OFF) +endif () + # Ignored if `lld` is used option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.") diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 311349a2ba7..b27a904b31a 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -828,7 +828,6 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() /// Setup signal handlers. /// SIGTSTP is added for debugging purposes. 
To output a stack trace of any running thread at anytime. - addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals); addSignalHandler({SIGHUP}, closeLogsSignalHandler, &handled_signals); addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals); diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9cf307c473e..1f03c0fd341 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -119,9 +119,13 @@ add_contrib (fastops-cmake fastops) add_contrib (libuv-cmake libuv) add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv add_contrib (cassandra-cmake cassandra) # requires: libuv -add_contrib (curl-cmake curl) -add_contrib (azure-cmake azure) -add_contrib (sentry-native-cmake sentry-native) # requires: curl + +if (ENABLE_CURL_BUILD) + add_contrib (curl-cmake curl) + add_contrib (azure-cmake azure) + add_contrib (sentry-native-cmake sentry-native) # requires: curl +endif() + add_contrib (fmtlib-cmake fmtlib) add_contrib (krb5-cmake krb5) add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5 diff --git a/contrib/curl b/contrib/curl index 3b8bbbbd160..801bd5138ce 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit 3b8bbbbd1609c638a3d3d0acb148a33dedb67be3 +Subproject commit 801bd5138ce31aa0d906fa4e2eabfc599d74e793 diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 589f40384e3..b1e1a0ded8a 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -32,7 +32,6 @@ set (SRCS "${LIBRARY_DIR}/lib/transfer.c" "${LIBRARY_DIR}/lib/strcase.c" "${LIBRARY_DIR}/lib/easy.c" - "${LIBRARY_DIR}/lib/security.c" "${LIBRARY_DIR}/lib/curl_fnmatch.c" "${LIBRARY_DIR}/lib/fileinfo.c" "${LIBRARY_DIR}/lib/wildcard.c" @@ -115,6 +114,12 @@ set (SRCS "${LIBRARY_DIR}/lib/curl_get_line.c" "${LIBRARY_DIR}/lib/altsvc.c" "${LIBRARY_DIR}/lib/socketpair.c" + "${LIBRARY_DIR}/lib/bufref.c" + "${LIBRARY_DIR}/lib/dynbuf.c" + "${LIBRARY_DIR}/lib/hsts.c" + "${LIBRARY_DIR}/lib/http_aws_sigv4.c" + "${LIBRARY_DIR}/lib/mqtt.c" + "${LIBRARY_DIR}/lib/rename.c" "${LIBRARY_DIR}/lib/vauth/vauth.c" "${LIBRARY_DIR}/lib/vauth/cleartext.c" "${LIBRARY_DIR}/lib/vauth/cram.c" @@ -131,8 +136,6 @@ set (SRCS "${LIBRARY_DIR}/lib/vtls/gtls.c" "${LIBRARY_DIR}/lib/vtls/vtls.c" "${LIBRARY_DIR}/lib/vtls/nss.c" - "${LIBRARY_DIR}/lib/vtls/polarssl.c" - "${LIBRARY_DIR}/lib/vtls/polarssl_threadlock.c" "${LIBRARY_DIR}/lib/vtls/wolfssl.c" "${LIBRARY_DIR}/lib/vtls/schannel.c" "${LIBRARY_DIR}/lib/vtls/schannel_verify.c" @@ -141,6 +144,7 @@ set (SRCS "${LIBRARY_DIR}/lib/vtls/mbedtls.c" "${LIBRARY_DIR}/lib/vtls/mesalink.c" "${LIBRARY_DIR}/lib/vtls/bearssl.c" + "${LIBRARY_DIR}/lib/vtls/keylog.c" "${LIBRARY_DIR}/lib/vquic/ngtcp2.c" "${LIBRARY_DIR}/lib/vquic/quiche.c" "${LIBRARY_DIR}/lib/vssh/libssh2.c" diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 861e17848a4..6aa9d88f5b4 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -96,7 +96,7 @@ else clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer 
String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits" + clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" fi clickhouse-client --query "SHOW TABLES FROM test" diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index f8b73791388..63750b90b5a 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -139,7 +139,7 @@ pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhous # directly # - even though ci auto-compress some files (but not *.tsv) it does this only # for files >64MB, we want this files to be compressed explicitly -for table in query_log zookeeper_log trace_log +for table in query_log zookeeper_log trace_log transactions_info_log do clickhouse-client -q "select * 
from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz & if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 86f8edf5980..10c6088af75 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -10,7 +10,7 @@ import logging import time -def get_options(i): +def get_options(i, backward_compatibility_check): options = [] client_options = [] if 0 < i: @@ -19,7 +19,7 @@ def get_options(i): if i % 3 == 1: options.append("--db-engine=Ordinary") - if i % 3 == 2: + if i % 3 == 2 and not backward_compatibility_check: options.append('''--db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)) client_options.append('allow_experimental_database_replicated=1') @@ -57,7 +57,7 @@ def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_t pipes = [] for i in range(0, len(output_paths)): f = open(output_paths[i], 'w') - full_command = "{} {} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option, backward_compatibility_check_option) + full_command = "{} {} {} {} {}".format(cmd, get_options(i, backward_compatibility_check), global_time_limit_option, skip_tests_option, backward_compatibility_check_option) logging.info("Run func tests '%s'", full_command) p = Popen(full_command, shell=True, stdout=f, stderr=f) pipes.append(p) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 20d6b20feb6..98eea85bbfa 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -158,6 +158,7 @@ toc_title: Adopters | Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | | Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | | Superwall | Monetization Tooling | Main product | — | — | [Word of mouth, Jan 2022](https://github.com/ClickHouse/ClickHouse/pull/33573) | +| Swetrix | Analytics | Main Product | — | — | [Source code](https://github.com/swetrix/swetrix-api) | | Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | | Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f9996cbfb0b..07abd77fed0 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -519,6 +519,33 @@ Possible values: Default value: `1`. +## allow_settings_after_format_in_insert {#allow_settings_after_format_in_insert} + +Control whether `SETTINGS` after `FORMAT` in `INSERT` queries is allowed or not. It is not recommended to use this, since this may interpret part of `SETTINGS` as values. + +Example: + +```sql +INSERT INTO FUNCTION null('foo String') SETTINGS max_threads=1 VALUES ('bar'); +``` + +But the following query will work only with `allow_settings_after_format_in_insert`: + +```sql +SET allow_settings_after_format_in_insert=1; +INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1; +``` + +Possible values: + +- 0 — Disallow. 
+- 1 — Allow. + +Default value: `0`. + +!!! note "Warning" + Use this setting only for backward compatibility if your use cases depend on old syntax. + ## input_format_skip_unknown_fields {#settings-input-format-skip-unknown-fields} Enables or disables skipping insertion of extra data. diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 835afcdb2ed..50d85cdd43d 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -54,6 +54,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ("multiquery,n", "allow multiple queries in the same file") ("obfuscate", "obfuscate instead of formatting") ("backslash", "add a backslash at the end of each line of the formatted query") + ("allow_settings_after_format_in_insert", "Allow SETTINGS after FORMAT, but note, that this is not always safe") ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") ; @@ -83,6 +84,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) bool multiple = options.count("multiquery"); bool obfuscate = options.count("obfuscate"); bool backslash = options.count("backslash"); + bool allow_settings_after_format_in_insert = options.count("allow_settings_after_format_in_insert"); if (quiet && (hilite || oneline || obfuscate)) { @@ -154,7 +156,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) const char * pos = query.data(); const char * end = pos + query.size(); - ParserQuery parser(end); + ParserQuery parser(end, allow_settings_after_format_in_insert); do { ASTPtr res = parseQueryAndMovePosition( diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 0b5a7724fe5..fc9187cb622 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1639,6 +1639,8 @@ int Server::main(const std::vector & /*args*/) server.start(); LOG_INFO(log, "Listening for {}", server.getDescription()); } + + global_context->setServerCompletelyStarted(); LOG_INFO(log, "Ready for connections."); } diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index fb5eafbe679..accfa0ad33d 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -102,6 +102,7 @@ enum class AccessType \ M(KILL_QUERY, "", GLOBAL, ALL) /* allows to kill a query started by another user (anyone can kill his own queries) */\ + M(KILL_TRANSACTION, "", GLOBAL, ALL) \ \ M(MOVE_PARTITION_BETWEEN_SHARDS, "", GLOBAL, ALL) /* required to be able to move a part/partition to a table identified by its ZooKeeper path */\ diff --git a/src/Backups/ArchiveBackup.cpp b/src/Backups/ArchiveBackup.cpp index 5d4837fff2e..0c4b0c3cd40 100644 --- a/src/Backups/ArchiveBackup.cpp +++ b/src/Backups/ArchiveBackup.cpp @@ -42,6 +42,14 @@ void ArchiveBackup::openImpl(OpenMode open_mode_) /// mutex is already locked if (open_mode_ == OpenMode::WRITE) { + /// Create a directory to contain the archive. + auto dir_path = fs::path(path).parent_path(); + if (disk) + disk->createDirectories(dir_path); + else + std::filesystem::create_directories(dir_path); + + /// Start writing the archive. 
if (disk) writer = createArchiveWriter(path, disk->writeFile(path)); else @@ -65,7 +73,7 @@ void ArchiveBackup::openImpl(OpenMode open_mode_) } } -void ArchiveBackup::closeImpl(bool writing_finalized_) +void ArchiveBackup::closeImpl(const Strings &, bool writing_finalized_) { /// mutex is already locked if (writer && writer->isWritingFile()) diff --git a/src/Backups/ArchiveBackup.h b/src/Backups/ArchiveBackup.h index 9649c0c1843..d947fa16beb 100644 --- a/src/Backups/ArchiveBackup.h +++ b/src/Backups/ArchiveBackup.h @@ -35,7 +35,7 @@ public: private: bool backupExists() const override; void openImpl(OpenMode open_mode_) override; - void closeImpl(bool writing_finalized_) override; + void closeImpl(const Strings & written_files_, bool writing_finalized_) override; bool supportsWritingInMultipleThreads() const override { return false; } std::unique_ptr readFileImpl(const String & file_name) const override; std::unique_ptr writeFileImpl(const String & file_name) override; diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index e4fc894837a..21300f2dbae 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -107,6 +107,7 @@ void BackupImpl::open(OpenMode open_mode_) timestamp = std::time(nullptr); uuid = UUIDHelpers::generateV4(); writing_finalized = false; + written_files.clear(); } if (open_mode_ == OpenMode::READ) @@ -145,7 +146,7 @@ void BackupImpl::close() if (open_mode == OpenMode::NONE) return; - closeImpl(writing_finalized); + closeImpl(written_files, writing_finalized); uuid = UUIDHelpers::Nil; timestamp = 0; @@ -202,9 +203,12 @@ void BackupImpl::writeBackupMetadata() config->setString(prefix + "checksum", getHexUIntLowercase(info.checksum)); if (info.base_size) { - config->setUInt(prefix + "base_size", info.base_size); - if (info.base_checksum != info.checksum) + config->setBool(prefix + "use_base", true); + if (info.base_size != info.size) + { + config->setUInt(prefix + "base_size", info.base_size); config->setString(prefix + "base_checksum", getHexUIntLowercase(info.base_checksum)); + } } } ++index; @@ -213,6 +217,7 @@ void BackupImpl::writeBackupMetadata() std::ostringstream stream; // STYLE_CHECK_ALLOW_STD_STRING_STREAM config->save(stream); String str = stream.str(); + written_files.push_back(".backup"); auto out = writeFileImpl(".backup"); out->write(str.data(), str.size()); } @@ -253,13 +258,14 @@ void BackupImpl::readBackupMetadata() if (info.size) { info.checksum = unhexChecksum(config->getString(prefix + "checksum")); - info.base_size = config->getUInt(prefix + "base_size", 0); + bool use_base = config->getBool(prefix + "use_base", false); + info.base_size = config->getUInt(prefix + "base_size", use_base ? info.size : 0); if (info.base_size) { - if (config->has(prefix + "base_checksum")) - info.base_checksum = unhexChecksum(config->getString(prefix + "base_checksum")); - else + if (info.base_size == info.size) info.base_checksum = info.checksum; + else + info.base_checksum = unhexChecksum(config->getString(prefix + "base_checksum")); } } file_infos.emplace(name, info); @@ -345,11 +351,6 @@ BackupEntryPtr BackupImpl::readFile(const String & file_name) const return std::make_unique(nullptr, 0, UInt128{0, 0}); } - auto read_callback = [backup = std::static_pointer_cast(shared_from_this()), file_name]() - { - return backup->readFileImpl(file_name); - }; - if (!info.base_size) { /// Data goes completely from this backup, the base backup isn't used. 
@@ -526,6 +527,7 @@ void BackupImpl::writeFile(const String & file_name, BackupEntryPtr entry) } /// Copy the entry's data after `copy_pos`. + written_files.push_back(file_name); auto out = writeFileImpl(file_name); copyData(*read_buffer, *out); diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h index d1fc3c3248c..597b025d0ef 100644 --- a/src/Backups/BackupImpl.h +++ b/src/Backups/BackupImpl.h @@ -47,7 +47,7 @@ protected: virtual void openImpl(OpenMode open_mode_) = 0; OpenMode getOpenModeNoLock() const { return open_mode; } - virtual void closeImpl(bool writing_finalized_) = 0; + virtual void closeImpl(const Strings & written_files_, bool writing_finalized_) = 0; /// Read a file from the backup. /// Low level: the function doesn't check base backup or checksums. @@ -86,6 +86,7 @@ private: std::optional base_backup_uuid; std::map file_infos; /// Should be ordered alphabetically, see listFiles(). std::unordered_map file_checksums; + Strings written_files; bool writing_finalized = false; }; diff --git a/src/Backups/BackupInfo.cpp b/src/Backups/BackupInfo.cpp index ba953b818c2..cab08e306d6 100644 --- a/src/Backups/BackupInfo.cpp +++ b/src/Backups/BackupInfo.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -23,7 +24,11 @@ String BackupInfo::toString() const auto list = std::make_shared(); func->arguments = list; func->children.push_back(list); - list->children.reserve(args.size()); + list->children.reserve(args.size() + !id_arg.empty()); + + if (!id_arg.empty()) + list->children.push_back(std::make_shared(id_arg)); + for (const auto & arg : args) list->children.push_back(std::make_shared(arg)); @@ -53,9 +58,22 @@ BackupInfo BackupInfo::fromAST(const IAST & ast) const auto * list = func->arguments->as(); if (!list) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected list, got {}", serializeAST(*func->arguments)); - res.args.reserve(list->children.size()); - for (const auto & elem : list->children) + + size_t index = 0; + if (!list->children.empty()) { + const auto * id = list->children[0]->as(); + if (id) + { + res.id_arg = id->name(); + ++index; + } + } + + res.args.reserve(list->children.size() - index); + for (; index < list->children.size(); ++index) + { + const auto & elem = list->children[index]; const auto * lit = elem->as(); if (!lit) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected literal, got {}", serializeAST(*elem)); diff --git a/src/Backups/BackupInfo.h b/src/Backups/BackupInfo.h index 9b7d03c6d6a..5b5c676ecf1 100644 --- a/src/Backups/BackupInfo.h +++ b/src/Backups/BackupInfo.h @@ -11,6 +11,7 @@ class IAST; struct BackupInfo { String backup_engine_name; + String id_arg; std::vector args; String toString() const; diff --git a/src/Backups/DirectoryBackup.cpp b/src/Backups/DirectoryBackup.cpp index dc4d098dbe9..0deb41c200d 100644 --- a/src/Backups/DirectoryBackup.cpp +++ b/src/Backups/DirectoryBackup.cpp @@ -1,16 +1,9 @@ #include -#include -#include #include namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - DirectoryBackup::DirectoryBackup( const String & backup_name_, @@ -19,23 +12,16 @@ DirectoryBackup::DirectoryBackup( const ContextPtr & context_, const std::optional & base_backup_info_) : BackupImpl(backup_name_, context_, base_backup_info_) - , disk(disk_), path(path_) + , disk(disk_) { - /// Path to backup must end with '/' - if (!path.ends_with("/")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to backup must end with '/', but {} doesn't.", getName(), quoteString(path)); - 
dir_path = fs::path(path).parent_path(); /// get path without terminating slash + /// Remove terminating slash. + path = (std::filesystem::path(path_) / "").parent_path(); /// If `disk` is not specified, we create an internal instance of `DiskLocal` here. if (!disk) { - auto fspath = fs::path{dir_path}; - if (!fspath.has_filename()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Backup {}: Path to a backup must be a directory path.", getName(), quoteString(path)); - path = fspath.filename() / ""; - dir_path = fs::path(path).parent_path(); /// get path without terminating slash - String disk_path = fspath.remove_filename(); - disk = std::make_shared(disk_path, disk_path, 0); + disk = std::make_shared(path, path, 0); + path = "."; } } @@ -47,34 +33,38 @@ DirectoryBackup::~DirectoryBackup() bool DirectoryBackup::backupExists() const { - return disk->isDirectory(dir_path); + return disk->isDirectory(path); } void DirectoryBackup::openImpl(OpenMode open_mode_) { if (open_mode_ == OpenMode::WRITE) - disk->createDirectories(dir_path); + disk->createDirectories(path); } -void DirectoryBackup::closeImpl(bool writing_finalized_) +void DirectoryBackup::closeImpl(const Strings & written_files_, bool writing_finalized_) { - if ((getOpenModeNoLock() == OpenMode::WRITE) && !writing_finalized_ && disk->isDirectory(dir_path)) + if ((getOpenModeNoLock() == OpenMode::WRITE) && !writing_finalized_ && !written_files_.empty()) { /// Creating of the backup wasn't finished correctly, /// so the backup cannot be used and it's better to remove its files. - disk->removeRecursive(dir_path); + const auto & files_to_delete = written_files_; + for (const String & file_name : files_to_delete) + disk->removeFileIfExists(path / file_name); + if (disk->isDirectory(path) && disk->isDirectoryEmpty(path)) + disk->removeDirectory(path); } } std::unique_ptr DirectoryBackup::readFileImpl(const String & file_name) const { - String file_path = path + file_name; + auto file_path = path / file_name; return disk->readFile(file_path); } std::unique_ptr DirectoryBackup::writeFileImpl(const String & file_name) { - String file_path = path + file_name; + auto file_path = path / file_name; disk->createDirectories(fs::path(file_path).parent_path()); return disk->writeFile(file_path); } diff --git a/src/Backups/DirectoryBackup.h b/src/Backups/DirectoryBackup.h index 7d9b5cc4557..499a1893dca 100644 --- a/src/Backups/DirectoryBackup.h +++ b/src/Backups/DirectoryBackup.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -25,13 +26,12 @@ public: private: bool backupExists() const override; void openImpl(OpenMode open_mode_) override; - void closeImpl(bool writing_finalized_) override; + void closeImpl(const Strings & written_files_, bool writing_finalized_) override; std::unique_ptr readFileImpl(const String & file_name) const override; std::unique_ptr writeFileImpl(const String & file_name) override; DiskPtr disk; - String path; - String dir_path; /// `path` without terminating slash + std::filesystem::path path; }; } diff --git a/src/Backups/registerBackupEnginesFileAndDisk.cpp b/src/Backups/registerBackupEnginesFileAndDisk.cpp index 6a34d67115d..e3b06a21d96 100644 --- a/src/Backups/registerBackupEnginesFileAndDisk.cpp +++ b/src/Backups/registerBackupEnginesFileAndDisk.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -13,8 +14,9 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INVALID_CONFIG_PARAMETER; + 
extern const int LOGICAL_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -22,83 +24,70 @@ namespace { namespace fs = std::filesystem; - [[noreturn]] void throwDiskIsAllowed(const String & disk_name) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk {} is not allowed for backups", disk_name); - } - - [[noreturn]] void throwPathNotAllowed(const fs::path & path) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} is not allowed for backups", quoteString(String{path})); - } - - void checkAllowedPathInConfigIsValid(const String & key, const fs::path & value) - { - if (value.empty() || value.is_relative()) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Configuration parameter {} has a wrong value {}", key, String{value}); - } - - /// Checks that a disk name and a path specified as parameters of Disk() are valid. - void checkDiskNameAndPath(const String & disk_name, fs::path & path, const Poco::Util::AbstractConfiguration & config) + /// Checks that a disk name specified as parameters of Disk() is valid. + void checkDiskName(const String & disk_name, const Poco::Util::AbstractConfiguration & config) { String key = "backups.allowed_disk"; - bool disk_name_found = false; + if (!config.has(key)) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "The \"backups.allowed_disk\" configuration parameter is not set, cannot use Disk() backup engine"); + size_t counter = 0; - while (config.has(key)) + while (config.getString(key) != disk_name) { - if (config.getString(key) == disk_name) - { - disk_name_found = true; - break; - } key = "backups.allowed_disk[" + std::to_string(++counter) + "]"; + if (!config.has(key)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Disk {} is not allowed for backups, see the \"backups.allowed_disk\" configuration parameter", disk_name); } - - if (!disk_name_found) - throwDiskIsAllowed(disk_name); - - path = path.lexically_normal(); - if (!path.is_relative() || path.empty() || (*path.begin() == "..")) - throwPathNotAllowed(path); } - /// Checks that a path specified as a parameter of File() is valid. - void checkPath(fs::path & path, const Poco::Util::AbstractConfiguration & config) + /// Checks that a path specified as parameters of Disk() is valid. + void checkPath(const String & disk_name, const DiskPtr & disk, fs::path & path) { - String key = "backups.allowed_path"; + path = path.lexically_normal(); + if (!path.is_relative() && (disk->getType() == DiskType::Local)) + path = path.lexically_proximate(disk->getPath()); + bool path_ok = path.empty() || (path.is_relative() && (*path.begin() != "..")); + if (!path_ok) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} to backup must be inside the specified disk {}", quoteString(path.c_str()), disk_name); + } + + /// Checks that a path specified as parameters of File() is valid. 
+ void checkPath(fs::path & path, const Poco::Util::AbstractConfiguration & config, const fs::path & data_dir) { path = path.lexically_normal(); if (path.empty()) - throwPathNotAllowed(path); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to backup must not be empty"); + + String key = "backups.allowed_path"; + if (!config.has(key)) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "The \"backups.allowed_path\" configuration parameter is not set, cannot use File() backup engine"); if (path.is_relative()) { - if (*path.begin() == "..") - throwPathNotAllowed(path); + auto first_allowed_path = fs::path(config.getString(key)); + if (first_allowed_path.is_relative()) + first_allowed_path = data_dir / first_allowed_path; - auto base = fs::path(config.getString(key, "")); - checkAllowedPathInConfigIsValid(key, base); - path = base / path; - return; + path = first_allowed_path / path; } - bool path_found_in_config = false; size_t counter = 0; - while (config.has(key)) + while (true) { - auto base = fs::path(config.getString(key)); - checkAllowedPathInConfigIsValid(key, base); - auto rel = path.lexically_relative(base); - if (!rel.empty() && (*rel.begin() != "..")) - { - path_found_in_config = true; + auto allowed_path = fs::path(config.getString(key)); + if (allowed_path.is_relative()) + allowed_path = data_dir / allowed_path; + auto rel = path.lexically_proximate(allowed_path); + bool path_ok = rel.empty() || (rel.is_relative() && (*rel.begin() != "..")); + if (path_ok) break; - } key = "backups.allowed_path[" + std::to_string(++counter) + "]"; + if (!config.has(key)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Path {} is not allowed for backups, see the \"backups.allowed_path\" configuration parameter", + quoteString(path.c_str())); } - - if (!path_found_in_config) - throwPathNotAllowed(path); } } @@ -109,6 +98,15 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) { String backup_name = params.backup_info.toString(); const String & engine_name = params.backup_info.backup_engine_name; + + if (!params.backup_info.id_arg.empty()) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Backup engine '{}' requires first argument to be a string", + engine_name); + } + const auto & args = params.backup_info.args; DiskPtr disk; @@ -123,7 +121,9 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) } path = args[0].safeGet(); - checkPath(path, params.context->getConfigRef()); + const auto & config = params.context->getConfigRef(); + const auto & data_dir = params.context->getPath(); + checkPath(path, config, data_dir); } else if (engine_name == "Disk") { @@ -135,30 +135,28 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory) } String disk_name = args[0].safeGet(); + const auto & config = params.context->getConfigRef(); + checkDiskName(disk_name, config); path = args[1].safeGet(); - checkDiskNameAndPath(disk_name, path, params.context->getConfigRef()); disk = params.context->getDisk(disk_name); + checkPath(disk_name, disk, path); } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected backup engine '{}'", engine_name); - std::unique_ptr backup; - - if (!path.has_filename() && !path.empty()) - { - if (!params.password.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted"); - backup = std::make_unique(backup_name, disk, path, params.context, params.base_backup_info); - } - else if (hasRegisteredArchiveFileExtension(path)) + if (hasRegisteredArchiveFileExtension(path)) { auto 
archive_backup = std::make_unique(backup_name, disk, path, params.context, params.base_backup_info); archive_backup->setCompression(params.compression_method, params.compression_level); archive_backup->setPassword(params.password); - backup = std::move(archive_backup); + return archive_backup; } else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to backup must be either a directory or a path to an archive"); - - return backup; + { + if (!params.password.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted"); + return std::make_unique(backup_name, disk, path, params.context, params.base_backup_info); + } }; factory.registerBackupEngine("File", creator_fn); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 93ed1780e3b..9092cb16663 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -275,7 +275,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end); + ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -1129,7 +1129,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des sendDataFromPipe( storage->read( sample.getNames(), - storage->getStorageSnapshot(metadata), + storage->getStorageSnapshot(metadata, global_context), query_info, global_context, {}, diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index 9dbe8d355d6..4888adb511a 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -91,6 +91,7 @@ public: struct QueryScope { explicit QueryScope(ContextMutablePtr query_context); + explicit QueryScope(ContextPtr query_context); ~QueryScope(); void logPeakMemoryUsage(); diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 2e60e125d73..3097af6207c 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -617,6 +617,8 @@ M(646, CANNOT_BACKUP_DATABASE) \ M(647, CANNOT_BACKUP_TABLE) \ M(648, WRONG_DDL_RENAMING_SETTINGS) \ + M(649, INVALID_TRANSACTION) \ + M(650, SERIALIZATION_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index d648267b95d..05d32f5ffe4 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -31,13 +32,11 @@ namespace IFileCache::IFileCache( const String & cache_base_path_, - size_t max_size_, - size_t max_element_size_, - size_t max_file_segment_size_) + const FileCacheSettings & cache_settings_) : cache_base_path(cache_base_path_) - , max_size(max_size_) - , max_element_size(max_element_size_) - , max_file_segment_size(max_file_segment_size_) + , max_size(cache_settings_.max_size) + , max_element_size(cache_settings_.max_elements) + , max_file_segment_size(cache_settings_.max_file_segment_size) { } @@ -58,7 +57,7 @@ String IFileCache::getPathInLocalCache(const Key & key) return fs::path(cache_base_path) / key_str.substr(0, 3) / key_str; } -bool IFileCache::shouldBypassCache() +bool IFileCache::isReadOnly() { return !CurrentThread::isInitialized() || !CurrentThread::get().getQueryContext() @@ -71,8 +70,8 @@ void IFileCache::assertInitialized() const throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cache not initialized"); } -LRUFileCache::LRUFileCache(const 
String & cache_base_path_, size_t max_size_, size_t max_element_size_, size_t max_file_segment_size_) - : IFileCache(cache_base_path_, max_size_, max_element_size_, max_file_segment_size_) +LRUFileCache::LRUFileCache(const String & cache_base_path_, const FileCacheSettings & cache_settings_) + : IFileCache(cache_base_path_, cache_settings_) , log(&Poco::Logger::get("LRUFileCache")) { } @@ -205,8 +204,8 @@ FileSegments LRUFileCache::getImpl( return result; } -FileSegments LRUFileCache::splitRangeIntoEmptyCells( - const Key & key, size_t offset, size_t size, std::lock_guard & cache_lock) +FileSegments LRUFileCache::splitRangeIntoCells( + const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard & cache_lock) { assert(size > 0); @@ -222,9 +221,10 @@ FileSegments LRUFileCache::splitRangeIntoEmptyCells( current_cell_size = std::min(remaining_size, max_file_segment_size); remaining_size -= current_cell_size; - auto * cell = addCell(key, current_pos, current_cell_size, FileSegment::State::EMPTY, cache_lock); + auto * cell = addCell(key, current_pos, current_cell_size, state, cache_lock); if (cell) file_segments.push_back(cell->file_segment); + assert(cell); current_pos += current_cell_size; } @@ -250,7 +250,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t if (file_segments.empty()) { - file_segments = splitRangeIntoEmptyCells(key, offset, size, cache_lock); + file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::EMPTY, cache_lock); } else { @@ -295,7 +295,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t assert(current_pos < segment_range.left); auto hole_size = segment_range.left - current_pos; - file_segments.splice(it, splitRangeIntoEmptyCells(key, current_pos, hole_size, cache_lock)); + file_segments.splice(it, splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock)); current_pos = segment_range.right + 1; ++it; @@ -309,7 +309,7 @@ FileSegmentsHolder LRUFileCache::getOrSet(const Key & key, size_t offset, size_t /// segmentN auto hole_size = range.right - current_pos + 1; - file_segments.splice(file_segments.end(), splitRangeIntoEmptyCells(key, current_pos, hole_size, cache_lock)); + file_segments.splice(file_segments.end(), splitRangeIntoCells(key, current_pos, hole_size, FileSegment::State::EMPTY, cache_lock)); } } @@ -354,6 +354,21 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( return &(it->second); } +FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset, size_t size) +{ + std::lock_guard cache_lock(mutex); + + auto * cell = getCell(key, offset, cache_lock); + if (cell) + throw Exception( + ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Cache cell already exists for key `{}` and offset {}", + keyToStr(key), offset); + + auto file_segments = splitRangeIntoCells(key, offset, size, FileSegment::State::DOWNLOADING, cache_lock); + return FileSegmentsHolder(std::move(file_segments)); +} + bool LRUFileCache::tryReserve( const Key & key_, size_t offset_, size_t size, std::lock_guard & cache_lock) { @@ -372,7 +387,8 @@ bool LRUFileCache::tryReserve( auto is_overflow = [&] { - return (current_size + size - removed_size > max_size) + /// max_size == 0 means unlimited cache size, max_element_size means unlimited number of cache elements. 
+ return (max_size != 0 && current_size + size - removed_size > max_size) || (max_element_size != 0 && queue_size > max_element_size); }; @@ -484,6 +500,30 @@ void LRUFileCache::remove(const Key & key) fs::remove(key_path); } +void LRUFileCache::tryRemoveAll() +{ + /// Try remove all cached files by cache_base_path. + /// Only releasable file segments are evicted. + + std::lock_guard cache_lock(mutex); + + for (auto it = queue.begin(); it != queue.end();) + { + auto & [key, offset] = *it++; + + auto * cell = getCell(key, offset, cache_lock); + if (cell->releasable()) + { + auto file_segment = cell->file_segment; + if (file_segment) + { + std::lock_guard segment_lock(file_segment->mutex); + remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock); + } + } + } +} + void LRUFileCache::remove( Key key, size_t offset, std::lock_guard & cache_lock, std::lock_guard & /* segment_lock */) @@ -668,6 +708,38 @@ bool LRUFileCache::isLastFileSegmentHolder( return cell->file_segment.use_count() == 2; } +FileSegments LRUFileCache::getSnapshot() const +{ + std::lock_guard cache_lock(mutex); + + FileSegments file_segments; + + for (const auto & [key, cells_by_offset] : files) + { + for (const auto & [offset, cell] : cells_by_offset) + file_segments.push_back(FileSegment::getSnapshot(cell.file_segment, cache_lock)); + } + + return file_segments; +} + +std::vector LRUFileCache::tryGetCachePaths(const Key & key) +{ + std::lock_guard cache_lock(mutex); + + std::vector cache_paths; + + const auto & cells_by_offset = files[key]; + + for (const auto & [offset, cell] : cells_by_offset) + { + if (cell.file_segment->state() == FileSegment::State::DOWNLOADED) + cache_paths.push_back(getPathInLocalCache(key, offset)); + } + + return cache_paths; +} + LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRUQueue & queue_) : file_segment(file_segment_) { @@ -685,12 +757,13 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(FileSegmentPtr file_segment_, LRU break; } case FileSegment::State::EMPTY: + case FileSegment::State::DOWNLOADING: { break; } default: throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Can create cell with either DOWNLOADED or EMPTY state, got: {}", + "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state, got: {}", FileSegment::stateToString(file_segment->download_state)); } } diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index d58711cef0a..e706376bc89 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -33,9 +33,7 @@ public: IFileCache( const String & cache_base_path_, - size_t max_size_, - size_t max_element_size_, - size_t max_file_segment_size_); + const FileCacheSettings & cache_settings_); virtual ~IFileCache() = default; @@ -44,7 +42,9 @@ public: virtual void remove(const Key & key) = 0; - static bool shouldBypassCache(); + virtual void tryRemoveAll() = 0; + + static bool isReadOnly(); /// Cache capacity in bytes. 
size_t capacity() const { return max_size; } @@ -55,6 +55,10 @@ public: String getPathInLocalCache(const Key & key); + const String & getBasePath() const { return cache_base_path; } + + virtual std::vector tryGetCachePaths(const Key & key) = 0; + /** * Given an `offset` and `size` representing [offset, offset + size) bytes interval, * return list of cached non-overlapping non-empty @@ -68,6 +72,10 @@ public: */ virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0; + virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0; + + virtual FileSegments getSnapshot() const = 0; + /// For debug. virtual String dumpStructure(const Key & key) = 0; @@ -112,16 +120,22 @@ class LRUFileCache final : public IFileCache public: LRUFileCache( const String & cache_base_path_, - size_t max_size_, - size_t max_element_size_ = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS, - size_t max_file_segment_size_ = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); + const FileCacheSettings & cache_settings_); FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override; + FileSegments getSnapshot() const override; + + FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override; + void initialize() override; void remove(const Key & key) override; + void tryRemoveAll() override; + + std::vector tryGetCachePaths(const Key & key) override; + private: using FileKeyAndOffset = std::pair; using LRUQueue = std::list; @@ -194,8 +208,8 @@ private: void loadCacheInfoIntoMemory(); - FileSegments splitRangeIntoEmptyCells( - const Key & key, size_t offset, size_t size, std::lock_guard & cache_lock); + FileSegments splitRangeIntoCells( + const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard & cache_lock); String dumpStructureImpl(const Key & key_, std::lock_guard & cache_lock); diff --git a/src/Common/FileCacheFactory.cpp b/src/Common/FileCacheFactory.cpp index fc8dff0b26c..9eadea05547 100644 --- a/src/Common/FileCacheFactory.cpp +++ b/src/Common/FileCacheFactory.cpp @@ -15,28 +15,53 @@ FileCacheFactory & FileCacheFactory::instance() return ret; } -FileCachePtr FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard &) +FileCacheFactory::CacheByBasePath FileCacheFactory::getAll() +{ + std::lock_guard lock(mutex); + return caches; +} + +const FileCacheSettings & FileCacheFactory::getSettings(const std::string & cache_base_path) +{ + std::lock_guard lock(mutex); + + auto * cache_data = getImpl(cache_base_path, lock); + if (cache_data) + return cache_data->settings; + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); +} + +FileCacheFactory::CacheData * FileCacheFactory::getImpl(const std::string & cache_base_path, std::lock_guard &) { auto it = caches.find(cache_base_path); if (it == caches.end()) return nullptr; - return it->second; + return &it->second; +} + +FileCachePtr FileCacheFactory::get(const std::string & cache_base_path) +{ + std::lock_guard lock(mutex); + + auto * cache_data = getImpl(cache_base_path, lock); + if (cache_data) + return cache_data->cache; + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No cache found by path: {}", cache_base_path); } FileCachePtr FileCacheFactory::getOrCreate( - const std::string & cache_base_path, size_t max_size, size_t max_elements_size, size_t max_file_segment_size) + const std::string & cache_base_path, const FileCacheSettings & file_cache_settings) { std::lock_guard 
lock(mutex); - auto cache = getImpl(cache_base_path, lock); - if (cache) - { - if (cache->capacity() != max_size) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cache with path `{}` already exists, but has different max size", cache_base_path); - return cache; - } - cache = std::make_shared(cache_base_path, max_size, max_elements_size, max_file_segment_size); - caches.emplace(cache_base_path, cache); + auto * cache_data = getImpl(cache_base_path, lock); + if (cache_data) + return cache_data->cache; + + auto cache = std::make_shared(cache_base_path, file_cache_settings); + caches.emplace(cache_base_path, CacheData(cache, file_cache_settings)); return cache; } diff --git a/src/Common/FileCacheFactory.h b/src/Common/FileCacheFactory.h index f2432f03cae..3518f487b6d 100644 --- a/src/Common/FileCacheFactory.h +++ b/src/Common/FileCacheFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -14,16 +15,32 @@ namespace DB */ class FileCacheFactory final : private boost::noncopyable { + struct CacheData + { + FileCachePtr cache; + FileCacheSettings settings; + + CacheData(FileCachePtr cache_, const FileCacheSettings & settings_) : cache(cache_), settings(settings_) {} + }; + + using CacheByBasePath = std::unordered_map; + public: static FileCacheFactory & instance(); - FileCachePtr getOrCreate(const std::string & cache_base_path, size_t max_size, size_t max_elements_size, size_t max_file_segment_size); + FileCachePtr getOrCreate(const std::string & cache_base_path, const FileCacheSettings & file_cache_settings); + + FileCachePtr get(const std::string & cache_base_path); + + CacheByBasePath getAll(); + + const FileCacheSettings & getSettings(const std::string & cache_base_path); private: - FileCachePtr getImpl(const std::string & cache_base_path, std::lock_guard &); + CacheData * getImpl(const std::string & cache_base_path, std::lock_guard &); std::mutex mutex; - std::unordered_map caches; + CacheByBasePath caches; }; } diff --git a/src/Common/FileCacheSettings.cpp b/src/Common/FileCacheSettings.cpp new file mode 100644 index 00000000000..f555de277b2 --- /dev/null +++ b/src/Common/FileCacheSettings.cpp @@ -0,0 +1,16 @@ +#include "FileCacheSettings.h" + +#include + +namespace DB +{ + +void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) +{ + max_size = config.getUInt64(config_prefix + ".data_cache_max_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE); + max_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); + max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); + cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false); +} + +} diff --git a/src/Common/FileCacheSettings.h b/src/Common/FileCacheSettings.h new file mode 100644 index 00000000000..0b34e1e3d82 --- /dev/null +++ b/src/Common/FileCacheSettings.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace Poco { namespace Util { class AbstractConfiguration; } } + +namespace DB +{ + +struct FileCacheSettings +{ + size_t max_size = 0; + size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS; + size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE; + bool cache_on_write_operations = false; + + void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); +}; + +} diff --git 
a/src/Common/FileCache_fwd.h b/src/Common/FileCache_fwd.h index cab1525600b..7448f0c8c89 100644 --- a/src/Common/FileCache_fwd.h +++ b/src/Common/FileCache_fwd.h @@ -4,10 +4,13 @@ namespace DB { +static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_CACHE_SIZE = 1024 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024; class IFileCache; using FileCachePtr = std::shared_ptr; +struct FileCacheSettings; + } diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index ac89721683e..5a13ea7d207 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -31,10 +31,34 @@ FileSegment::FileSegment( , log(&Poco::Logger::get("FileSegment")) #endif { - if (download_state == State::DOWNLOADED) - reserved_size = downloaded_size = size_; - else if (download_state != State::EMPTY) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either DOWNLOADED or EMPTY state"); + /// On creation, file segment state can be EMPTY, DOWNLOADED, DOWNLOADING. + switch (download_state) + { + /// EMPTY is used when file segment is not in cache and + /// someone will _potentially_ want to download it (after calling getOrSetDownloader()). + case (State::EMPTY): + { + break; + } + /// DOWNLOADED is used either on initial cache metadata load into memory on server startup + /// or on reduceSizeToDownloaded() -- when file segment object is updated. + case (State::DOWNLOADED): + { + reserved_size = downloaded_size = size_; + break; + } + /// DOWNLOADING is used only for write-through caching (e.g. getOrSetDownloader() is not + /// needed, downloader is set on file segment creation). + case (State::DOWNLOADING): + { + downloader_id = getCallerId(); + break; + } + default: + { + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state"); + } + } } FileSegment::State FileSegment::state() const @@ -49,6 +73,12 @@ size_t FileSegment::getDownloadOffset() const return range().left + getDownloadedSize(segment_lock); } +size_t FileSegment::getDownloadedSize() const +{ + std::lock_guard segment_lock(mutex); + return getDownloadedSize(segment_lock); +} + size_t FileSegment::getDownloadedSize(std::lock_guard & /* segment_lock */) const { if (download_state == State::DOWNLOADED) @@ -60,24 +90,15 @@ size_t FileSegment::getDownloadedSize(std::lock_guard & /* segment_l String FileSegment::getCallerId() { - return getCallerIdImpl(false); + return getCallerIdImpl(); } -String FileSegment::getCallerIdImpl(bool allow_non_strict_checking) +String FileSegment::getCallerIdImpl() { - if (IFileCache::shouldBypassCache()) - { - /// getCallerId() can be called from completeImpl(), which can be called from complete(). - /// complete() is called from destructor of CachedReadBufferFromRemoteFS when there is no query id anymore. - /// Allow non strict checking in this case. This works correctly as if getCallerIdImpl() is called from destructor, - /// then we know that caller is not a downloader, because downloader is reset each nextImpl() call either - /// manually or via SCOPE_EXIT. 
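Aside, not part of the patch: the FileSegment constructor hunk above accepts exactly three initial states. The standalone sketch below restates those rules with simplified types (State, SegmentStub and caller_id are illustration-only); the real FileSegment also wires up the cache pointer, logger and remote reader.

#include <cstddef>
#include <stdexcept>
#include <string>

enum class State { EMPTY, DOWNLOADING, DOWNLOADED, PARTIALLY_DOWNLOADED };

struct SegmentStub
{
    State state;
    size_t reserved_size = 0;
    size_t downloaded_size = 0;
    std::string downloader_id;

    SegmentStub(State state_, size_t size_, const std::string & caller_id)
        : state(state_)
    {
        switch (state)
        {
            case State::EMPTY:
                break;                     /// nothing downloaded yet, downloader assigned later
            case State::DOWNLOADED:
                reserved_size = downloaded_size = size_; /// loaded from disk, already complete
                break;
            case State::DOWNLOADING:
                downloader_id = caller_id; /// write-through caching: downloader fixed at creation
                break;
            default:
                throw std::invalid_argument("Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state");
        }
    }
};

int main()
{
    SegmentStub downloaded(State::DOWNLOADED, 10, "query:1");
    return downloaded.downloaded_size == 10 ? 0 : 1;
}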
- - if (allow_non_strict_checking) - return "None"; - - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot use cache without query id"); - } + if (!CurrentThread::isInitialized() + || !CurrentThread::get().getQueryContext() + || CurrentThread::getQueryId().size == 0) + return "None:" + toString(getThreadId()); return CurrentThread::getQueryId().toString() + ":" + toString(getThreadId()); } @@ -136,7 +157,6 @@ String FileSegment::getDownloader() const bool FileSegment::isDownloader() const { std::lock_guard segment_lock(mutex); - LOG_TEST(log, "Checking for current downloader. Caller: {}, downloader: {}, current state: {}", getCallerId(), downloader_id, stateToString(download_state)); return getCallerId() == downloader_id; } @@ -221,15 +241,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) { std::lock_guard segment_lock(mutex); - auto info = getInfoForLogImpl(segment_lock); - e.addMessage("while writing into cache, info: " + info); + wrapWithCacheInfo(e, "while writing into cache", segment_lock); - LOG_ERROR(log, "Failed to write to cache. File segment info: {}", info); - - download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; - - cache_writer->finalize(); - cache_writer.reset(); + setDownloadFailed(segment_lock); cv.notify_all(); @@ -239,6 +253,77 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) assert(getDownloadOffset() == offset_ + size); } +void FileSegment::writeInMemory(const char * from, size_t size) +{ + if (!size) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Attempt to write zero size cache file"); + + if (availableSize() < size) + throw Exception( + ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + "Not enough space is reserved. Available: {}, expected: {}", availableSize(), size); + + std::lock_guard segment_lock(mutex); + + if (cache_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer already initialized"); + + auto download_path = cache->getPathInLocalCache(key(), offset()); + cache_writer = std::make_unique(download_path, size + 1); + + try + { + cache_writer->write(from, size); + } + catch (Exception & e) + { + wrapWithCacheInfo(e, "while writing into cache", segment_lock); + + setDownloadFailed(segment_lock); + + cv.notify_all(); + + throw; + } +} + +size_t FileSegment::finalizeWrite() +{ + std::lock_guard segment_lock(mutex); + + if (!cache_writer) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer not initialized"); + + size_t size = cache_writer->offset(); + + if (size == 0) + throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing size is not allowed"); + + try + { + cache_writer->next(); + } + catch (Exception & e) + { + wrapWithCacheInfo(e, "while writing into cache", segment_lock); + + setDownloadFailed(segment_lock); + + cv.notify_all(); + + throw; + } + + downloaded_size += size; + + if (downloaded_size != range().size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected downloaded size to equal file segment size ({} == {})", downloaded_size, range().size()); + + setDownloaded(segment_lock); + + return size; +} + FileSegment::State FileSegment::wait() { std::unique_lock segment_lock(mutex); @@ -303,6 +388,20 @@ void FileSegment::setDownloaded(std::lock_guard & /* segment_lock */ { download_state = State::DOWNLOADED; is_downloaded = true; + downloader_id.clear(); + + if (cache_writer) + { + cache_writer->finalize(); + cache_writer.reset(); + remote_file_reader.reset(); + } +} + +void FileSegment::setDownloadFailed(std::lock_guard & /* 
segment_lock */) +{ + download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; + downloader_id.clear(); if (cache_writer) { @@ -360,7 +459,7 @@ void FileSegment::complete(State state) } catch (...) { - if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) + if (!downloader_id.empty() && downloader_id == getCallerIdImpl()) downloader_id.clear(); cv.notify_all(); @@ -385,7 +484,7 @@ void FileSegment::complete(std::lock_guard & cache_lock) /// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the /// downloader or the only owner of the segment. - bool can_update_segment_state = downloader_id == getCallerIdImpl(true) + bool can_update_segment_state = downloader_id == getCallerIdImpl() || cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); if (can_update_segment_state) @@ -394,11 +493,11 @@ void FileSegment::complete(std::lock_guard & cache_lock) try { - completeImpl(cache_lock, segment_lock, /* allow_non_strict_checking */true); + completeImpl(cache_lock, segment_lock); } catch (...) { - if (!downloader_id.empty() && downloader_id == getCallerIdImpl(true)) + if (!downloader_id.empty() && downloader_id == getCallerIdImpl()) downloader_id.clear(); cv.notify_all(); @@ -408,7 +507,7 @@ void FileSegment::complete(std::lock_guard & cache_lock) cv.notify_all(); } -void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lock_guard & segment_lock, bool allow_non_strict_checking) +void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lock_guard & segment_lock) { bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock); @@ -444,7 +543,7 @@ void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lo } } - if (!downloader_id.empty() && (downloader_id == getCallerIdImpl(allow_non_strict_checking) || is_last_holder)) + if (!downloader_id.empty() && (downloader_id == getCallerIdImpl() || is_last_holder)) { LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state)); downloader_id.clear(); @@ -471,6 +570,11 @@ String FileSegment::getInfoForLogImpl(std::lock_guard & segment_lock return info.str(); } +void FileSegment::wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard & segment_lock) const +{ + e.addMessage(fmt::format("{}, current cache state: {}", message, getInfoForLogImpl(segment_lock))); +} + String FileSegment::stateToString(FileSegment::State state) { switch (state) @@ -504,6 +608,23 @@ void FileSegment::assertCorrectnessImpl(std::lock_guard & /* segment assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); } +FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard & /* cache_lock */) +{ + auto snapshot = std::make_shared( + file_segment->offset(), + file_segment->range().size(), + file_segment->key(), + nullptr, + State::EMPTY); + + snapshot->hits_count = file_segment->getHitsCount(); + snapshot->ref_count = file_segment.use_count(); + snapshot->downloaded_size = file_segment->getDownloadedSize(); + snapshot->download_state = file_segment->state(); + + return snapshot; +} + FileSegmentsHolder::~FileSegmentsHolder() { /// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index b7501640913..615fd9a56de 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -97,6 
+97,15 @@ public: void write(const char * from, size_t size, size_t offset_); + /** + * writeInMemory and finalizeWrite are used together to write a single file with delay. + * Both can be called only once, one after another. Used for writing cache via threadpool + * on write operations. TODO: this solution is temporary, until adding a separate cache layer. + */ + void writeInMemory(const char * from, size_t size); + + size_t finalizeWrite(); + RemoteFileReaderPtr getRemoteFileReader(); void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_); @@ -117,14 +126,24 @@ public: size_t getDownloadOffset() const; + size_t getDownloadedSize() const; + void completeBatchAndResetDownloader(); void complete(State state); String getInfoForLog() const; + size_t getHitsCount() const { return hits_count; } + + size_t getRefCount() const { return ref_count; } + + void incrementHitsCount() { ++hits_count; } + void assertCorrectness() const; + static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard & cache_lock); + private: size_t availableSize() const { return reserved_size - downloaded_size; } @@ -133,6 +152,9 @@ private: void assertCorrectnessImpl(std::lock_guard & segment_lock) const; void setDownloaded(std::lock_guard & segment_lock); + void setDownloadFailed(std::lock_guard & segment_lock); + + void wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard & segment_lock) const; bool lastFileSegmentHolder() const; @@ -144,9 +166,9 @@ private: void completeImpl( std::lock_guard & cache_lock, - std::lock_guard & segment_lock, bool allow_non_strict_checking = false); + std::lock_guard & segment_lock); - static String getCallerIdImpl(bool allow_non_strict_checking = false); + static String getCallerIdImpl(); void resetDownloaderImpl(std::lock_guard & segment_lock); @@ -180,6 +202,8 @@ private: bool detached = false; std::atomic is_downloaded{false}; + std::atomic hits_count = 0; /// cache hits.
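Aside, not part of the patch: the comment above fixes a strict call order for writeInMemory() and finalizeWrite(). The standalone sketch below illustrates that contract with a plain std::ofstream; DelayedCacheWriter is illustration-only and is not how ClickHouse actually writes the segment.

#include <cstddef>
#include <fstream>
#include <stdexcept>
#include <string>
#include <utility>

/// Two-phase write: stage the data once, then flush it once, in that order.
class DelayedCacheWriter
{
public:
    explicit DelayedCacheWriter(std::string path_) : path(std::move(path_)) {}

    void writeInMemory(const char * from, size_t size)
    {
        if (staged)
            throw std::logic_error("writeInMemory() may be called only once");
        buffer.assign(from, size);
        staged = true;
    }

    size_t finalizeWrite()
    {
        if (!staged || finalized)
            throw std::logic_error("finalizeWrite() must follow a single writeInMemory()");
        std::ofstream out(path, std::ios::binary);
        out.write(buffer.data(), static_cast<std::streamsize>(buffer.size()));
        finalized = true;
        return buffer.size();
    }

private:
    std::string path;
    std::string buffer;
    bool staged = false;
    bool finalized = false;
};

int main()
{
    DelayedCacheWriter writer("segment.bin");
    writer.writeInMemory("abc", 3);
    return writer.finalizeWrite() == 3 ? 0 : 1;
}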
+ std::atomic ref_count = 0; /// Used for getting snapshot state }; struct FileSegmentsHolder : private boost::noncopyable diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index e5991421633..88e6e8327b8 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index cfb4821691c..da3d6c24562 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -23,6 +23,7 @@ M(QueryViewsLogElement) \ M(SessionLogElement) \ M(TraceLogElement) \ + M(TransactionsInfoLogElement) \ M(ZooKeeperLogElement) \ M(ProcessorProfileLogElement) \ M(TextLogElement) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index f3920474111..3d7ec08cdaf 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -216,6 +216,11 @@ public: return query_context.lock(); } + auto getGlobalContext() const + { + return global_context.lock(); + } + void disableProfiling() { assert(!query_profiler_real && !query_profiler_cpu); diff --git a/src/Common/TransactionID.cpp b/src/Common/TransactionID.cpp new file mode 100644 index 00000000000..8a9894fbe53 --- /dev/null +++ b/src/Common/TransactionID.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include + +namespace DB +{ + +TIDHash TransactionID::getHash() const +{ + SipHash hash; + hash.update(start_csn); + hash.update(local_tid); + hash.update(host_id); + return hash.get64(); +} + + +void TransactionID::write(const TransactionID & tid, WriteBuffer & buf) +{ + writeChar('(', buf); + writeText(tid.start_csn, buf); + writeCString(", ", buf); + writeText(tid.local_tid, buf); + writeCString(", ", buf); + writeText(tid.host_id, buf); + writeChar(')', buf); +} + +TransactionID TransactionID::read(ReadBuffer & buf) +{ + TransactionID tid = Tx::EmptyTID; + assertChar('(', buf); + readText(tid.start_csn, buf); + assertString(", ", buf); + readText(tid.local_tid, buf); + assertString(", ", buf); + readText(tid.host_id, buf); + assertChar(')', buf); + return tid; +} + +} diff --git a/src/Common/TransactionID.h b/src/Common/TransactionID.h new file mode 100644 index 00000000000..3ab86f7589c --- /dev/null +++ b/src/Common/TransactionID.h @@ -0,0 +1,115 @@ +#pragma once +#include +#include +#include +#include + +namespace DB +{ + +class IDataType; +using DataTypePtr = std::shared_ptr; +class MergeTreeTransaction; + +/// This macro is useful for places where a pointer to current transaction should be passed, +/// but transactions are not supported yet (e.g. when calling MergeTreeData's methods from StorageReplicatedMergeTree) +/// or transaction object is not needed and not passed intentionally. 
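Aside, not part of the patch: TransactionID::write()/read() above use the text form "(start_csn, local_tid, host_id)". The standalone sketch below reproduces that shape with plain integers and a string standing in for CSN, LocalTID and UUID; TidLike and writeTid are illustration-only.

#include <iostream>
#include <sstream>
#include <string>

struct TidLike
{
    unsigned long long start_csn = 0;
    unsigned long long local_tid = 0;
    std::string host_id = "00000000-0000-0000-0000-000000000000";
};

/// Serialize in the same "(csn, ltid, host)" layout that TransactionID::write() emits.
std::string writeTid(const TidLike & tid)
{
    std::ostringstream out;
    out << '(' << tid.start_csn << ", " << tid.local_tid << ", " << tid.host_id << ')';
    return out.str();
}

int main()
{
    std::cout << writeTid({1, 1, "00000000-0000-0000-0000-000000000000"}) << "\n";
    /// Prints "(1, 1, 00000000-0000-0000-0000-000000000000)" -- the shape a TID
    /// with csn = 1, local_tid = 1 and a nil host UUID would take.
}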
+#ifndef NO_TRANSACTION_PTR +#define NO_TRANSACTION_PTR std::shared_ptr(nullptr) +#define NO_TRANSACTION_RAW static_cast(nullptr) +#endif + +/// Commit Sequence Number +using CSN = UInt64; +/// Local part of TransactionID +using LocalTID = UInt64; +/// Hash of TransactionID that fits into 64-bit atomic +using TIDHash = UInt64; + +namespace Tx +{ + /// For transactions that are probably not committed (yet) + const CSN UnknownCSN = 0; + /// For changes were made without creating a transaction + const CSN PrehistoricCSN = 1; + /// Special reserved values + const CSN CommittingCSN = 2; + const CSN EverythingVisibleCSN = 3; + const CSN MaxReservedCSN = 32; + + /// So far, that changes will never become visible + const CSN RolledBackCSN = std::numeric_limits::max(); + + const LocalTID PrehistoricLocalTID = 1; + const LocalTID DummyLocalTID = 2; + const LocalTID MaxReservedLocalTID = 32; +} + +struct TransactionID +{ + /// Global sequential number, the newest commit timestamp the we saw when this transaction began + CSN start_csn = 0; + /// Local sequential that is unique for each transaction started by this host within specific start_csn + LocalTID local_tid = 0; + /// UUID of host that has started this transaction + UUID host_id = UUIDHelpers::Nil; + + /// NOTE Maybe we could just generate UUIDv4 for each transaction, but it would be harder to debug. + /// Partial order is defined for this TransactionID structure: + /// (tid1.start_csn <= tid2.start_csn) <==> (tid1 <= tid2) + /// (tid1.start_csn == tid2.start_csn && tid1.host_id == tid2.host_id && tid1.local_tid < tid2.local_tid) ==> (tid1 < tid2) + /// If two transaction have the same start_csn, but were started by different hosts, then order is undefined. + + bool operator == (const TransactionID & rhs) const + { + return start_csn == rhs.start_csn && local_tid == rhs.local_tid && host_id == rhs.host_id; + } + + bool operator != (const TransactionID & rhs) const + { + return !(*this == rhs); + } + + TIDHash getHash() const; + + bool isEmpty() const + { + assert((local_tid == 0) == (start_csn == 0 && host_id == UUIDHelpers::Nil)); + return local_tid == 0; + } + + bool isPrehistoric() const + { + assert((local_tid == Tx::PrehistoricLocalTID) == (start_csn == Tx::PrehistoricCSN)); + return local_tid == Tx::PrehistoricLocalTID; + } + + + static void write(const TransactionID & tid, WriteBuffer & buf); + static TransactionID read(ReadBuffer & buf); +}; + +namespace Tx +{ + const TransactionID EmptyTID = {0, 0, UUIDHelpers::Nil}; + const TransactionID PrehistoricTID = {PrehistoricCSN, PrehistoricLocalTID, UUIDHelpers::Nil}; + const TransactionID DummyTID = {PrehistoricCSN, DummyLocalTID, UUIDHelpers::Nil}; +} + +} + +template<> +struct fmt::formatter +{ + template + constexpr auto parse(ParseContext & context) + { + return context.begin(); + } + + template + auto format(const DB::TransactionID & tid, FormatContext & context) + { + return fmt::format_to(context.out(), "({}, {}, {})", tid.start_csn, tid.local_tid, tid.host_id); + } +}; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index aae3b6d4191..0f4b141d058 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -1270,4 +1270,14 @@ String extractZooKeeperPath(const String & path, bool check_starts_with_slash, P return normalizeZooKeeperPath(path, check_starts_with_slash, log); } +String getSequentialNodeName(const String & prefix, UInt64 number) +{ + /// NOTE Sequential counter in ZooKeeper is Int32. 
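Aside, not part of the patch: getSequentialNodeName(), which continues just below, pads the numeric suffix to 10 decimal digits to match ZooKeeper's sequential-node counter. The standalone sketch below shows only that padding; sequentialNodeName is an illustration-only name, and the explicit overflow check stands in for the assert in the real helper.

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>

std::string sequentialNodeName(const std::string & prefix, unsigned long long number)
{
    constexpr size_t seq_node_digits = 10;
    std::string num_str = std::to_string(number);
    if (num_str.size() > seq_node_digits)
        throw std::length_error("sequential counter does not fit into 10 digits");
    return prefix + std::string(seq_node_digits - num_str.size(), '0') + num_str;
}

int main()
{
    std::cout << sequentialNodeName("log-", 42) << "\n"; /// log-0000000042
}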
+ assert(number < std::numeric_limits::max()); + constexpr size_t seq_node_digits = 10; + String num_str = std::to_string(number); + String name = prefix + String(seq_node_digits - num_str.size(), '0') + num_str; + return name; +} + } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 0f7eccd2547..4d5bd039a55 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -417,4 +417,6 @@ String extractZooKeeperName(const String & path); String extractZooKeeperPath(const String & path, bool check_starts_with_slash, Poco::Logger * log = nullptr); +String getSequentialNodeName(const String & prefix, UInt64 number); + } diff --git a/src/Common/tests/gtest_lru_file_cache.cpp b/src/Common/tests/gtest_lru_file_cache.cpp index c08b12857a1..dfcf51ddf2f 100644 --- a/src/Common/tests/gtest_lru_file_cache.cpp +++ b/src/Common/tests/gtest_lru_file_cache.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,10 @@ TEST(LRUFileCache, get) query_context->setCurrentQueryId("query_id"); DB::CurrentThread::QueryScope query_scope_holder(query_context); - auto cache = DB::LRUFileCache(cache_base_path, 30, 5); + DB::FileCacheSettings settings; + settings.max_size = 30; + settings.max_elements = 5; + auto cache = DB::LRUFileCache(cache_base_path, settings); cache.initialize(); auto key = cache.hash("key1"); @@ -472,7 +476,7 @@ TEST(LRUFileCache, get) { /// Test LRUCache::restore(). - auto cache2 = DB::LRUFileCache(cache_base_path, 30, 5); + auto cache2 = DB::LRUFileCache(cache_base_path, settings); cache2.initialize(); ASSERT_EQ(cache2.getStat().downloaded_size, 5); @@ -491,7 +495,9 @@ TEST(LRUFileCache, get) { /// Test max file segment size - auto cache2 = DB::LRUFileCache(caches_dir / "cache2", 30, 5, /* max_file_segment_size */10); + auto settings2 = settings; + settings2.max_file_segment_size = 10; + auto cache2 = DB::LRUFileCache(caches_dir / "cache2", settings2); cache2.initialize(); auto holder1 = cache2.getOrSet(key, 0, 25); /// Get [0, 24] diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2cbfe97cde5..aa78456702c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -465,6 +465,7 @@ class IColumn; M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). 
If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ + M(Bool, allow_settings_after_format_in_insert, false, "Allow SETTINGS after FORMAT, but note, that this is not always safe (note: this is a compatibility setting).", 0) \ M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ M(Seconds, periodic_live_view_refresh, DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC, "Interval after which periodically refreshed live view is forced to refresh.", 0) \ M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \ @@ -560,8 +561,10 @@ class IColumn; \ M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ - M(Bool, remote_fs_enable_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \ - M(UInt64, remote_fs_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ + M(Bool, enable_filesystem_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \ + M(UInt64, filesystem_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ + M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \ + M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, "", 0) \ \ M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \ M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \ @@ -578,6 +581,7 @@ class IColumn; M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ + M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. @@ -638,6 +642,12 @@ class IColumn; M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. 
Used for automatic schema inference from data.", 0) \ M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \ + M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \ + M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \ + M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Parquet", 0) \ + M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format ORC", 0) \ + M(Bool, input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Arrow", 0) \ + M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 1168013488e..7fe54c12665 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -165,7 +165,7 @@ DECLARE_SETTING_ENUM(DistributedDDLOutputMode) enum class HandleKafkaErrorMode { - DEFAULT = 0, // Ignore errors whit threshold. + DEFAULT = 0, // Ignore errors with threshold. STREAM, // Put errors to stream in the virtual column named ``_error. /*FIXED_SYSTEM_TABLE, Put errors to in a fixed system table likey system.kafka_errors. This is not implemented now. */ /*CUSTOM_SYSTEM_TABLE, Put errors to in a custom system table. This is not implemented now. 
*/ diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 41de17982aa..42ec739c33b 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -45,22 +45,7 @@ DataTypeMap::DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & valu void DataTypeMap::assertKeyType() const { - bool type_error = false; - if (key_type->getTypeId() == TypeIndex::LowCardinality) - { - const auto & low_cardinality_data_type = assert_cast(*key_type); - if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType()))) - type_error = true; - } - else if (!key_type->isValueRepresentedByInteger() - && !isStringOrFixedString(*key_type) - && !WhichDataType(key_type).isNothing() - && !WhichDataType(key_type).isUUID()) - { - type_error = true; - } - - if (type_error) + if (!checkKeyType(key_type)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type of Map key must be a type, that can be represented by integer or String or FixedString (possibly LowCardinality) or UUID," " but {} given", key_type->getName()); @@ -102,6 +87,25 @@ bool DataTypeMap::equals(const IDataType & rhs) const return nested->equals(*rhs_map.nested); } +bool DataTypeMap::checkKeyType(DataTypePtr key_type) +{ + if (key_type->getTypeId() == TypeIndex::LowCardinality) + { + const auto & low_cardinality_data_type = assert_cast(*key_type); + if (!isStringOrFixedString(*(low_cardinality_data_type.getDictionaryType()))) + return false; + } + else if (!key_type->isValueRepresentedByInteger() + && !isStringOrFixedString(*key_type) + && !WhichDataType(key_type).isNothing() + && !WhichDataType(key_type).isUUID()) + { + return false; + } + + return true; +} + static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.size() != 2) diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 65bdd93ca4d..479008031fe 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -48,6 +48,8 @@ public: SerializationPtr doGetDefaultSerialization() const override; + static bool checkKeyType(DataTypePtr key_type); + private: void assertKeyType() const; }; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index d94eceb7dec..2a07ba8375d 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -461,6 +461,10 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context) { + + if (query_context->getCurrentTransaction() && query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Distributed DDL queries inside transactions are not supported"); + if (is_readonly) throw Exception(ErrorCodes::NO_ZOOKEEPER, "Database is in readonly mode, because it cannot connect to ZooKeeper"); diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 84c3f857a81..5765f1c6598 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -319,7 +319,6 @@ bool DatabaseReplicatedDDLWorker::canRemoveQueueEntry(const String & entry_name, void DatabaseReplicatedDDLWorker::initializeLogPointer(const String & processed_entry_name) { updateMaxDDLEntryID(processed_entry_name); - assert(max_id.load() == parse(getAndSetZooKeeper()->get(fs::path(database->replica_path) / "log_ptr"))); } UInt32 
DatabaseReplicatedDDLWorker::getLogPointer() const diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp index fb07d8c356b..78b9b9e3446 100644 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp @@ -71,8 +71,8 @@ std::unique_ptr DiskAzureBlobStorage::readFile( LOG_TEST(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); auto reader_impl = std::make_unique( - path, blob_container_client, metadata, settings->max_single_read_retries, - settings->max_single_download_retries, read_settings); + blob_container_client, metadata.remote_fs_root_path, metadata.remote_fs_objects, + settings->max_single_read_retries, settings->max_single_download_retries, read_settings); if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { @@ -90,7 +90,8 @@ std::unique_ptr DiskAzureBlobStorage::readFile( std::unique_ptr DiskAzureBlobStorage::writeFile( const String & path, size_t buf_size, - WriteMode mode) + WriteMode mode, + const WriteSettings &) { auto blob_path = path + "_" + getRandomASCIIString(8); /// NOTE: path contains the tmp_* prefix in the blob name @@ -108,7 +109,7 @@ std::unique_ptr DiskAzureBlobStorage::writeFile( readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_path, count] (Metadata & metadata) { metadata.addObject(blob_path, count); return true; }); }; - return std::make_unique(std::move(buffer), std::move(create_metadata_callback), path); + return std::make_unique(std::move(buffer), std::move(create_metadata_callback), blob_path); } diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h index 63c3c735812..efc245e7eb3 100644 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.h @@ -56,7 +56,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; DiskType getType() const override; diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index 3519b1212a4..cc2c330975a 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -150,7 +150,7 @@ DiskCacheWrapper::readFile( /// Note: enabling `threadpool` read requires to call setReadUntilEnd(). current_read_settings.remote_fs_method = RemoteFSReadMethod::read; /// Disable data cache. 
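Aside, not part of the patch: the recurring change in the disk hunks around here is that writeFile() gains a WriteSettings parameter, defaulted in the IDisk interface so existing call sites keep compiling, while decorators pass it through to their delegate. The standalone sketch below shows that shape; WriteSettingsStub, WriteBufferStub, IDiskStub, DiskLocalStub and DiskDecoratorStub are illustration-only names, and WriteMode is dropped for brevity.

#include <cstddef>
#include <memory>
#include <string>
#include <utility>

struct WriteSettingsStub
{
    bool enable_filesystem_cache_on_write_operations = false;
};

struct WriteBufferStub {};

struct IDiskStub
{
    virtual ~IDiskStub() = default;

    /// Defaulted in the interface, so callers that do not care about settings stay unchanged.
    virtual std::unique_ptr<WriteBufferStub> writeFile(
        const std::string & path, size_t buf_size, const WriteSettingsStub & settings = {}) = 0;
};

struct DiskLocalStub : IDiskStub
{
    std::unique_ptr<WriteBufferStub> writeFile(const std::string &, size_t, const WriteSettingsStub &) override
    {
        return std::make_unique<WriteBufferStub>();
    }
};

struct DiskDecoratorStub : IDiskStub
{
    explicit DiskDecoratorStub(std::shared_ptr<IDiskStub> delegate_) : delegate(std::move(delegate_)) {}

    std::unique_ptr<WriteBufferStub> writeFile(
        const std::string & path, size_t buf_size, const WriteSettingsStub & settings) override
    {
        return delegate->writeFile(path, buf_size, settings); /// forward the new parameter unchanged
    }

    std::shared_ptr<IDiskStub> delegate;
};

int main()
{
    DiskDecoratorStub decorated(std::make_shared<DiskLocalStub>());
    IDiskStub & disk = decorated;
    auto buf = disk.writeFile("part/data.bin", 1024); /// settings defaulted by the interface
    return buf ? 0 : 1;
}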
- current_read_settings.remote_fs_enable_cache = false; + current_read_settings.enable_filesystem_cache = false; if (metadata->status == DOWNLOADING) { @@ -167,7 +167,11 @@ DiskCacheWrapper::readFile( auto tmp_path = path + ".tmp"; { auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size); - auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite); + + WriteSettings write_settings; + write_settings.enable_filesystem_cache_on_write_operations = false; + + auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite, write_settings); copyData(*src_buffer, *dst_buffer); } cache_disk->moveFile(tmp_path, path); @@ -196,10 +200,15 @@ DiskCacheWrapper::readFile( } std::unique_ptr -DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode) +DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) { if (!cache_file_predicate(path)) - return DiskDecorator::writeFile(path, buf_size, mode); + return DiskDecorator::writeFile(path, buf_size, mode, settings); + + WriteSettings current_settings = settings; + /// There are two different cache implementations. Disable second one if the first is enabled. + /// The first will soon be removed, this disabling is temporary. + current_settings.enable_filesystem_cache_on_write_operations = false; LOG_TEST(log, "Write file {} to cache", backQuote(path)); @@ -208,15 +217,15 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode cache_disk->createDirectories(dir_path); return std::make_unique( - cache_disk->writeFile(path, buf_size, mode), + cache_disk->writeFile(path, buf_size, mode, current_settings), [this, path]() { /// Copy file from cache to actual disk when cached buffer is finalized. 
return cache_disk->readFile(path, ReadSettings(), /* read_hint= */ {}, /* file_size= */ {}); }, - [this, path, buf_size, mode]() + [this, path, buf_size, mode, current_settings]() { - return DiskDecorator::writeFile(path, buf_size, mode); + return DiskDecorator::writeFile(path, buf_size, mode, current_settings); }); } diff --git a/src/Disks/DiskCacheWrapper.h b/src/Disks/DiskCacheWrapper.h index dc66333758f..e413a3742f3 100644 --- a/src/Disks/DiskCacheWrapper.h +++ b/src/Disks/DiskCacheWrapper.h @@ -40,7 +40,7 @@ public: std::optional read_hint, std::optional file_size) const override; - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; + std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index 37911f16913..14f507af55d 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -121,9 +121,9 @@ DiskDecorator::readFile( } std::unique_ptr -DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode) +DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) { - return delegate->writeFile(path, buf_size, mode); + return delegate->writeFile(path, buf_size, mode, settings); } void DiskDecorator::removeFile(const String & path) diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index bace54ff22a..e5c9c7699bf 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -44,7 +44,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; @@ -71,6 +72,9 @@ public: void shutdown() override; void startup() override; void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map) override; + String getCacheBasePath() const override { return delegate->getCacheBasePath(); } + std::vector getRemotePaths(const String & path) const override { return delegate->getRemotePaths(path); } + void getRemotePathsRecursive(const String & path, std::vector & paths_map) override { return delegate->getRemotePathsRecursive(path, paths_map); } DiskPtr getMetadataDiskIfExistsOrSelf() override { return delegate->getMetadataDiskIfExistsOrSelf(); } diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 714264b7720..3cee205fafc 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -269,7 +269,7 @@ std::unique_ptr DiskEncrypted::readFile( return std::make_unique(settings.local_fs_buffer_size, std::move(buffer), key, header); } -std::unique_ptr DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskEncrypted::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { auto wrapped_path = wrappedPath(path); FileEncryption::Header header; diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index d99fe17457d..07a2ad81010 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -126,7 +126,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, 
+ const WriteSettings & settings) override; void removeFile(const String & path) override { diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index a91db508295..8aad42ab475 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -345,7 +345,7 @@ std::unique_ptr DiskLocal::readFile(const String & path, } std::unique_ptr -DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode) +DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1; return std::make_unique(fs::path(disk_path) / path, buf_size, flags); @@ -624,7 +624,7 @@ bool DiskLocal::setup() pcg32_fast rng(randomSeed()); UInt32 magic_number = rng(); { - auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = writeFile(disk_checker_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeIntBinary(magic_number, *buf); } disk_checker_magic_number = magic_number; diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 76d5a88a626..59dcf5e5c13 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -79,7 +79,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; diff --git a/src/Disks/DiskMemory.cpp b/src/Disks/DiskMemory.cpp index abaea0846a5..4f0e881e079 100644 --- a/src/Disks/DiskMemory.cpp +++ b/src/Disks/DiskMemory.cpp @@ -326,7 +326,7 @@ std::unique_ptr DiskMemory::readFile(const String & path return std::make_unique(path, iter->second.data); } -std::unique_ptr DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { std::lock_guard lock(mutex); diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index fe108f53c68..726be8bc3b5 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -71,7 +71,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index 43011a4cf72..8045a0e8c72 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -214,10 +214,10 @@ std::unique_ptr DiskRestartProxy::readFile( return std::make_unique(*this, std::move(impl)); } -std::unique_ptr DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskRestartProxy::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) { ReadLock lock (mutex); - auto impl = DiskDecorator::writeFile(path, buf_size, mode); + auto impl = DiskDecorator::writeFile(path, buf_size, mode, settings); return std::make_unique(*this, std::move(impl)); } @@ -305,6 +305,24 @@ bool DiskRestartProxy::checkUniqueId(const String & id) const return DiskDecorator::checkUniqueId(id); } +String DiskRestartProxy::getCacheBasePath() const +{ + ReadLock lock (mutex); + return DiskDecorator::getCacheBasePath(); +} + +std::vector DiskRestartProxy::getRemotePaths(const String & path) const +{ + ReadLock lock (mutex); + 
return DiskDecorator::getRemotePaths(path); +} + +void DiskRestartProxy::getRemotePathsRecursive(const String & path, std::vector & paths_map) +{ + ReadLock lock (mutex); + return DiskDecorator::getRemotePathsRecursive(path, paths_map); +} + void DiskRestartProxy::restart() { /// Speed up processing unhealthy requests. diff --git a/src/Disks/DiskRestartProxy.h b/src/Disks/DiskRestartProxy.h index 30f553f4fe0..baa57386e68 100644 --- a/src/Disks/DiskRestartProxy.h +++ b/src/Disks/DiskRestartProxy.h @@ -48,7 +48,7 @@ public: const ReadSettings & settings, std::optional read_hint, std::optional file_size) const override; - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; + std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override; void removeFile(const String & path) override; void removeFileIfExists(const String & path) override; void removeDirectory(const String & path) override; @@ -63,6 +63,9 @@ public: void truncateFile(const String & path, size_t size) override; String getUniqueId(const String & path) const override; bool checkUniqueId(const String & id) const override; + String getCacheBasePath() const override; + std::vector getRemotePaths(const String & path) const override; + void getRemotePathsRecursive(const String & path, std::vector & paths_map) override; void restart(); diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index f3039d9af2e..2f8929982e3 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -166,9 +166,9 @@ std::unique_ptr DiskWebServer::readFile(const String & p remote_path = remote_path.string().substr(url.size()); RemoteMetadata meta(path, remote_path); - meta.remote_fs_objects.emplace_back(std::make_pair(remote_path, iter->second.size)); + meta.remote_fs_objects.emplace_back(remote_path, iter->second.size); - auto web_impl = std::make_unique(path, url, meta, getContext(), read_settings); + auto web_impl = std::make_unique(url, meta.remote_fs_root_path, meta.remote_fs_objects, getContext(), read_settings); if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index e2da0b2a1e1..6341b582174 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -77,7 +77,6 @@ public: UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } UInt64 getAvailableSpace() const final override { return std::numeric_limits::max(); } - UInt64 getUnreservedSpace() const final override { return std::numeric_limits::max(); } /// Read-only part @@ -100,7 +99,7 @@ public: /// Write and modification part - std::unique_ptr writeFile(const String &, size_t, WriteMode) override + std::unique_ptr writeFile(const String &, size_t, WriteMode, const WriteSettings &) override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); } @@ -165,6 +164,10 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); } + std::vector getRemotePaths(const String &) const override { return {}; } + + void getRemotePathsRecursive(const String &, std::vector &) override {} + /// Create part void createFile(const String &) final override {} diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 7f60b219a4b..a3817a85a36 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -82,17 +82,17 @@ std::unique_ptr DiskHDFS::readFile(const String & 
path, "Read from file by path: {}. Existing HDFS objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - auto hdfs_impl = std::make_unique(path, config, remote_fs_root_path, metadata, read_settings); + auto hdfs_impl = std::make_unique(config, remote_fs_root_path, remote_fs_root_path, metadata.remote_fs_objects, read_settings); auto buf = std::make_unique(std::move(hdfs_impl)); return std::make_unique(std::move(buf), settings->min_bytes_for_seek); } -std::unique_ptr DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskHDFS::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &) { /// Path to store new HDFS object. - auto file_name = getRandomName(); - auto hdfs_path = remote_fs_root_path + file_name; + std::string file_name = getRandomName(); + std::string hdfs_path = fs::path(remote_fs_root_path) / file_name; LOG_TRACE(log, "{} to file by path: {}. HDFS path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), hdfs_path); @@ -106,7 +106,7 @@ std::unique_ptr DiskHDFS::writeFile(const String & path readOrCreateUpdateAndStoreMetadata(path, mode, false, [file_name, count] (Metadata & metadata) { metadata.addObject(file_name, count); return true; }); }; - return std::make_unique(std::move(hdfs_buffer), std::move(create_metadata_callback), path); + return std::make_unique(std::move(hdfs_buffer), std::move(create_metadata_callback), hdfs_path); } diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 23a108507b4..eba58101bc4 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -60,7 +60,7 @@ public: std::optional read_hint, std::optional file_size) const override; - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; + std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings) override; void removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) override; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 4fa73b8eba8..81cdf47e1fb 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,11 @@ namespace Poco namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + class IDiskDirectoryIterator; using DiskDirectoryIteratorPtr = std::unique_ptr; @@ -168,7 +174,8 @@ public: virtual std::unique_ptr writeFile( /// NOLINT const String & path, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, - WriteMode mode = WriteMode::Rewrite) = 0; + WriteMode mode = WriteMode::Rewrite, + const WriteSettings & settings = {}) = 0; /// Remove file. Throws exception if file doesn't exists or it's a directory. virtual void removeFile(const String & path) = 0; @@ -197,6 +204,24 @@ public: /// Second bool param is a flag to remove (true) or keep (false) shared data on S3 virtual void removeSharedFileIfExists(const String & path, bool) { removeFileIfExists(path); } + + virtual String getCacheBasePath() const { return ""; } + + /// Returns a list of paths because for Log family engines there might be + /// multiple files in remote fs for single clickhouse file. + virtual std::vector getRemotePaths(const String &) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePaths() not implemented for disk: {}`", getType()); + } + + /// For one local path there might be multiple remote paths in case of Log family engines. 
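Aside, not part of the patch: the comment above notes that one local path can map to several remote paths, which is the shape of the new getRemotePaths()/getRemotePathsRecursive() API. The standalone sketch below walks a directory and collects such pairs; the "remote/" prefix is a made-up stand-in, since the real IDiskRemote reads the remote object names from its metadata files.

#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

namespace fs = std::filesystem;

using LocalPathWithRemotePaths = std::pair<std::string, std::vector<std::string>>;

/// Recursively collect, for every local file, the list of remote paths it maps to.
void getRemotePathsRecursiveSketch(const fs::path & local_path, std::vector<LocalPathWithRemotePaths> & paths_map)
{
    if (fs::is_regular_file(local_path))
    {
        /// A Log-family file may reference several remote blobs, hence a vector per local path.
        paths_map.emplace_back(local_path.string(), std::vector<std::string>{"remote/" + local_path.filename().string()});
    }
    else if (fs::is_directory(local_path))
    {
        for (const auto & entry : fs::directory_iterator(local_path))
            getRemotePathsRecursiveSketch(entry.path(), paths_map);
    }
}

int main()
{
    fs::path root = fs::temp_directory_path() / "remote_paths_demo";
    fs::create_directories(root / "nested");
    std::ofstream(root / "nested" / "data.bin").put('x');

    std::vector<LocalPathWithRemotePaths> paths_map;
    getRemotePathsRecursiveSketch(root, paths_map);

    for (const auto & [local, remotes] : paths_map)
        std::cout << local << " -> " << remotes.front() << "\n";

    fs::remove_all(root);
}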
+ using LocalPathWithRemotePaths = std::pair>; + + virtual void getRemotePathsRecursive(const String &, std::vector &) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive() not implemented for disk: {}`", getType()); + } + struct RemoveRequest { String path; diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index fa4189abc53..b475ae1ee94 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -122,7 +122,8 @@ void IDiskRemote::Metadata::load() remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); } assertChar('\n', *buf); - remote_fs_objects[i] = {remote_fs_object_path, remote_fs_object_size}; + remote_fs_objects[i].relative_path = remote_fs_object_path; + remote_fs_objects[i].bytes_size = remote_fs_object_size; } readIntText(ref_count, *buf); @@ -136,13 +137,15 @@ void IDiskRemote::Metadata::load() } catch (Exception & e) { + tryLogCurrentException(__PRETTY_FUNCTION__); + if (e.code() == ErrorCodes::UNKNOWN_FORMAT) throw; if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) throw; - throw Exception("Failed to read metadata file", e, ErrorCodes::UNKNOWN_FORMAT); + throw Exception("Failed to read metadata file: " + metadata_file_path, e, ErrorCodes::UNKNOWN_FORMAT); } } @@ -341,6 +344,30 @@ void IDiskRemote::removeMetadataRecursive(const String & path, RemoteFSPathKeepe } } +std::vector IDiskRemote::getRemotePaths(const String & local_path) const +{ + auto metadata = readMetadata(local_path); + + std::vector remote_paths; + for (const auto & [remote_path, _] : metadata.remote_fs_objects) + remote_paths.push_back(remote_path); + + return remote_paths; +} + +void IDiskRemote::getRemotePathsRecursive(const String & local_path, std::vector & paths_map) +{ + if (metadata_disk->isFile(local_path)) + { + paths_map.emplace_back(local_path, getRemotePaths(local_path)); + } + else + { + for (auto it = iterateDirectory(local_path); it->isValid(); it->next()) + IDiskRemote::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map); + } +} + DiskPtr DiskRemoteReservation::getDisk(size_t i) const { if (i != 0) @@ -348,7 +375,6 @@ DiskPtr DiskRemoteReservation::getDisk(size_t i) const return disk; } - void DiskRemoteReservation::update(UInt64 new_size) { std::lock_guard lock(disk->reservation_mutex); @@ -402,6 +428,12 @@ IDiskRemote::IDiskRemote( } +String IDiskRemote::getCacheBasePath() const +{ + return cache ? 
cache->getBasePath() : ""; +} + + bool IDiskRemote::exists(const String & path) const { return metadata_disk->exists(path); @@ -607,7 +639,7 @@ String IDiskRemote::getUniqueId(const String & path) const auto metadata = readMetadata(path); String id; if (!metadata.remote_fs_objects.empty()) - id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].first; + id = metadata.remote_fs_root_path + metadata.remote_fs_objects[0].relative_path; return id; } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 82e76b8f68d..aa78468c7bb 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -13,7 +13,6 @@ #include #include - namespace CurrentMetrics { extern const Metric DiskSpaceReservedForMerge; @@ -22,6 +21,24 @@ namespace CurrentMetrics namespace DB { +/// Path to blob with its size +struct BlobPathWithSize +{ + std::string relative_path; + uint64_t bytes_size; + + BlobPathWithSize() = default; + BlobPathWithSize(const BlobPathWithSize & other) = default; + + BlobPathWithSize(const std::string & relative_path_, uint64_t bytes_size_) + : relative_path(relative_path_) + , bytes_size(bytes_size_) + {} +}; + +/// List of blobs with their sizes +using BlobsPathToSize = std::vector; + /// Helper class to collect paths into chunks of maximum size. /// For s3 it is Aws::vector, for hdfs it is std::vector. class RemoteFSPathKeeper @@ -66,6 +83,12 @@ public: const String & getPath() const final override { return metadata_disk->getPath(); } + String getCacheBasePath() const final override; + + std::vector getRemotePaths(const String & local_path) const final override; + + void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; + /// Methods for working with metadata. For some operations (like hardlink /// creation) metadata can be updated concurrently from multiple threads /// (file actually rewritten on disk). So additional RW lock is required for @@ -163,6 +186,7 @@ protected: const String remote_fs_root_path; DiskPtr metadata_disk; + FileCachePtr cache; private: @@ -184,10 +208,8 @@ using RemoteDiskPtr = std::shared_ptr; /// Minimum info, required to be passed to ReadIndirectBufferFromRemoteFS struct RemoteMetadata { - using PathAndSize = std::pair; - /// Remote FS objects paths and their sizes.
- std::vector remote_fs_objects; + std::vector remote_fs_objects; /// URI const String & remote_fs_root_path; diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index de671e58687..16c1dd54f9f 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -122,10 +122,25 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment( { auto range = file_segment->range(); - size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec; + size_t wait_download_max_tries = settings.filesystem_cache_max_wait_sec; size_t wait_download_tries = 0; auto download_state = file_segment->state(); + + if (settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache) + { + if (download_state == FileSegment::State::DOWNLOADED) + { + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + else + { + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + } + while (true) { switch (download_state) @@ -375,6 +390,9 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() implementation_buffer = getImplementationBuffer(*current_file_segment_it); + if (read_type == ReadType::CACHED) + (*current_file_segment_it)->incrementHitsCount(); + LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString()); return true; } @@ -559,9 +577,6 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() { last_caller_id = FileSegment::getCallerId(); - if (IFileCache::shouldBypassCache()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed"); - if (!initialized) initialize(file_offset_of_buffer_end, getTotalSizeToRead()); @@ -606,6 +621,9 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() else { implementation_buffer = getImplementationBuffer(*current_file_segment_it); + + if (read_type == ReadType::CACHED) + (*current_file_segment_it)->incrementHitsCount(); } assert(!internal_buffer.empty()); diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 0d50b24f7a5..16a57b83771 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -38,12 +38,12 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S current_path = path; auto cache = settings.remote_fs_cache; - bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache(); + bool with_cache = cache && settings.enable_filesystem_cache; auto remote_file_reader_creator = [=, this]() { return std::make_unique( - client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries, + client_ptr, bucket, fs::path(common_path_prefix) / path, max_single_read_retries, settings, /* use_external_buffer */true, /* offset */ 0, read_until_position, /* restricted_seek */true); }; @@ -83,11 +83,14 @@ SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBuffer(const #endif -ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const ReadSettings & settings_, const String & path_) +ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( + const std::string & common_path_prefix_, + const BlobsPathToSize & blobs_to_read_, + const ReadSettings & settings_) : ReadBuffer(nullptr, 0) - , metadata(metadata_) + , common_path_prefix(common_path_prefix_) + , blobs_to_read(blobs_to_read_) , 
settings(settings_) - , canonical_path(path_) , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather")) { } @@ -119,9 +122,9 @@ void ReadBufferFromRemoteFSGather::initialize() { /// One clickhouse file can be split into multiple files in remote fs. auto current_buf_offset = file_offset_of_buffer_end; - for (size_t i = 0; i < metadata.remote_fs_objects.size(); ++i) + for (size_t i = 0; i < blobs_to_read.size(); ++i) { - const auto & [file_path, size] = metadata.remote_fs_objects[i]; + const auto & [file_path, size] = blobs_to_read[i]; if (size > current_buf_offset) { @@ -138,7 +141,7 @@ void ReadBufferFromRemoteFSGather::initialize() current_buf_offset -= size; } - current_buf_idx = metadata.remote_fs_objects.size(); + current_buf_idx = blobs_to_read.size(); current_buf = nullptr; } @@ -168,12 +171,12 @@ bool ReadBufferFromRemoteFSGather::nextImpl() bool ReadBufferFromRemoteFSGather::moveToNextBuffer() { /// If there is no available buffers - nothing to read. - if (current_buf_idx + 1 >= metadata.remote_fs_objects.size()) + if (current_buf_idx + 1 >= blobs_to_read.size()) return false; ++current_buf_idx; - const auto & [path, size] = metadata.remote_fs_objects[current_buf_idx]; + const auto & [path, size] = blobs_to_read[current_buf_idx]; current_buf = createImplementationBuffer(path, size); return true; @@ -202,7 +205,7 @@ bool ReadBufferFromRemoteFSGather::readImpl() if (!result) result = current_buf->next(); - if (metadata.remote_fs_objects.size() == 1) + if (blobs_to_read.size() == 1) { file_offset_of_buffer_end = current_buf->getFileOffsetOfBufferEnd(); } @@ -255,8 +258,8 @@ String ReadBufferFromRemoteFSGather::getFileName() const size_t ReadBufferFromRemoteFSGather::getFileSize() const { size_t size = 0; - for (const auto & object : metadata.remote_fs_objects) - size += object.second; + for (const auto & object : blobs_to_read) + size += object.bytes_size; return size; } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 25bfe0b7e16..d12513cba1f 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -26,9 +26,9 @@ friend class ReadIndirectBufferFromRemoteFS; public: ReadBufferFromRemoteFSGather( - const RemoteMetadata & metadata_, - const ReadSettings & settings_, - const String & path_); + const std::string & common_path_prefix_, + const BlobsPathToSize & blobs_to_read_, + const ReadSettings & settings_); String getFileName() const; @@ -57,7 +57,9 @@ public: protected: virtual SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) = 0; - RemoteMetadata metadata; + std::string common_path_prefix; + + BlobsPathToSize blobs_to_read; ReadSettings settings; @@ -89,8 +91,6 @@ private: */ size_t bytes_to_ignore = 0; - String canonical_path; - Poco::Logger * log; }; @@ -101,13 +101,13 @@ class ReadBufferFromS3Gather final : public ReadBufferFromRemoteFSGather { public: ReadBufferFromS3Gather( - const String & path_, std::shared_ptr client_ptr_, const String & bucket_, - IDiskRemote::Metadata metadata_, + const std::string & common_path_prefix_, + const BlobsPathToSize & blobs_to_read_, size_t max_single_read_retries_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) + : ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_) , client_ptr(std::move(client_ptr_)) , bucket(bucket_) , max_single_read_retries(max_single_read_retries_) @@ -130,13 +130,13 @@ class 
ReadBufferFromAzureBlobStorageGather final : public ReadBufferFromRemoteFS { public: ReadBufferFromAzureBlobStorageGather( - const String & path_, std::shared_ptr blob_container_client_, - IDiskRemote::Metadata metadata_, + const std::string & common_path_prefix_, + const BlobsPathToSize & blobs_to_read_, size_t max_single_read_retries_, size_t max_single_download_retries_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) + : ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_) , blob_container_client(blob_container_client_) , max_single_read_retries(max_single_read_retries_) , max_single_download_retries(max_single_download_retries_) @@ -157,12 +157,12 @@ class ReadBufferFromWebServerGather final : public ReadBufferFromRemoteFSGather { public: ReadBufferFromWebServerGather( - const String & path_, const String & uri_, - RemoteMetadata metadata_, + const std::string & common_path_prefix_, + const BlobsPathToSize & blobs_to_read_, ContextPtr context_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) + : ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_) , uri(uri_) , context(context_) { @@ -182,12 +182,12 @@ class ReadBufferFromHDFSGather final : public ReadBufferFromRemoteFSGather { public: ReadBufferFromHDFSGather( - const String & path_, const Poco::Util::AbstractConfiguration & config_, const String & hdfs_uri_, - IDiskRemote::Metadata metadata_, + const std::string & common_path_prefix_, + const BlobsPathToSize & blobs_to_read_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) + : ReadBufferFromRemoteFSGather(common_path_prefix_, blobs_to_read_, settings_) , config(config_) { const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2); diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index af545d15c0e..b1ae42d03d6 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -54,14 +54,14 @@ std::future ThreadPoolRemoteFSReader::submit(Reques { ThreadStatus thread_status; - /// Save query context if any, because cache implementation needs it. - if (query_context) - thread_status.attachQueryContext(query_context); - /// To be able to pass ProfileEvents. if (running_group) thread_status.attachQuery(running_group); + /// Save query context if any, because cache implementation needs it. + if (query_context) + thread_status.attachQueryContext(query_context); + setThreadName("VFSRead"); CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; @@ -83,12 +83,11 @@ std::future ThreadPoolRemoteFSReader::submit(Reques watch.stop(); - if (running_group) - CurrentThread::detachQuery(); - ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, result.offset ? 
result.size - result.offset : result.size); + thread_status.detachQuery(/* if_not_detached */true); + return Result{ .size = result.size, .offset = result.offset }; }); diff --git a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp index 9b604341da9..dca2fb17ba7 100644 --- a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp @@ -12,10 +12,10 @@ namespace DB WriteIndirectBufferFromRemoteFS::WriteIndirectBufferFromRemoteFS( std::unique_ptr impl_, CreateMetadataCallback && create_callback_, - const String & metadata_file_path_) + const String & remote_path_) : WriteBufferFromFileDecorator(std::move(impl_)) , create_metadata_callback(std::move(create_callback_)) - , metadata_file_path(metadata_file_path_) + , remote_path(remote_path_) { } diff --git a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h index 25a93e2fe07..84bd2b99c7e 100644 --- a/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h @@ -18,17 +18,17 @@ public: WriteIndirectBufferFromRemoteFS( std::unique_ptr impl_, CreateMetadataCallback && create_callback_, - const String & metadata_file_path_); + const String & remote_path_); ~WriteIndirectBufferFromRemoteFS() override; - String getFileName() const override { return metadata_file_path; } + String getFileName() const override { return remote_path; } private: void finalizeImpl() override; CreateMetadataCallback create_metadata_callback; - String metadata_file_path; + String remote_path; }; } diff --git a/src/Disks/RemoteDisksCommon.cpp b/src/Disks/RemoteDisksCommon.cpp index 36f2aed3e7c..da6ffed5f11 100644 --- a/src/Disks/RemoteDisksCommon.cpp +++ b/src/Disks/RemoteDisksCommon.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -64,18 +65,23 @@ FileCachePtr getCachePtrForDisk( if (!fs::exists(cache_base_path)) fs::create_directories(cache_base_path); - LOG_INFO(&Poco::Logger::get("Disk(" + name + ")"), "Disk registered with cache path: {}", cache_base_path); - auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context); if (metadata_path == cache_base_path) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata path and cache base path must be different: {}", metadata_path); - size_t max_cache_size = config.getUInt64(config_prefix + ".data_cache_max_size", 1024*1024*1024); - size_t max_cache_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); - size_t max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); + FileCacheSettings file_cache_settings; + file_cache_settings.loadFromConfig(config, config_prefix); - auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, max_cache_size, max_cache_elements, max_file_segment_size); + auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, file_cache_settings); cache->initialize(); + + auto * log = &Poco::Logger::get("Disk(" + name + ")"); + LOG_INFO(log, "Disk registered with cache path: {}. Cache size: {}, max cache elements size: {}, max_file_segment_size: {}", + cache_base_path, + file_cache_settings.max_size ? toString(file_cache_settings.max_size) : "UNLIMITED", + file_cache_settings.max_elements ? 
toString(file_cache_settings.max_elements) : "UNLIMITED", + file_cache_settings.max_file_segment_size); + return cache; } diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index e05ccef74c0..b6171a41dfb 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include @@ -227,10 +229,15 @@ std::unique_ptr DiskS3::readFile(const String & path, co ReadSettings disk_read_settings{read_settings}; if (cache) + { + if (IFileCache::isReadOnly()) + disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + disk_read_settings.remote_fs_cache = cache; + } auto s3_impl = std::make_unique( - path, settings->client, bucket, metadata, + settings->client, bucket, metadata.remote_fs_root_path, metadata.remote_fs_objects, settings->s3_max_single_read_retries, disk_read_settings); if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) @@ -245,7 +252,7 @@ std::unique_ptr DiskS3::readFile(const String & path, co } } -std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & write_settings) { auto settings = current_settings.get(); @@ -265,23 +272,28 @@ std::unique_ptr DiskS3::writeFile(const String & path, LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); + bool cache_on_write = cache + && fs::path(path).extension() != ".tmp" + && write_settings.enable_filesystem_cache_on_write_operations + && FileCacheFactory::instance().getSettings(getCacheBasePath()).cache_on_write_operations; + auto s3_buffer = std::make_unique( settings->client, bucket, - remote_fs_root_path + blob_name, + fs::path(remote_fs_root_path) / blob_name, settings->s3_min_upload_part_size, settings->s3_upload_part_size_multiply_factor, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size, threadPoolCallbackRunner(getThreadPoolWriter())); + buf_size, threadPoolCallbackRunner(getThreadPoolWriter()), blob_name, cache_on_write ? 
cache : nullptr); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { readOrCreateUpdateAndStoreMetadata(path, mode, false, [blob_name, count] (Metadata & metadata) { metadata.addObject(blob_name, count); return true; }); }; - return std::make_unique(std::move(s3_buffer), std::move(create_metadata_callback), path); + return std::make_unique(std::move(s3_buffer), std::move(create_metadata_callback), fs::path(remote_fs_root_path) / blob_name); } void DiskS3::createHardLink(const String & src_path, const String & dst_path) diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 7e39c9d9b3c..32eb9ee7aef 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -88,7 +88,8 @@ public: std::unique_ptr writeFile( const String & path, size_t buf_size, - WriteMode mode) override; + WriteMode mode, + const WriteSettings & settings) override; void removeFromRemoteFS(RemoteFSPathKeeperPtr keeper) override; diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp index fd3cc1acbe5..da041437951 100644 --- a/src/Disks/tests/gtest_disk_encrypted.cpp +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -96,7 +96,7 @@ TEST_F(DiskEncryptedTest, WriteAndRead) /// Write a file. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -122,7 +122,7 @@ TEST_F(DiskEncryptedTest, Append) /// Write a file (we use the append mode). { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -132,7 +132,7 @@ TEST_F(DiskEncryptedTest, Append) /// Append the file. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{" Another text"}, *buf); } @@ -148,7 +148,7 @@ TEST_F(DiskEncryptedTest, Truncate) /// Write a file (we use the append mode). { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -178,7 +178,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize) /// Write nothing to a file. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); @@ -187,7 +187,7 @@ TEST_F(DiskEncryptedTest, ZeroFileSize) /// Append the file with nothing. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); } EXPECT_EQ(encrypted_disk->getFileSize("a.txt"), 0); @@ -211,7 +211,7 @@ TEST_F(DiskEncryptedTest, AnotherFolder) /// Write a file. 
{ - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -231,11 +231,11 @@ TEST_F(DiskEncryptedTest, RandomIV) /// Write two files with the same contents. { - auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); } { - auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + auto buf = encrypted_disk->writeFile("b.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, {}); writeString(std::string_view{"Some text"}, *buf); } @@ -277,7 +277,7 @@ TEST_F(DiskEncryptedTest, RemoveFileDuringWriting) std::thread t1{[&] { for (size_t i = 0; i != n; ++i) - encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); + encrypted_disk->writeFile("a.txt", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); }}; std::thread t2{[&] diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index b0ea10abdb6..e9d7e464cce 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -5,12 +5,17 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include #include -#include +#include #include -#include -#include + namespace DB { @@ -18,7 +23,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; } FormatSettings::EscapingRule stringToEscapingRule(const String & escaping_rule) @@ -138,7 +142,8 @@ bool deserializeFieldByEscapingRule( serialization->deserializeTextRaw(column, buf, format_settings); break; default: - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Escaping rule {} is not suitable for deserialization", escapingRuleToString(escaping_rule)); } return read; } @@ -176,7 +181,8 @@ void serializeFieldByEscapingRule( } } -void writeStringByEscapingRule(const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) +void writeStringByEscapingRule( + const String & value, WriteBuffer & out, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) { switch (escaping_rule) { @@ -249,85 +255,269 @@ String readStringByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule e return readByEscapingRule(buf, escaping_rule, format_settings); } -static bool evaluateConstantExpressionFromString(const StringRef & field, DataTypePtr & type, ContextPtr context) +static DataTypePtr determineDataTypeForSingleFieldImpl(ReadBuffer & buf) { - if (!context) - throw Exception(ErrorCodes::LOGICAL_ERROR, "You must provide context to evaluate constant expression"); + if (buf.eof()) + return nullptr; - ParserExpression parser; - Expected expected; - Tokens tokens(field.data, field.data + field.size); - IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth); - ASTPtr ast; - - /// FIXME: Our parser cannot parse maps in the form of '{key : value}' that is used in text formats. 
- bool parsed = parser.parse(token_iterator, ast, expected); - if (!parsed || !token_iterator->isEnd()) - return false; - - try + /// Array + if (checkChar('[', buf)) { - std::pair result = evaluateConstantExpression(ast, context); - type = generalizeDataType(result.second); - return true; + skipWhitespaceIfAny(buf); + + DataTypes nested_types; + bool first = true; + while (!buf.eof() && *buf.position() != ']') + { + if (!first) + { + skipWhitespaceIfAny(buf); + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + auto nested_type = determineDataTypeForSingleFieldImpl(buf); + if (!nested_type) + return nullptr; + + nested_types.push_back(nested_type); + } + + if (buf.eof()) + return nullptr; + + ++buf.position(); + + if (nested_types.empty()) + return std::make_shared(std::make_shared()); + + auto least_supertype = tryGetLeastSupertype(nested_types); + if (!least_supertype) + return nullptr; + + return std::make_shared(least_supertype); } - catch (...) + + /// Tuple + if (checkChar('(', buf)) { - return false; + skipWhitespaceIfAny(buf); + + DataTypes nested_types; + bool first = true; + while (!buf.eof() && *buf.position() != ')') + { + if (!first) + { + skipWhitespaceIfAny(buf); + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + auto nested_type = determineDataTypeForSingleFieldImpl(buf); + if (!nested_type) + return nullptr; + + nested_types.push_back(nested_type); + } + + if (buf.eof() || nested_types.empty()) + return nullptr; + + ++buf.position(); + + return std::make_shared(nested_types); } + + /// Map + if (checkChar('{', buf)) + { + skipWhitespaceIfAny(buf); + + DataTypes key_types; + DataTypes value_types; + bool first = true; + while (!buf.eof() && *buf.position() != '}') + { + if (!first) + { + skipWhitespaceIfAny(buf); + if (!checkChar(',', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + } + else + first = false; + + auto key_type = determineDataTypeForSingleFieldImpl(buf); + if (!key_type) + return nullptr; + + key_types.push_back(key_type); + + skipWhitespaceIfAny(buf); + if (!checkChar(':', buf)) + return nullptr; + skipWhitespaceIfAny(buf); + + auto value_type = determineDataTypeForSingleFieldImpl(buf); + if (!value_type) + return nullptr; + + value_types.push_back(value_type); + } + + if (buf.eof()) + return nullptr; + + ++buf.position(); + skipWhitespaceIfAny(buf); + + if (key_types.empty()) + return std::make_shared(std::make_shared(), std::make_shared()); + + auto key_least_supertype = tryGetLeastSupertype(key_types); + + auto value_least_supertype = tryGetLeastSupertype(value_types); + if (!key_least_supertype || !value_least_supertype) + return nullptr; + + if (!DataTypeMap::checkKeyType(key_least_supertype)) + return nullptr; + + return std::make_shared(key_least_supertype, value_least_supertype); + } + + /// String + if (*buf.position() == '\'') + { + ++buf.position(); + while (!buf.eof()) + { + char * next_pos = find_first_symbols<'\\', '\''>(buf.position(), buf.buffer().end()); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == '\'') + break; + + if (*buf.position() == '\\') + ++buf.position(); + } + + if (buf.eof()) + return nullptr; + + ++buf.position(); + return std::make_shared(); + } + + /// Bool + if (checkStringCaseInsensitive("true", buf) || checkStringCaseInsensitive("false", buf)) + return DataTypeFactory::instance().get("Bool"); + + /// Null + if (checkStringCaseInsensitive("NULL", buf)) + 
return std::make_shared(); + + /// Number + Float64 tmp; + if (tryReadFloatText(tmp, buf)) + return std::make_shared(); + + return nullptr; } -DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context) +static DataTypePtr determineDataTypeForSingleField(ReadBuffer & buf) +{ + return makeNullableRecursivelyAndCheckForNothing(determineDataTypeForSingleFieldImpl(buf)); +} + +DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule) { switch (escaping_rule) { case FormatSettings::EscapingRule::Quoted: { - DataTypePtr type; - bool parsed = evaluateConstantExpressionFromString(field, type, context); - return parsed ? type : nullptr; + ReadBufferFromString buf(field); + auto type = determineDataTypeForSingleField(buf); + return buf.eof() ? type : nullptr; } case FormatSettings::EscapingRule::JSON: return getDataTypeFromJSONField(field); case FormatSettings::EscapingRule::CSV: { + if (!format_settings.csv.input_format_use_best_effort_in_schema_inference) + return makeNullable(std::make_shared()); + if (field.empty() || field == format_settings.csv.null_representation) return nullptr; if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation) - return std::make_shared(); + return DataTypeFactory::instance().get("Nullable(Bool)"); - DataTypePtr type; - bool parsed; - if (field[0] == '\'' || field[0] == '"') + if (field.size() > 1 && ((field.front() == '\'' && field.back() == '\'') || (field.front() == '"' && field.back() == '"'))) { - /// Try to evaluate expression inside quotes. - parsed = evaluateConstantExpressionFromString(StringRef(field.data() + 1, field.size() - 2), type, context); - /// If it's a number in quotes we determine it as a string. - if (parsed && type && isNumber(removeNullable(type))) - return makeNullable(std::make_shared()); - } - else - parsed = evaluateConstantExpressionFromString(field, type, context); + ReadBufferFromString buf(std::string_view(field.data() + 1, field.size() - 2)); + /// Try to determine the type of value inside quotes + auto type = determineDataTypeForSingleField(buf); - /// If we couldn't parse an expression, determine it as a string. - return parsed ? type : makeNullable(std::make_shared()); + if (!type) + return nullptr; + + /// If it's a number or tuple in quotes or there is some unread data in buffer, we determine it as a string. + if (isNumber(removeNullable(type)) || isTuple(type) || !buf.eof()) + return makeNullable(std::make_shared()); + + return type; + } + + /// Case when CSV value is not in quotes. Check if it's a number, and if not, determine it's as a string. + ReadBufferFromString buf(field); + Float64 tmp; + if (tryReadFloatText(tmp, buf) && buf.eof()) + return makeNullable(std::make_shared()); + + return makeNullable(std::make_shared()); } case FormatSettings::EscapingRule::Raw: [[fallthrough]]; case FormatSettings::EscapingRule::Escaped: - /// TODO: Try to use some heuristics here to determine the type of data. - return field.empty() ? 
nullptr : makeNullable(std::make_shared()); + { + if (!format_settings.tsv.input_format_use_best_effort_in_schema_inference) + return makeNullable(std::make_shared()); + + if (field.empty() || field == format_settings.tsv.null_representation) + return nullptr; + + if (field == format_settings.bool_false_representation || field == format_settings.bool_true_representation) + return DataTypeFactory::instance().get("Nullable(Bool)"); + + ReadBufferFromString buf(field); + auto type = determineDataTypeForSingleField(buf); + if (!buf.eof()) + return makeNullable(std::make_shared()); + + return type; + } default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the type for value with {} escaping rule", escapingRuleToString(escaping_rule)); } } -DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context) +DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule) { DataTypes data_types; data_types.reserve(fields.size()); for (const auto & field : fields) - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule, context)); + data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, escaping_rule)); return data_types; } @@ -344,4 +534,12 @@ DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escap } } +DataTypes getDefaultDataTypeForEscapingRules(const std::vector & escaping_rules) +{ + DataTypes data_types; + for (const auto & rule : escaping_rules) + data_types.push_back(getDefaultDataTypeForEscapingRule(rule)); + return data_types; +} + } diff --git a/src/Formats/EscapingRuleUtils.h b/src/Formats/EscapingRuleUtils.h index 10147b29ad6..1ce04a8d1b7 100644 --- a/src/Formats/EscapingRuleUtils.h +++ b/src/Formats/EscapingRuleUtils.h @@ -43,15 +43,21 @@ String readFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule es /// - For JSON escaping rule we can use JSON parser to parse a single field /// and then convert JSON type of this field to ClickHouse type. /// - For CSV escaping rule we can do the next: -/// - If the field is an unquoted string, then we could try to evaluate it -/// as a constant expression, and if it fails, treat it as a String. -/// - If the field is a string in quotes, then we can try to evaluate -/// expression inside quotes as a constant expression, and if it fails or -/// the result is a number (we don't parse numbers in quotes) we treat it as a String. -/// - For TSV and TSVRaw we treat each field as a String (TODO: try to use some tweaks and heuristics here) -DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr); -DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule, ContextPtr context = nullptr); +/// - If the field is an unquoted string, then we try to parse it as a number, +/// and if we cannot, treat it as a String. +/// - If the field is a string in quotes, then we try to use some +/// tweaks and heuristics to determine the type inside quotes, and if we can't or +/// the result is a number or tuple (we don't parse numbers in quotes and don't +/// support tuples in CSV) we treat it as a String. 
+/// - If input_format_csv_use_best_effort_in_schema_inference is disabled, we +/// treat everything as a string. +/// - For TSV and TSVRaw we try to use some tweaks and heuristics to determine the type +/// of value if setting input_format_tsv_use_best_effort_in_schema_inference is enabled, +/// otherwise we treat everything as a string. +DataTypePtr determineDataTypeByEscapingRule(const String & field, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); +DataTypes determineDataTypesByEscapingRule(const std::vector & fields, const FormatSettings & format_settings, FormatSettings::EscapingRule escaping_rule); DataTypePtr getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule escaping_rule); +DataTypes getDefaultDataTypeForEscapingRules(const std::vector & escaping_rules); } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 9dbce146ffa..8f9f600f594 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -65,6 +65,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.null_representation = settings.format_csv_null_representation; format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; + format_settings.csv.input_format_use_best_effort_in_schema_inference = settings.input_format_csv_use_best_effort_in_schema_inference; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; @@ -97,6 +98,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; + format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -117,6 +119,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.tsv.empty_as_default = settings.input_format_tsv_empty_as_default; format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number; format_settings.tsv.null_representation = settings.format_tsv_null_representation; + format_settings.tsv.input_format_use_best_effort_in_schema_inference = settings.input_format_tsv_use_best_effort_in_schema_inference; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; @@ -126,10 +129,17 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; + format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; + format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; + format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; @@ -137,6 +147,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns; format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation; format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference; + format_settings.column_names_for_schema_inference = 
settings.column_names_for_schema_inference; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) @@ -371,7 +382,7 @@ SchemaReaderPtr FormatFactory::getSchemaReader( throw Exception("FormatFactory: Format " + name + " doesn't support schema inference.", ErrorCodes::LOGICAL_ERROR); auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); - return schema_reader_creator(buf, format_settings, context); + return schema_reader_creator(buf, format_settings); } ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader( diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 344dabd3f4d..2f53da3bdff 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -97,7 +97,7 @@ private: /// The checker should return true if format support append. using AppendSupportChecker = std::function; - using SchemaReaderCreator = std::function; + using SchemaReaderCreator = std::function; using ExternalSchemaReaderCreator = std::function; struct Creators diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 608afeb8a2c..6da13a6b032 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -36,6 +36,8 @@ struct FormatSettings bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 100; + String column_names_for_schema_inference = ""; + enum class DateTimeInputFormat { Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. @@ -77,6 +79,7 @@ struct FormatSettings bool low_cardinality_as_dictionary = false; bool import_nested = false; bool allow_missing_columns = false; + bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; } arrow; @@ -104,6 +107,7 @@ struct FormatSettings bool input_format_arrays_as_nested_csv = false; String null_representation = "\\N"; char tuple_delimiter = ','; + bool input_format_use_best_effort_in_schema_inference = true; } csv; struct HiveText @@ -141,6 +145,7 @@ struct FormatSettings UInt64 row_group_size = 1000000; bool import_nested = false; bool allow_missing_columns = false; + bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; std::unordered_set skip_row_groups = {}; } parquet; @@ -209,6 +214,7 @@ struct FormatSettings bool crlf_end_of_line = false; String null_representation = "\\N"; bool input_format_enum_as_number = false; + bool input_format_use_best_effort_in_schema_inference = true; } tsv; struct @@ -223,6 +229,7 @@ struct FormatSettings bool import_nested = false; bool allow_missing_columns = false; int64_t row_batch_size = 100'000; + bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; std::unordered_set skip_stripes = {}; } orc; diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 8e2531e2006..3e88b51152d 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -105,8 +105,11 @@ ColumnsDescription readSchemaFromFormat(const String & format_name, const std::o return readSchemaFromFormat(format_name, format_settings, read_buffer_creator, context, buf_out); } -DataTypePtr generalizeDataType(DataTypePtr type) +DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type) { + if (!type) + return nullptr; + WhichDataType 
which(type); if (which.isNothing()) @@ -115,16 +118,13 @@ DataTypePtr generalizeDataType(DataTypePtr type) if (which.isNullable()) { const auto * nullable_type = assert_cast(type.get()); - return generalizeDataType(nullable_type->getNestedType()); + return makeNullableRecursivelyAndCheckForNothing(nullable_type->getNestedType()); } - if (isNumber(type)) - return makeNullable(std::make_shared()); - if (which.isArray()) { const auto * array_type = assert_cast(type.get()); - auto nested_type = generalizeDataType(array_type->getNestedType()); + auto nested_type = makeNullableRecursivelyAndCheckForNothing(array_type->getNestedType()); return nested_type ? std::make_shared(nested_type) : nullptr; } @@ -134,7 +134,7 @@ DataTypePtr generalizeDataType(DataTypePtr type) DataTypes nested_types; for (const auto & element : tuple_type->getElements()) { - auto nested_type = generalizeDataType(element); + auto nested_type = makeNullableRecursivelyAndCheckForNothing(element); if (!nested_type) return nullptr; nested_types.push_back(nested_type); @@ -145,19 +145,27 @@ DataTypePtr generalizeDataType(DataTypePtr type) if (which.isMap()) { const auto * map_type = assert_cast(type.get()); - auto key_type = removeNullable(generalizeDataType(map_type->getKeyType())); - auto value_type = generalizeDataType(map_type->getValueType()); - return key_type && value_type ? std::make_shared(key_type, value_type) : nullptr; + auto key_type = makeNullableRecursivelyAndCheckForNothing(map_type->getKeyType()); + auto value_type = makeNullableRecursivelyAndCheckForNothing(map_type->getValueType()); + return key_type && value_type ? std::make_shared(removeNullable(key_type), value_type) : nullptr; } if (which.isLowCarnality()) { const auto * lc_type = assert_cast(type.get()); - auto nested_type = generalizeDataType(lc_type->getDictionaryType()); + auto nested_type = makeNullableRecursivelyAndCheckForNothing(lc_type->getDictionaryType()); return nested_type ? std::make_shared(nested_type) : nullptr; } return makeNullable(type); } +NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header) +{ + NamesAndTypesList result; + for (auto & [name, type] : header.getNamesAndTypesList()) + result.emplace_back(name, makeNullableRecursivelyAndCheckForNothing(type)); + return result; +} + } diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index 4446393a581..ea8ebbad4c0 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -29,14 +29,16 @@ ColumnsDescription readSchemaFromFormat( ContextPtr context, std::unique_ptr & buf_out); -/// Convert type to the most general type: -/// - IntN, UIntN, FloatN, Decimal -> Float64 +/// Make type Nullable recursively: /// - Type -> Nullable(type) /// - Array(Type) -> Array(Nullable(Type)) /// - Tuple(Type1, ..., TypeN) -> Tuple(Nullable(Type1), ..., Nullable(TypeN)) /// - Map(KeyType, ValueType) -> Map(KeyType, Nullable(ValueType)) /// - LowCardinality(Type) -> LowCardinality(Nullable(Type)) /// If type is Nothing or one of the nested types is Nothing, return nullptr. -DataTypePtr generalizeDataType(DataTypePtr type); +DataTypePtr makeNullableRecursivelyAndCheckForNothing(DataTypePtr type); +/// Call makeNullableRecursivelyAndCheckForNothing for all types +/// in the block and return names and types. 
+NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header); } diff --git a/src/Functions/FunctionConstantBase.h b/src/Functions/FunctionConstantBase.h index 2d237c77256..c178b3a256e 100644 --- a/src/Functions/FunctionConstantBase.h +++ b/src/Functions/FunctionConstantBase.h @@ -41,9 +41,9 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr & result_type, size_t input_rows_count) const override { - return ColumnT().createColumnConst(input_rows_count, constant_value); + return result_type->createColumnConst(input_rows_count, constant_value); } private: diff --git a/src/Functions/FunctionsTransactionCounters.cpp b/src/Functions/FunctionsTransactionCounters.cpp new file mode 100644 index 00000000000..f2e9d3aa84b --- /dev/null +++ b/src/Functions/FunctionsTransactionCounters.cpp @@ -0,0 +1,71 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +class FunctionTransactionID : public FunctionConstantBase +{ +public: + static constexpr auto name = "transactionID"; + static Tuple getValue(const MergeTreeTransactionPtr & txn) + { + Tuple res; + if (txn) + res = {txn->tid.start_csn, txn->tid.local_tid, txn->tid.host_id}; + else + res = {UInt64(0), UInt64(0), UUIDHelpers::Nil}; + return res; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return getTransactionIDDataType(); } + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionTransactionID(ContextPtr context) : FunctionConstantBase(getValue(context->getCurrentTransaction()), context->isDistributed()) {} +}; + +class FunctionTransactionLatestSnapshot : public FunctionConstantBase +{ + static UInt64 getLatestSnapshot(ContextPtr context) + { + context->checkTransactionsAreAllowed(/* explicit_tcl_query */ true); + return TransactionLog::instance().getLatestSnapshot(); + } +public: + static constexpr auto name = "transactionLatestSnapshot"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionTransactionLatestSnapshot(ContextPtr context) : FunctionConstantBase(getLatestSnapshot(context), context->isDistributed()) {} +}; + +class FunctionTransactionOldestSnapshot : public FunctionConstantBase +{ + static UInt64 getOldestSnapshot(ContextPtr context) + { + context->checkTransactionsAreAllowed(/* explicit_tcl_query */ true); + return TransactionLog::instance().getOldestSnapshot(); + } +public: + static constexpr auto name = "transactionOldestSnapshot"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionTransactionOldestSnapshot(ContextPtr context) : FunctionConstantBase(getOldestSnapshot(context), context->isDistributed()) {} +}; + +} + +void registerFunctionsTransactionCounters(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index d18c73cc8b5..9cd9c70da16 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -75,6 +75,7 @@ void 
registerFunctionFile(FunctionFactory &); void registerFunctionConnectionId(FunctionFactory &); void registerFunctionPartitionId(FunctionFactory &); void registerFunctionIsIPAddressContainedIn(FunctionFactory &); +void registerFunctionsTransactionCounters(FunctionFactory & factory); void registerFunctionQueryID(FunctionFactory &); void registerFunctionInitialQueryID(FunctionFactory &); void registerFunctionServerUUID(FunctionFactory &); @@ -163,6 +164,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionConnectionId(factory); registerFunctionPartitionId(factory); registerFunctionIsIPAddressContainedIn(factory); + registerFunctionsTransactionCounters(factory); registerFunctionQueryID(factory); registerFunctionInitialQueryID(factory); registerFunctionServerUUID(factory); diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e086f16be54..bf3cccccab8 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -26,6 +26,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_DATETIME; extern const int CANNOT_PARSE_DATE; extern const int INCORRECT_DATA; + extern const int ATTEMPT_TO_READ_AFTER_EOF; } template @@ -137,6 +138,12 @@ void assertEOF(ReadBuffer & buf) throwAtAssertionFailed("eof", buf); } +void assertNotEOF(ReadBuffer & buf) +{ + if (buf.eof()) + throw Exception("Attempt to read after EOF", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF); +} + void assertStringCaseInsensitive(const char * s, ReadBuffer & buf) { @@ -1366,6 +1373,7 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf) /// - Tuples: (...) /// - Maps: {...} /// - NULL + /// - Bool: true/false /// - Number: integer, float, decimal. if (*buf.position() == '\'') @@ -1394,6 +1402,16 @@ void readQuotedFieldIntoString(String & s, ReadBuffer & buf) s.append("NaN"); } } + else if (checkCharCaseInsensitive('t', buf)) + { + assertStringCaseInsensitive("rue", buf); + s.append("true"); + } + else if (checkCharCaseInsensitive('f', buf)) + { + assertStringCaseInsensitive("alse", buf); + s.append("false"); + } else { /// It's an integer, float or decimal. They all can be parsed as float. 
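Note: the true/false/NULL literals recognized by readQuotedFieldIntoString above are the same ones the new determineDataTypeForSingleFieldImpl (earlier in this patch) maps to types during schema inference. A reduced, standalone sketch of the scalar branches of that classifier follows; the real code works on a ReadBuffer, recurses into arrays, tuples and maps, and wraps results with the Nullable pass, and the helper names here are illustrative only.

#include <cctype>
#include <cstdlib>
#include <optional>
#include <string>
#include <string_view>

// Scalar branches of determineDataTypeForSingleFieldImpl, reduced to a
// standalone classifier that returns a type name.
static std::optional<std::string> detectScalarType(std::string_view field)
{
    auto iequals = [](std::string_view a, std::string_view b)
    {
        if (a.size() != b.size())
            return false;
        for (size_t i = 0; i < a.size(); ++i)
            if (std::tolower(static_cast<unsigned char>(a[i])) != std::tolower(static_cast<unsigned char>(b[i])))
                return false;
        return true;
    };

    if (field.size() >= 2 && field.front() == '\'' && field.back() == '\'')
        return "String";                   // quoted literal
    if (iequals(field, "true") || iequals(field, "false"))
        return "Bool";                     // new Bool branch
    if (iequals(field, "NULL"))
        return "Nothing";                  // later rejected or wrapped by the Nullable pass

    std::string copy(field);               // strtod needs a NUL-terminated buffer
    char * end = nullptr;
    std::strtod(copy.c_str(), &end);
    if (!copy.empty() && end == copy.c_str() + copy.size())
        return "Float64";                  // every numeric literal is detected as Float64

    return std::nullopt;                   // caller falls back to String or rejects the value
}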
diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index e68da3a1c7d..13228853ff3 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -163,6 +163,7 @@ void readVectorBinary(std::vector & v, ReadBuffer & buf, size_t MAX_VECTOR_SI void assertString(const char * s, ReadBuffer & buf); void assertEOF(ReadBuffer & buf); +void assertNotEOF(ReadBuffer & buf); [[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf); diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index e321eecf104..92346615a7a 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -77,8 +77,9 @@ struct ReadSettings size_t remote_fs_read_max_backoff_ms = 10000; size_t remote_fs_read_backoff_max_tries = 4; - bool remote_fs_enable_cache = true; - size_t remote_fs_cache_max_wait_sec = 1; + bool enable_filesystem_cache = true; + size_t filesystem_cache_max_wait_sec = 1; + bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/IO/WriteBufferFromFileDescriptor.cpp b/src/IO/WriteBufferFromFileDescriptor.cpp index b91114995e8..d3ca4a9fc32 100644 --- a/src/IO/WriteBufferFromFileDescriptor.cpp +++ b/src/IO/WriteBufferFromFileDescriptor.cpp @@ -133,7 +133,6 @@ off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT return res; } - void WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT { int res = ftruncate(fd, length); diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index eda7bb6f8ae..c85f3989531 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -2,25 +2,28 @@ #if USE_AWS_S3 -# include -# include +#include +#include -# include -# include -# include -# include -# include -# include +#include +#include +#include -# include +#include +#include +#include +#include +#include + +#include namespace ProfileEvents { extern const Event S3WriteBytes; + extern const Event RemoteFSCacheDownloadBytes; } - namespace DB { // S3 protocol does not allow to have multipart upload with more than 10000 parts. 
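Note: the renamed ReadSettings flags above are toggled per read rather than globally. DiskS3::readFile (earlier in this patch) copies the caller's settings, attaches the cache, and sets the bypass flag when the cache is read-only so that cold segments are read straight from S3. A minimal sketch of that call-site pattern, with the type trimmed to the fields this patch touches:

#include <cstddef>

// Subset of DB::ReadSettings as renamed in this patch.
struct ReadSettings
{
    bool enable_filesystem_cache = true;
    size_t filesystem_cache_max_wait_sec = 1;
    bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false;
};

// Mirrors the adjustment in DiskS3::readFile: keep the caller's settings,
// but bypass the cache for segments that are not already downloaded when
// the cache is in read-only mode.
ReadSettings adjustForCache(const ReadSettings & read_settings, bool cache_attached, bool cache_is_read_only)
{
    ReadSettings disk_read_settings{read_settings};
    if (cache_attached && cache_is_read_only)
        disk_read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
    return disk_read_settings;
}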
@@ -32,6 +35,7 @@ const int S3_WARN_MAX_PARTS = 10000; namespace ErrorCodes { extern const int S3_ERROR; + extern const int LOGICAL_ERROR; } struct WriteBufferFromS3::UploadPartTask @@ -40,6 +44,7 @@ struct WriteBufferFromS3::UploadPartTask bool is_finised = false; std::string tag; std::exception_ptr exception; + std::optional cache_files; }; struct WriteBufferFromS3::PutObjectTask @@ -47,6 +52,7 @@ struct WriteBufferFromS3::PutObjectTask Aws::S3::Model::PutObjectRequest req; bool is_finised = false; std::exception_ptr exception; + std::optional cache_files; }; WriteBufferFromS3::WriteBufferFromS3( @@ -59,7 +65,9 @@ WriteBufferFromS3::WriteBufferFromS3( size_t max_single_part_upload_size_, std::optional> object_metadata_, size_t buffer_size_, - ScheduleFunc schedule_) + ScheduleFunc schedule_, + const String & blob_name_, + FileCachePtr cache_) : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) @@ -70,6 +78,8 @@ WriteBufferFromS3::WriteBufferFromS3( , upload_part_size_multiply_threshold(upload_part_size_multiply_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , schedule(std::move(schedule_)) + , blob_name(blob_name_) + , cache(cache_) { allocateBuffer(); } @@ -83,7 +93,41 @@ void WriteBufferFromS3::nextImpl() if (temporary_buffer->tellp() == -1) allocateBuffer(); - temporary_buffer->write(working_buffer.begin(), offset()); + size_t size = offset(); + temporary_buffer->write(working_buffer.begin(), size); + + ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup() + ? CurrentThread::get().getThreadGroup() + : MainThreadStatus::getInstance().getThreadGroup(); + + if (cacheEnabled()) + { + if (blob_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty blob name"); + + auto cache_key = cache->hash(blob_name); + file_segments_holder.emplace(cache->setDownloading(cache_key, current_download_offset, size)); + current_download_offset += size; + + size_t remaining_size = size; + auto & file_segments = file_segments_holder->file_segments; + for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end(); ++file_segment_it) + { + auto & file_segment = *file_segment_it; + size_t current_size = std::min(file_segment->range().size(), remaining_size); + remaining_size -= current_size; + + if (file_segment->reserve(current_size)) + { + file_segment->writeInMemory(working_buffer.begin(), current_size); + } + else + { + file_segments.erase(file_segment_it, file_segments.end()); + break; + } + } + } ProfileEvents::increment(ProfileEvents::S3WriteBytes, offset()); @@ -95,7 +139,6 @@ void WriteBufferFromS3::nextImpl() if (!multipart_upload_id.empty() && last_part_size > upload_part_size) { - writePart(); allocateBuffer(); @@ -126,6 +169,11 @@ WriteBufferFromS3::~WriteBufferFromS3() } } +bool WriteBufferFromS3::cacheEnabled() const +{ + return cache != nullptr; +} + void WriteBufferFromS3::preFinalize() { next(); @@ -213,6 +261,13 @@ void WriteBufferFromS3::writePart() } fillUploadRequest(task->req, part_number); + + if (file_segments_holder) + { + task->cache_files.emplace(std::move(*file_segments_holder)); + file_segments_holder.reset(); + } + schedule([this, task]() { try @@ -224,6 +279,15 @@ void WriteBufferFromS3::writePart() task->exception = std::current_exception(); } + try + { + finalizeCacheIfNeeded(task->cache_files); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + { std::lock_guard lock(bg_tasks_mutex); task->is_finised = true; @@ -240,8 +304,14 @@ void WriteBufferFromS3::writePart() { UploadPartTask task; fillUploadRequest(task.req, part_tags.size() + 1); + if (file_segments_holder) + { + task.cache_files.emplace(std::move(*file_segments_holder)); + file_segments_holder.reset(); + } processUploadRequest(task); part_tags.push_back(task.tag); + finalizeCacheIfNeeded(task.cache_files); } } @@ -328,7 +398,14 @@ void WriteBufferFromS3::makeSinglepartUpload() if (schedule) { put_object_task = std::make_unique(); + fillPutRequest(put_object_task->req); + if (file_segments_holder) + { + put_object_task->cache_files.emplace(std::move(*file_segments_holder)); + file_segments_holder.reset(); + } + schedule([this]() { try @@ -340,6 +417,15 @@ void WriteBufferFromS3::makeSinglepartUpload() put_object_task->exception = std::current_exception(); } + try + { + finalizeCacheIfNeeded(put_object_task->cache_files); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + { std::lock_guard lock(bg_tasks_mutex); put_object_task->is_finised = true; @@ -349,14 +435,19 @@ void WriteBufferFromS3::makeSinglepartUpload() /// Releasing lock and condvar notification. bg_tasks_condvar.notify_one(); } - }); } else { PutObjectTask task; fillPutRequest(task.req); + if (file_segments_holder) + { + task.cache_files.emplace(std::move(*file_segments_holder)); + file_segments_holder.reset(); + } processPutRequest(task); + finalizeCacheIfNeeded(task.cache_files); } } @@ -384,6 +475,28 @@ void WriteBufferFromS3::processPutRequest(PutObjectTask & task) throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); } +void WriteBufferFromS3::finalizeCacheIfNeeded(std::optional & file_segments_holder) +{ + if (!file_segments_holder) + return; + + auto & file_segments = file_segments_holder->file_segments; + for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();) + { + try + { + size_t size = (*file_segment_it)->finalizeWrite(); + file_segment_it = file_segments.erase(file_segment_it); + + ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + void WriteBufferFromS3::waitForReadyBackGroundTasks() { if (schedule) diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index a4fbcbcdeeb..8e91bbc04da 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -4,16 +4,20 @@ #if USE_AWS_S3 -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include -# include -# include +#include +#include +#include -# include +#include +#include + +#include namespace Aws::S3 { @@ -30,6 +34,7 @@ namespace DB { using ScheduleFunc = std::function)>; +class WriteBufferFromFile; /** * Buffer to write a data to a S3 object with specified bucket and key. 
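WriteBufferFromS3 now optionally writes through to the filesystem cache: nextImpl() reserves cache file segments for the bytes about to be uploaded and copies them in with writeInMemory(), and finalizeCacheIfNeeded() later calls finalizeWrite() on those segments once the S3 request is done, counting the bytes into RemoteFSCacheDownloadBytes. The sketch below mirrors the shape of that reserve-or-truncate loop with toy types; DemoSegment is a made-up simplification of FileSegment, not its real interface:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <list>
    #include <string>

    // Made-up simplification of a cache file segment; the real FileSegment has a richer API.
    struct DemoSegment
    {
        size_t capacity = 0;
        bool can_reserve = true;   // the real reserve() fails when the cache has no space left
        std::string data;

        bool reserve(size_t) const { return can_reserve; }
        void writeInMemory(const char * from, size_t bytes) { data.assign(from, bytes); }
        size_t finalizeWrite() const { return data.size(); }   // persist the segment, return its size
    };

    // Shape of the loop added to nextImpl(): cache as much of the outgoing buffer as can be
    // reserved, and drop the remaining segments if the cache runs out of space.
    size_t writeThroughCache(std::list<DemoSegment> & segments, const std::string & buffer)
    {
        size_t cached = 0;
        size_t remaining = buffer.size();
        for (auto it = segments.begin(); it != segments.end(); ++it)
        {
            size_t chunk = std::min(it->capacity, remaining);
            if (!it->reserve(chunk))
            {
                segments.erase(it, segments.end());   // keep only the segments that were reserved
                break;
            }
            it->writeInMemory(buffer.data() + cached, chunk);
            cached += chunk;
            remaining -= chunk;
        }
        return cached;
    }

    int main()
    {
        std::list<DemoSegment> segments{{4, true, {}}, {4, true, {}}, {4, false, {}}};
        std::cout << writeThroughCache(segments, "0123456789") << " bytes cached\n";   // 8 bytes cached

        size_t finalized = 0;
        for (auto & s : segments)
            finalized += s.finalizeWrite();   // in the patch this runs in finalizeCacheIfNeeded() after the upload
        std::cout << finalized << " bytes finalized\n";                                // 8 bytes finalized
    }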
@@ -51,7 +56,9 @@ public: size_t max_single_part_upload_size_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - ScheduleFunc schedule_ = {}); + ScheduleFunc schedule_ = {}, + const String & blob_name = "", + FileCachePtr cache_ = nullptr); ~WriteBufferFromS3() override; @@ -82,6 +89,8 @@ private: void waitForReadyBackGroundTasks(); void waitForAllBackGroundTasks(); + bool cacheEnabled() const; + String bucket; String key; std::optional> object_metadata; @@ -113,6 +122,12 @@ private: std::condition_variable bg_tasks_condvar; Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); + + const String blob_name; + FileCachePtr cache; + size_t current_download_offset = 0; + std::optional file_segments_holder; + static void finalizeCacheIfNeeded(std::optional &); }; } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 7c6abf2aec7..f72213f0d11 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1163,3 +1163,19 @@ struct PcgSerializer void writePointerHex(const void * ptr, WriteBuffer & buf); } + +template<> +struct fmt::formatter +{ + template + constexpr auto parse(ParseContext & context) + { + return context.begin(); + } + + template + auto format(const DB::UUID & uuid, FormatContext & context) + { + return fmt::format_to(context.out(), "{}", toString(uuid)); + } +}; diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h new file mode 100644 index 00000000000..3464bb31664 --- /dev/null +++ b/src/IO/WriteSettings.h @@ -0,0 +1,12 @@ +#pragma once + +namespace DB +{ + +/// Settings to be passed to IDisk::writeFile() +struct WriteSettings +{ + bool enable_filesystem_cache_on_write_operations = false; +}; + +} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 23ef5541456..40ed4d58993 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -89,6 +90,7 @@ #include #include #include +#include #include #if USE_ROCKSDB @@ -132,6 +134,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; + extern const int NOT_IMPLEMENTED; } @@ -280,6 +283,8 @@ struct ContextSharedPart Context::ConfigReloadCallback config_reload_callback; + bool is_server_completely_started = false; + #if USE_ROCKSDB /// Global merge tree metadata cache, stored in rocksdb. 
MergeTreeMetadataCachePtr merge_tree_metadata_cache; @@ -364,6 +369,8 @@ struct ContextSharedPart if (common_executor) common_executor->wait(); + TransactionLog::shutdownIfAny(); + std::unique_ptr delete_system_logs; std::unique_ptr delete_embedded_dictionaries; std::unique_ptr delete_external_dictionaries_loader; @@ -491,6 +498,8 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared) void Context::initGlobal() { + assert(!global_context_instance); + global_context_instance = shared_from_this(); DatabaseCatalog::init(shared_from_this()); } @@ -2474,6 +2483,17 @@ std::shared_ptr Context::getZooKeeperLog() const } +std::shared_ptr Context::getTransactionsInfoLog() const +{ + auto lock = getLock(); + + if (!shared->system_logs) + return {}; + + return shared->system_logs->transactions_info_log; +} + + std::shared_ptr Context::getProcessorsProfileLog() const { auto lock = getLock(); @@ -3077,6 +3097,56 @@ void Context::resetZooKeeperMetadataTransaction() metadata_transaction = nullptr; } + +void Context::checkTransactionsAreAllowed(bool explicit_tcl_query /* = false */) const +{ + if (getConfigRef().getInt("allow_experimental_transactions", 0)) + return; + + if (explicit_tcl_query) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported"); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Experimental support for transactions is disabled, " + "however, some query or background task tried to access TransactionLog. " + "If you have not enabled this feature explicitly, then it's a bug."); +} + +void Context::initCurrentTransaction(MergeTreeTransactionPtr txn) +{ + merge_tree_transaction_holder = MergeTreeTransactionHolder(txn, false, this); + setCurrentTransaction(std::move(txn)); +} + +void Context::setCurrentTransaction(MergeTreeTransactionPtr txn) +{ + assert(!merge_tree_transaction || !txn); + assert(this == session_context.lock().get() || this == query_context.lock().get()); + merge_tree_transaction = std::move(txn); + if (!merge_tree_transaction) + merge_tree_transaction_holder = {}; +} + +MergeTreeTransactionPtr Context::getCurrentTransaction() const +{ + return merge_tree_transaction; +} + +bool Context::isServerCompletelyStarted() const +{ + auto lock = getLock(); + assert(getApplicationType() == ApplicationType::SERVER); + return shared->is_server_completely_started; +} + +void Context::setServerCompletelyStarted() +{ + auto lock = getLock(); + assert(global_context.lock().get() == this); + assert(!shared->is_server_completely_started); + assert(getApplicationType() == ApplicationType::SERVER); + shared->is_server_completely_started = true; +} + PartUUIDsPtr Context::getPartUUIDs() const { auto lock = getLock(); @@ -3242,8 +3312,9 @@ ReadSettings Context::getReadSettings() const res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms; res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries; - res.remote_fs_enable_cache = settings.remote_fs_enable_cache; - res.remote_fs_cache_max_wait_sec = settings.remote_fs_cache_max_wait_sec; + res.enable_filesystem_cache = settings.enable_filesystem_cache; + res.filesystem_cache_max_wait_sec = settings.filesystem_cache_max_wait_sec; + res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; @@ -3262,4 +3333,13 @@ ReadSettings Context::getReadSettings() const return res; } +WriteSettings Context::getWriteSettings() 
const +{ + WriteSettings res; + + res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; + + return res; +} + } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index cf4a8fc1b7a..b53e3945188 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,8 @@ #include #include #include + +#include #include @@ -80,6 +83,7 @@ class AsynchronousMetricLog; class OpenTelemetrySpanLog; class ZooKeeperLog; class SessionLog; +class TransactionsInfoLog; class ProcessorsProfileLog; struct MergeTreeSettings; class StorageS3Settings; @@ -119,6 +123,7 @@ struct PartUUIDs; using PartUUIDsPtr = std::shared_ptr; class KeeperDispatcher; class Session; +struct WriteSettings; class IInputFormat; class IOutputFormat; @@ -312,6 +317,7 @@ private: /// A flag, used to distinguish between user query and internal query to a database engine (MaterializedPostgreSQL). bool is_internal_query = false; + inline static ContextPtr global_context_instance; public: // Top-level OpenTelemetry trace context for the query. Makes sense only for a query context. @@ -339,6 +345,11 @@ private: /// thousands of signatures. /// And I hope it will be replaced with more common Transaction sometime. + MergeTreeTransactionPtr merge_tree_transaction; /// Current transaction context. Can be inside session or query context. + /// It's shared with all children contexts. + MergeTreeTransactionHolder merge_tree_transaction_holder; /// It will rollback or commit transaction on Context destruction. + + /// Use copy constructor or createGlobal() instead Context(); Context(const Context &); Context & operator=(const Context &); @@ -633,6 +644,8 @@ public: ContextMutablePtr getGlobalContext() const; + static ContextPtr getGlobalContextInstance() { return global_context_instance; } + bool hasGlobalContext() const { return !global_context.expired(); } bool isGlobalContext() const { @@ -802,6 +815,7 @@ public: std::shared_ptr getOpenTelemetrySpanLog() const; std::shared_ptr getZooKeeperLog() const; std::shared_ptr getSessionLog() const; + std::shared_ptr getTransactionsInfoLog() const; std::shared_ptr getProcessorsProfileLog() const; /// Returns an object used to log operations with parts if it possible. @@ -890,6 +904,14 @@ public: /// Removes context of current distributed DDL. void resetZooKeeperMetadataTransaction(); + void checkTransactionsAreAllowed(bool explicit_tcl_query = false) const; + void initCurrentTransaction(MergeTreeTransactionPtr txn); + void setCurrentTransaction(MergeTreeTransactionPtr txn); + MergeTreeTransactionPtr getCurrentTransaction() const; + + bool isServerCompletelyStarted() const; + void setServerCompletelyStarted(); + PartUUIDsPtr getPartUUIDs() const; PartUUIDsPtr getIgnoredPartUUIDs() const; @@ -913,6 +935,9 @@ public: /** Get settings for reading from filesystem. */ ReadSettings getReadSettings() const; + /** Get settings for writing to filesystem. 
*/ + WriteSettings getWriteSettings() const; + private: std::unique_lock getLock() const; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index a490d7bed43..3f43c5eb412 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -142,10 +142,11 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context) { const char * begin = entry.query.data(); const char * end = begin + entry.query.size(); + const auto & settings = context->getSettingsRef(); - ParserQuery parser_query(end); + ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); String description; - query = parseQuery(parser_query, begin, end, description, 0, context->getSettingsRef().max_parser_depth); + query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth); } ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const ZooKeeperPtr & /*zookeeper*/) @@ -390,12 +391,7 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte String DDLTaskBase::getLogEntryName(UInt32 log_entry_number) { - /// Sequential counter in ZooKeeper is Int32. - assert(log_entry_number < std::numeric_limits::max()); - constexpr size_t seq_node_digits = 10; - String number = toString(log_entry_number); - String name = "query-" + String(seq_node_digits - number.size(), '0') + number; - return name; + return zkutil::getSequentialNodeName("query-", log_entry_number); } UInt32 DDLTaskBase::getLogEntryNumber(const String & log_entry_name) diff --git a/src/Interpreters/IInterpreter.cpp b/src/Interpreters/IInterpreter.cpp index af0c06e7503..84fbfee7905 100644 --- a/src/Interpreters/IInterpreter.cpp +++ b/src/Interpreters/IInterpreter.cpp @@ -1,9 +1,16 @@ #include #include #include +#include namespace DB { + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + void IInterpreter::extendQueryLogElem( QueryLogElement & elem, const ASTPtr & ast, ContextPtr context, const String & query_database, const String & query_table) const { @@ -21,4 +28,18 @@ void IInterpreter::extendQueryLogElem( extendQueryLogElemImpl(elem, ast, context); } + +void IInterpreter::checkStorageSupportsTransactionsIfNeeded(const StoragePtr & storage, ContextPtr context) +{ + if (!context->getCurrentTransaction()) + return; + + if (storage->supportsTransactions()) + return; + + if (context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Storage {} (table {}) does not support transactions", + storage->getName(), storage->getStorageID().getNameForLogs()); +} + } diff --git a/src/Interpreters/IInterpreter.h b/src/Interpreters/IInterpreter.h index 665a46190fd..74a568c5cba 100644 --- a/src/Interpreters/IInterpreter.h +++ b/src/Interpreters/IInterpreter.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -33,6 +34,13 @@ public: virtual void extendQueryLogElemImpl(QueryLogElement &, const ASTPtr &, ContextPtr) const {} + /// Returns true if transactions maybe supported for this type of query. + /// If Interpreter returns true, than it is responsible to check that specific query with specific Storage is supported. + virtual bool supportsTransactions() const { return false; } + + /// Helper function for some Interpreters. 
+ static void checkStorageSupportsTransactionsIfNeeded(const StoragePtr & storage, ContextPtr context); + virtual ~IInterpreter() = default; }; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index d01f2b05567..df1d6b8c92c 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -86,6 +86,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) } StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext()); + checkStorageSupportsTransactionsIfNeeded(table, getContext()); if (table->isStaticStorage()) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); diff --git a/src/Interpreters/InterpreterAlterQuery.h b/src/Interpreters/InterpreterAlterQuery.h index 9494a400e7b..c6648ff9e7e 100644 --- a/src/Interpreters/InterpreterAlterQuery.h +++ b/src/Interpreters/InterpreterAlterQuery.h @@ -26,6 +26,8 @@ public: void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context) const override; + bool supportsTransactions() const override { return true; } + private: AccessRightsElements getRequiredAccess() const; diff --git a/src/Interpreters/InterpreterBackupQuery.cpp b/src/Interpreters/InterpreterBackupQuery.cpp index 01970bc5cc2..b630bac9515 100644 --- a/src/Interpreters/InterpreterBackupQuery.cpp +++ b/src/Interpreters/InterpreterBackupQuery.cpp @@ -41,8 +41,8 @@ namespace void executeBackup(const ContextPtr & context, const ASTBackupQuery & query) { auto backup_settings = BackupSettings::fromBackupQuery(query); - BackupMutablePtr backup = createBackup(BackupInfo::fromAST(*query.backup_name), backup_settings, context); auto backup_entries = makeBackupEntries(context, query.elements, backup_settings); + BackupMutablePtr backup = createBackup(BackupInfo::fromAST(*query.backup_name), backup_settings, context); writeBackupEntries(backup, std::move(backup_entries), context->getSettingsRef().max_backup_threads); } diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index da5fcedd469..9919b1272bd 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -89,7 +89,7 @@ BlockIO InterpreterDescribeQuery::execute() auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - storage_snapshot = table->getStorageSnapshot(metadata_snapshot); + storage_snapshot = table->getStorageSnapshot(metadata_snapshot, getContext()); columns = metadata_snapshot->getColumns(); } diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index edca48d3600..529ff806180 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -400,6 +401,23 @@ QueryPipeline InterpreterExplainQuery::executeImpl() override_info.appendTo(buf); break; } + case ASTExplainQuery::CurrentTransaction: + { + if (ast.getSettings()) + throw Exception("Settings are not supported for EXPLAIN CURRENT TRANSACTION query.", ErrorCodes::UNKNOWN_SETTING); + + if (auto txn = getContext()->getCurrentTransaction()) + { + String dump = txn->dumpDescription(); + 
buf.write(dump.data(), dump.size()); + } + else + { + writeCString("", buf); + } + + break; + } } if (insert_buf) { diff --git a/src/Interpreters/InterpreterExplainQuery.h b/src/Interpreters/InterpreterExplainQuery.h index a640b1c977c..ccfe8ec88a5 100644 --- a/src/Interpreters/InterpreterExplainQuery.h +++ b/src/Interpreters/InterpreterExplainQuery.h @@ -17,6 +17,8 @@ public: static Block getSampleBlock(ASTExplainQuery::ExplainKind kind); + bool supportsTransactions() const override { return true; } + private: ASTPtr query; diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index e9ee2b0910a..5dcee1eae05 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -62,6 +63,7 @@ #include #include #include +#include #include #include @@ -278,6 +280,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context, true /*persist_function*/); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 49e63a91721..58ca9ccc978 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -287,6 +287,8 @@ BlockIO InterpreterInsertQuery::execute() QueryPipelineBuilder pipeline; StoragePtr table = getTable(query); + checkStorageSupportsTransactionsIfNeeded(table, getContext()); + StoragePtr inner_table; if (const auto * mv = dynamic_cast(table.get())) inner_table = mv->getTargetTable(); diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 93de92a0680..51a3f0384aa 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -46,6 +46,8 @@ public: StoragePtr getTable(ASTInsertQuery & query); Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; + bool supportsTransactions() const override { return true; } + private: Block getSampleBlock(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 5ec6abb08a7..481355878aa 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -358,6 +359,49 @@ BlockIO InterpreterKillQueryQuery::execute() break; } + case ASTKillQueryQuery::Type::Transaction: + { + getContext()->checkAccess(AccessType::KILL_TRANSACTION); + + Block transactions_block = getSelectResult("tid, tid_hash, elapsed, is_readonly, state", "system.transactions"); + + if (!transactions_block) + return res_io; + + const ColumnUInt64 & tid_hash_col = typeid_cast(*transactions_block.getByName("tid_hash").column); + + auto header = transactions_block.cloneEmpty(); + header.insert(0, {ColumnString::create(), std::make_shared(), "kill_status"}); + MutableColumns res_columns = header.cloneEmptyColumns(); + + for (size_t i = 0; i < transactions_block.rows(); ++i) + { + UInt64 tid_hash = tid_hash_col.getUInt(i); + + CancellationCode code = CancellationCode::Unknown; + if (!query.test) + { + auto txn = 
TransactionLog::instance().tryGetRunningTransaction(tid_hash); + if (txn) + { + txn->onException(); + if (txn->getState() == MergeTreeTransaction::ROLLED_BACK) + code = CancellationCode::CancelSent; + else + code = CancellationCode::CancelCannotBeSent; + } + else + { + code = CancellationCode::NotFound; + } + } + + insertResultRow(i, code, transactions_block, header, res_columns); + } + + res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); + break; + } } return res_io; diff --git a/src/Interpreters/InterpreterOptimizeQuery.cpp b/src/Interpreters/InterpreterOptimizeQuery.cpp index d4fe7604ced..83bf23ab4ad 100644 --- a/src/Interpreters/InterpreterOptimizeQuery.cpp +++ b/src/Interpreters/InterpreterOptimizeQuery.cpp @@ -31,8 +31,9 @@ BlockIO InterpreterOptimizeQuery::execute() auto table_id = getContext()->resolveStorageID(ast, Context::ResolveOrdinary); StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext()); + checkStorageSupportsTransactionsIfNeeded(table, getContext()); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto storage_snapshot = table->getStorageSnapshot(metadata_snapshot); + auto storage_snapshot = table->getStorageSnapshot(metadata_snapshot, getContext()); // Empty list of names means we deduplicate by all columns, but user can explicitly state which columns to use. Names column_names; diff --git a/src/Interpreters/InterpreterOptimizeQuery.h b/src/Interpreters/InterpreterOptimizeQuery.h index 8491fe8df49..932700e51b5 100644 --- a/src/Interpreters/InterpreterOptimizeQuery.h +++ b/src/Interpreters/InterpreterOptimizeQuery.h @@ -18,6 +18,8 @@ public: BlockIO execute() override; + bool supportsTransactions() const override { return true; } + private: AccessRightsElements getRequiredAccess() const; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5091debbe72..270c7502ecd 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -329,12 +329,28 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!metadata_snapshot) metadata_snapshot = storage->getInMemoryMetadataPtr(); - storage_snapshot = storage->getStorageSnapshotForQuery(metadata_snapshot, query_ptr); + storage_snapshot = storage->getStorageSnapshotForQuery(metadata_snapshot, query_ptr, context); } if (has_input || !joined_tables.resolveTables()) joined_tables.makeFakeTable(storage, metadata_snapshot, source_header); + + if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + { + if (storage) + checkStorageSupportsTransactionsIfNeeded(storage, context); + for (const auto & table : joined_tables.tablesWithColumns()) + { + if (table.table.table.empty()) + continue; + auto maybe_storage = DatabaseCatalog::instance().tryGetTable({table.table.database, table.table.table}, context); + if (!maybe_storage) + continue; + checkStorageSupportsTransactionsIfNeeded(storage, context); + } + } + /// Rewrite JOINs if (!has_input && joined_tables.tablesCount() > 1) { @@ -1791,7 +1807,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc const auto & func = desc.function; std::optional num_rows{}; - if (!query.prewhere() && !query.where()) + if (!query.prewhere() && !query.where() && !context->getCurrentTransaction()) { num_rows = storage->totalRows(settings); } diff --git a/src/Interpreters/InterpreterSelectQuery.h 
b/src/Interpreters/InterpreterSelectQuery.h index 6bb12caff7d..ff3e8a1f706 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -104,6 +104,8 @@ public: Names getRequiredColumns() { return required_columns; } + bool supportsTransactions() const override { return true; } + private: InterpreterSelectQuery( const ASTPtr & query_ptr_, diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.h b/src/Interpreters/InterpreterSelectWithUnionQuery.h index 720632e7be5..adf8540d626 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -39,6 +39,8 @@ public: virtual void ignoreWithTotals() override; + bool supportsTransactions() const override { return true; } + private: std::vector> nested_interpreters; diff --git a/src/Interpreters/InterpreterSetQuery.h b/src/Interpreters/InterpreterSetQuery.h index 9bd49708421..39d331100d6 100644 --- a/src/Interpreters/InterpreterSetQuery.h +++ b/src/Interpreters/InterpreterSetQuery.h @@ -25,6 +25,8 @@ public: */ void executeForCurrentContext(); + bool supportsTransactions() const override { return true; } + private: ASTPtr query_ptr; }; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 092978f4748..30f0f892ca4 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -29,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -297,6 +300,21 @@ BlockIO InterpreterSystemQuery::execute() cache->reset(); break; #endif + case Type::DROP_FILESYSTEM_CACHE: + { + if (query.filesystem_cache_path.empty()) + { + auto caches = FileCacheFactory::instance().getAll(); + for (const auto & [_, cache_data] : caches) + cache_data.cache->tryRemoveAll(); + } + else + { + auto cache = FileCacheFactory::instance().get(query.filesystem_cache_path); + cache->tryRemoveAll(); + } + break; + } case Type::RELOAD_DICTIONARY: { getContext()->checkAccess(AccessType::SYSTEM_RELOAD_DICTIONARY); @@ -445,6 +463,7 @@ BlockIO InterpreterSystemQuery::execute() [&] { if (auto query_views_log = getContext()->getQueryViewsLog()) query_views_log->flush(true); }, [&] { if (auto zookeeper_log = getContext()->getZooKeeperLog()) zookeeper_log->flush(true); }, [&] { if (auto session_log = getContext()->getSessionLog()) session_log->flush(true); }, + [&] { if (auto transactions_info_log = getContext()->getTransactionsInfoLog()) transactions_info_log->flush(true); }, [&] { if (auto processors_profile_log = getContext()->getProcessorsProfileLog()) processors_profile_log->flush(true); } ); break; @@ -760,6 +779,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::DROP_UNCOMPRESSED_CACHE: case Type::DROP_INDEX_MARK_CACHE: case Type::DROP_INDEX_UNCOMPRESSED_CACHE: + case Type::DROP_FILESYSTEM_CACHE: { required_access.emplace_back(AccessType::SYSTEM_DROP_CACHE); break; diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp new file mode 100644 index 00000000000..61b2a4e865f --- /dev/null +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -0,0 +1,90 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INVALID_TRANSACTION; +} + +BlockIO 
InterpreterTransactionControlQuery::execute() +{ + if (!query_context->hasSessionContext()) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Transaction Control Language queries are allowed only inside session"); + + ContextMutablePtr session_context = query_context->getSessionContext(); + const auto & tcl = query_ptr->as(); + + switch (tcl.action) + { + case ASTTransactionControl::BEGIN: + return executeBegin(session_context); + case ASTTransactionControl::COMMIT: + return executeCommit(session_context); + case ASTTransactionControl::ROLLBACK: + return executeRollback(session_context); + case ASTTransactionControl::SET_SNAPSHOT: + return executeSetSnapshot(session_context, tcl.snapshot); + } + assert(false); + __builtin_unreachable(); +} + +BlockIO InterpreterTransactionControlQuery::executeBegin(ContextMutablePtr session_context) +{ + if (session_context->getCurrentTransaction()) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Nested transactions are not supported"); + + session_context->checkTransactionsAreAllowed(/* explicit_tcl_query = */ true); + auto txn = TransactionLog::instance().beginTransaction(); + session_context->initCurrentTransaction(txn); + query_context->setCurrentTransaction(txn); + return {}; +} + +BlockIO InterpreterTransactionControlQuery::executeCommit(ContextMutablePtr session_context) +{ + auto txn = session_context->getCurrentTransaction(); + if (!txn) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "There is no current transaction"); + if (txn->getState() != MergeTreeTransaction::RUNNING) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Transaction is not in RUNNING state"); + + TransactionLog::instance().commitTransaction(txn); + session_context->setCurrentTransaction(NO_TRANSACTION_PTR); + return {}; +} + +BlockIO InterpreterTransactionControlQuery::executeRollback(ContextMutablePtr session_context) +{ + auto txn = session_context->getCurrentTransaction(); + if (!txn) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "There is no current transaction"); + if (txn->getState() == MergeTreeTransaction::COMMITTED) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Transaction is in COMMITTED state"); + + if (txn->getState() == MergeTreeTransaction::RUNNING) + TransactionLog::instance().rollbackTransaction(txn); + session_context->setCurrentTransaction(NO_TRANSACTION_PTR); + return {}; +} + +BlockIO InterpreterTransactionControlQuery::executeSetSnapshot(ContextMutablePtr session_context, UInt64 snapshot) +{ + auto txn = session_context->getCurrentTransaction(); + if (!txn) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "There is no current transaction"); + + if (snapshot <= Tx::MaxReservedCSN && snapshot != Tx::PrehistoricCSN && snapshot != Tx::EverythingVisibleCSN) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Cannot set snapshot to reserved CSN"); + + txn->setSnapshot(snapshot); + return {}; +} + +} diff --git a/src/Interpreters/InterpreterTransactionControlQuery.h b/src/Interpreters/InterpreterTransactionControlQuery.h new file mode 100644 index 00000000000..05d3068e095 --- /dev/null +++ b/src/Interpreters/InterpreterTransactionControlQuery.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include + +namespace DB +{ + +class InterpreterTransactionControlQuery : public IInterpreter +{ +public: + InterpreterTransactionControlQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : query_context(context_) + , query_ptr(query_ptr_) + { + } + + BlockIO execute() override; + + bool ignoreQuota() const override { return true; } + bool 
ignoreLimits() const override { return true; } + bool supportsTransactions() const override { return true; } + +private: + BlockIO executeBegin(ContextMutablePtr session_context); + static BlockIO executeCommit(ContextMutablePtr session_context); + static BlockIO executeRollback(ContextMutablePtr session_context); + static BlockIO executeSetSnapshot(ContextMutablePtr session_context, UInt64 snapshot); + +private: + ContextMutablePtr query_context; + ASTPtr query_ptr; +}; + +} diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index d6a00ba89b4..c43302e0de9 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -182,7 +182,6 @@ struct RewriteTablesVisitorData } }; -template bool needRewrite(ASTSelectQuery & select, std::vector & table_expressions) { if (!select.tables()) @@ -233,8 +232,6 @@ bool needRewrite(ASTSelectQuery & select, std::vectorchildren.empty()) + data.expression_list->children.emplace_back(std::make_shared()); select.setExpression(ASTSelectQuery::Expression::SELECT, std::move(data.expression_list)); + } data.done = true; } }; @@ -605,7 +606,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTPtr & ast, Data & data) void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data) { std::vector table_expressions; - if (!needRewrite<2>(select, table_expressions)) + if (!needRewrite(select, table_expressions)) return; auto & src_tables = select.tables()->children; diff --git a/src/Interpreters/MergeTreeTransaction.cpp b/src/Interpreters/MergeTreeTransaction.cpp new file mode 100644 index 00000000000..7c1feb579e2 --- /dev/null +++ b/src/Interpreters/MergeTreeTransaction.cpp @@ -0,0 +1,351 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_TRANSACTION; + extern const int LOGICAL_ERROR; +} + +static TableLockHolder getLockForOrdinary(const StoragePtr & storage) +{ + if (storage->getStorageID().uuid != UUIDHelpers::Nil) + return {}; + + /// Maybe we should just throw an exception and do not support Ordinary database? + auto default_timeout = std::chrono::milliseconds(10 * 1000); + return storage->lockForShare(RWLockImpl::NO_QUERY, default_timeout); +} + +MergeTreeTransaction::MergeTreeTransaction(CSN snapshot_, LocalTID local_tid_, UUID host_id) + : tid({snapshot_, local_tid_, host_id}) + , snapshot(snapshot_) + , csn(Tx::UnknownCSN) +{ +} + +void MergeTreeTransaction::setSnapshot(CSN new_snapshot) +{ + snapshot = new_snapshot; +} + +MergeTreeTransaction::State MergeTreeTransaction::getState() const +{ + CSN c = csn.load(); + if (c == Tx::UnknownCSN || c == Tx::CommittingCSN) + return RUNNING; + if (c == Tx::RolledBackCSN) + return ROLLED_BACK; + return COMMITTED; +} + +void MergeTreeTransaction::checkIsNotCancelled() const +{ + CSN c = csn.load(); + if (c == Tx::RolledBackCSN) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Transaction was cancelled"); + else if (c != Tx::UnknownCSN) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CSN state: {}", c); +} + +void MergeTreeTransaction::addNewPart(const StoragePtr & storage, const DataPartPtr & new_part, MergeTreeTransaction * txn) +{ + /// Creation TID was written to data part earlier on part creation. + /// We only need to ensure that it's written and add part to in-memory set of new parts. 
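getState() above is derived entirely from the atomically stored CSN: while the CSN is unknown (or a commit is in flight) the transaction counts as RUNNING, a special rolled-back sentinel marks ROLLED_BACK, and any real commit sequence number means COMMITTED. A compact standalone model of that mapping, with made-up sentinel values standing in for the Tx:: constants:

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    using CSN = uint64_t;

    // Made-up sentinel values for illustration; the real constants live in the Tx:: namespace.
    constexpr CSN UnknownCSN = 0;       // still running, no CSN allocated yet
    constexpr CSN CommittingCSN = 1;    // commit is in progress
    constexpr CSN RolledBackCSN = 2;    // transaction was rolled back
    // Any larger value is a real commit sequence number.

    enum class State { RUNNING, COMMITTED, ROLLED_BACK };

    State getState(const std::atomic<CSN> & csn)
    {
        CSN c = csn.load();
        if (c == UnknownCSN || c == CommittingCSN)
            return State::RUNNING;     // not committed yet: changes are invisible to other snapshots
        if (c == RolledBackCSN)
            return State::ROLLED_BACK;
        return State::COMMITTED;       // a real CSN was assigned by the transaction log
    }

    int main()
    {
        std::atomic<CSN> csn{UnknownCSN};
        std::cout << (getState(csn) == State::RUNNING) << '\n';   // 1
        csn.store(42);                                            // pretend the commit assigned CSN 42
        std::cout << (getState(csn) == State::COMMITTED) << '\n'; // 1
    }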
+ new_part->assertHasVersionMetadata(txn); + if (txn) + { + txn->addNewPart(storage, new_part); + /// Now we know actual part name and can write it to system log table. + tryWriteEventToSystemLog(new_part->version.log, TransactionsInfoLogElement::ADD_PART, txn->tid, TransactionInfoContext{storage->getStorageID(), new_part->name}); + } +} + +void MergeTreeTransaction::removeOldPart(const StoragePtr & storage, const DataPartPtr & part_to_remove, MergeTreeTransaction * txn) +{ + TransactionInfoContext transaction_context{storage->getStorageID(), part_to_remove->name}; + if (txn) + { + /// Lock part for removal and write current TID into version metadata file. + /// If server crash just after committing transactions + /// we will find this TID in version metadata and will finally remove part. + txn->removeOldPart(storage, part_to_remove, transaction_context); + } + else + { + /// Lock part for removal with special TID, so transactions will not try to remove it concurrently. + /// We lock it only in memory if part was not involved in any transactions. + part_to_remove->version.lockRemovalTID(Tx::PrehistoricTID, transaction_context); + if (part_to_remove->wasInvolvedInTransaction()) + part_to_remove->appendRemovalTIDToVersionMetadata(); + } +} + +void MergeTreeTransaction::addNewPartAndRemoveCovered(const StoragePtr & storage, const DataPartPtr & new_part, const DataPartsVector & covered_parts, MergeTreeTransaction * txn) +{ + TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; + TransactionInfoContext transaction_context{storage->getStorageID(), new_part->name}; + tryWriteEventToSystemLog(new_part->version.log, TransactionsInfoLogElement::ADD_PART, tid, transaction_context); + transaction_context.covering_part = std::move(transaction_context.part_name); + new_part->assertHasVersionMetadata(txn); + + if (txn) + { + txn->addNewPart(storage, new_part); + for (const auto & covered : covered_parts) + { + transaction_context.part_name = covered->name; + txn->removeOldPart(storage, covered, transaction_context); + } + } + else + { + for (const auto & covered : covered_parts) + { + transaction_context.part_name = covered->name; + covered->version.lockRemovalTID(tid, transaction_context); + } + } +} + +void MergeTreeTransaction::addNewPart(const StoragePtr & storage, const DataPartPtr & new_part) +{ + auto maybe_lock = getLockForOrdinary(storage); + std::lock_guard lock{mutex}; + checkIsNotCancelled(); + storages.insert(storage); + if (maybe_lock) + table_read_locks_for_ordinary_db.emplace_back(std::move(maybe_lock)); + creating_parts.push_back(new_part); +} + +void MergeTreeTransaction::removeOldPart(const StoragePtr & storage, const DataPartPtr & part_to_remove, const TransactionInfoContext & context) +{ + auto maybe_lock = getLockForOrdinary(storage); + + { + std::lock_guard lock{mutex}; + checkIsNotCancelled(); + + LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global); + part_to_remove->version.lockRemovalTID(tid, context); + storages.insert(storage); + if (maybe_lock) + table_read_locks_for_ordinary_db.emplace_back(std::move(maybe_lock)); + removing_parts.push_back(part_to_remove); + } + + part_to_remove->appendRemovalTIDToVersionMetadata(); +} + +void MergeTreeTransaction::addMutation(const StoragePtr & table, const String & mutation_id) +{ + auto maybe_lock = getLockForOrdinary(table); + std::lock_guard lock{mutex}; + checkIsNotCancelled(); + storages.insert(table); + if (maybe_lock) + table_read_locks_for_ordinary_db.emplace_back(std::move(maybe_lock)); + 
mutations.emplace_back(table, mutation_id); +} + +bool MergeTreeTransaction::isReadOnly() const +{ + std::lock_guard lock{mutex}; + assert((creating_parts.empty() && removing_parts.empty() && mutations.empty()) == storages.empty()); + return storages.empty(); +} + +scope_guard MergeTreeTransaction::beforeCommit() +{ + RunningMutationsList mutations_to_wait; + { + std::lock_guard lock{mutex}; + mutations_to_wait = mutations; + } + + /// We should wait for mutations to finish before committing transaction, because some mutation may fail and cause rollback. + for (const auto & table_and_mutation : mutations_to_wait) + table_and_mutation.first->waitForMutation(table_and_mutation.second); + + assert([&]() + { + std::lock_guard lock{mutex}; + return mutations == mutations_to_wait; + }()); + + CSN expected = Tx::UnknownCSN; + bool can_commit = csn.compare_exchange_strong(expected, Tx::CommittingCSN); + if (!can_commit) + { + /// Transaction was concurrently cancelled by KILL TRANSACTION or KILL MUTATION + if (expected == Tx::RolledBackCSN) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Transaction was cancelled"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CSN state: {}", expected); + } + + /// We should set CSN back to Unknown if we will fail to commit transaction for some reason (connection loss, etc) + return [this]() + { + CSN expected_value = Tx::CommittingCSN; + csn.compare_exchange_strong(expected_value, Tx::UnknownCSN); + }; +} + +void MergeTreeTransaction::afterCommit(CSN assigned_csn) noexcept +{ + /// Write allocated CSN into version metadata, so we will know CSN without reading it from transaction log + /// and we will be able to remove old entries from transaction log in ZK. + /// It's not a problem if server crash before CSN is written, because we already have TID in data part and entry in the log. + [[maybe_unused]] CSN prev_value = csn.exchange(assigned_csn); + assert(prev_value == Tx::CommittingCSN); + for (const auto & part : creating_parts) + { + part->version.creation_csn.store(csn); + part->appendCSNToVersionMetadata(VersionMetadata::WhichCSN::CREATION); + } + + for (const auto & part : removing_parts) + { + part->version.removal_csn.store(csn); + part->appendCSNToVersionMetadata(VersionMetadata::WhichCSN::REMOVAL); + } + + for (const auto & storage_and_mutation : mutations) + storage_and_mutation.first->setMutationCSN(storage_and_mutation.second, csn); +} + +bool MergeTreeTransaction::rollback() noexcept +{ + CSN expected = Tx::UnknownCSN; + bool need_rollback = csn.compare_exchange_strong(expected, Tx::RolledBackCSN); + + /// Check that it was not rolled back concurrently + if (!need_rollback) + return false; + + /// It's not a problem if server crash at this point + /// because on startup we will see that TID is not committed and will simply discard these changes. 
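The commit/rollback race in the code above is resolved with a single compare-exchange on the atomic CSN: beforeCommit() tries to move UnknownCSN to CommittingCSN (and returns a guard that undoes this if writing to the transaction log fails), while rollback() and KILL TRANSACTION try to move UnknownCSN to RolledBackCSN, so exactly one side wins. A self-contained sketch of that gate (sentinel values are illustrative):

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    using CSN = uint64_t;
    constexpr CSN UnknownCSN = 0, CommittingCSN = 1, RolledBackCSN = 2;   // illustrative sentinels

    // Whoever wins the compare-exchange decides the fate of the transaction;
    // the loser observes the new value and backs off.
    bool tryStartCommit(std::atomic<CSN> & csn)
    {
        CSN expected = UnknownCSN;
        return csn.compare_exchange_strong(expected, CommittingCSN);
    }

    bool tryRollback(std::atomic<CSN> & csn)
    {
        CSN expected = UnknownCSN;
        return csn.compare_exchange_strong(expected, RolledBackCSN);
    }

    int main()
    {
        std::atomic<CSN> csn{UnknownCSN};

        std::cout << "commit started: " << tryStartCommit(csn) << '\n';   // 1: gate acquired
        std::cout << "rollback: " << tryRollback(csn) << '\n';            // 0: too late, commit in progress

        // If the commit fails (e.g. connection loss while writing to the log), the scope guard
        // returned by beforeCommit() swaps CommittingCSN back to UnknownCSN so a rollback can proceed.
        CSN expected = CommittingCSN;
        csn.compare_exchange_strong(expected, UnknownCSN);
        std::cout << "rollback after failed commit: " << tryRollback(csn) << '\n';   // 1
    }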
+ + RunningMutationsList mutations_to_kill; + DataPartsVector parts_to_remove; + DataPartsVector parts_to_activate; + + { + std::lock_guard lock{mutex}; + mutations_to_kill = mutations; + parts_to_remove = creating_parts; + parts_to_activate = removing_parts; + } + + /// Forcefully stop related mutations if any + for (const auto & table_and_mutation : mutations_to_kill) + table_and_mutation.first->killMutation(table_and_mutation.second); + + /// Discard changes in active parts set + /// Remove parts that were created, restore parts that were removed (except parts that were created by this transaction too) + for (const auto & part : parts_to_remove) + { + if (part->version.isRemovalTIDLocked()) + { + /// Don't need to remove part from working set if it was created and removed by this transaction + assert(part->version.removal_tid_lock == tid.getHash()); + continue; + } + /// FIXME do not lock removal_tid when rolling back part creation, it's ugly + const_cast(part->storage).removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part}, true); + } + + for (const auto & part : parts_to_activate) + if (part->version.getCreationTID() != tid) + const_cast(part->storage).restoreAndActivatePart(part); + + /// Kind of optimization: cleanup thread can remove these parts immediately + for (const auto & part : parts_to_remove) + { + part->version.creation_csn.store(Tx::RolledBackCSN); + /// Write special RolledBackCSN, so we will be able to cleanup transaction log + part->appendCSNToVersionMetadata(VersionMetadata::CREATION); + } + + for (const auto & part : parts_to_activate) + { + /// Clear removal_tid from version metadata file, so we will not need to distinguish TIDs that were not committed + /// and TIDs that were committed long time ago and were removed from the log on log cleanup. 
+ part->appendRemovalTIDToVersionMetadata(/* clear */ true); + part->version.unlockRemovalTID(tid, TransactionInfoContext{part->storage.getStorageID(), part->name}); + } + + + assert([&]() + { + std::lock_guard lock{mutex}; + assert(mutations_to_kill == mutations); + assert(parts_to_remove == creating_parts); + assert(parts_to_activate == removing_parts); + return csn == Tx::RolledBackCSN; + }()); + + return true; +} + +void MergeTreeTransaction::onException() +{ + TransactionLog::instance().rollbackTransaction(shared_from_this()); +} + +String MergeTreeTransaction::dumpDescription() const +{ + String res = fmt::format("{} state: {}, snapshot: {}", tid, getState(), snapshot); + + if (isReadOnly()) + { + res += ", readonly"; + return res; + } + + std::lock_guard lock{mutex}; + + res += fmt::format(", affects {} tables:", storages.size()); + + using ChangesInTable = std::tuple; + std::unordered_map storage_to_changes; + + for (const auto & part : creating_parts) + std::get<0>(storage_to_changes[&(part->storage)]).push_back(part->name); + + for (const auto & part : removing_parts) + { + String info = fmt::format("{} (created by {}, {})", part->name, part->version.getCreationTID(), part->version.creation_csn); + std::get<1>(storage_to_changes[&(part->storage)]).push_back(std::move(info)); + assert(!part->version.creation_csn || part->version.creation_csn <= snapshot); + } + + for (const auto & mutation : mutations) + std::get<2>(storage_to_changes[mutation.first.get()]).push_back(mutation.second); + + for (const auto & storage_changes : storage_to_changes) + { + res += fmt::format("\n\t{}:", storage_changes.first->getStorageID().getNameForLogs()); + const auto & creating_info = std::get<0>(storage_changes.second); + const auto & removing_info = std::get<1>(storage_changes.second); + const auto & mutations_info = std::get<2>(storage_changes.second); + + if (!creating_info.empty()) + res += fmt::format("\n\t\tcreating parts:\n\t\t\t{}", fmt::join(creating_info, "\n\t\t\t")); + if (!removing_info.empty()) + res += fmt::format("\n\t\tremoving parts:\n\t\t\t{}", fmt::join(removing_info, "\n\t\t\t")); + if (!mutations_info.empty()) + res += fmt::format("\n\t\tmutations:\n\t\t\t{}", fmt::join(mutations_info, "\n\t\t\t")); + } + + return res; +} + +} diff --git a/src/Interpreters/MergeTreeTransaction.h b/src/Interpreters/MergeTreeTransaction.h new file mode 100644 index 00000000000..7ebea450dd0 --- /dev/null +++ b/src/Interpreters/MergeTreeTransaction.h @@ -0,0 +1,84 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +class IMergeTreeDataPart; +using DataPartPtr = std::shared_ptr; +using DataPartsVector = std::vector; + +/// This object is responsible for tracking all changes that some transaction is making in MergeTree tables. +/// It collects all changes that queries of current transaction made in data part sets of all MergeTree tables +/// to ether make them visible when transaction commits or undo when transaction rolls back. 
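dumpDescription() above formats TransactionID values and part names with fmt::format, which is why WriteHelpers.h gains a fmt::formatter specialization for DB::UUID earlier in the patch (TransactionID presumably has its own formatter elsewhere). The same specialization pattern makes any small struct printable with "{}". A standalone example with a hypothetical DemoTID type, assuming the {fmt} library is available:

    #include <cstdint>
    #include <fmt/format.h>

    // Hypothetical illustration type; the real TransactionID is defined elsewhere in the patch.
    struct DemoTID
    {
        uint64_t start_csn = 0;
        uint64_t local_tid = 0;
    };

    // Same pattern as the fmt::formatter<DB::UUID> added to WriteHelpers.h:
    // parse() accepts an empty format spec, format() decides how the value is rendered.
    template <>
    struct fmt::formatter<DemoTID>
    {
        constexpr auto parse(fmt::format_parse_context & ctx) { return ctx.begin(); }

        auto format(const DemoTID & tid, fmt::format_context & ctx) const
        {
            return fmt::format_to(ctx.out(), "({}, {})", tid.start_csn, tid.local_tid);
        }
    };

    int main()
    {
        DemoTID tid{42, 7};
        fmt::print("txn {} state: {}\n", tid, "RUNNING");   // txn (42, 7) state: RUNNING
    }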
+class MergeTreeTransaction : public std::enable_shared_from_this, private boost::noncopyable +{ + friend class TransactionLog; +public: + enum State + { + RUNNING, + COMMITTED, + ROLLED_BACK, + }; + + CSN getSnapshot() const { return snapshot; } + void setSnapshot(CSN new_snapshot); + State getState() const; + + const TransactionID tid; + + MergeTreeTransaction(CSN snapshot_, LocalTID local_tid_, UUID host_id); + + void addNewPart(const StoragePtr & storage, const DataPartPtr & new_part); + void removeOldPart(const StoragePtr & storage, const DataPartPtr & part_to_remove, const TransactionInfoContext & context); + + void addMutation(const StoragePtr & table, const String & mutation_id); + + static void addNewPart(const StoragePtr & storage, const DataPartPtr & new_part, MergeTreeTransaction * txn); + static void removeOldPart(const StoragePtr & storage, const DataPartPtr & part_to_remove, MergeTreeTransaction * txn); + static void addNewPartAndRemoveCovered(const StoragePtr & storage, const DataPartPtr & new_part, const DataPartsVector & covered_parts, MergeTreeTransaction * txn); + + bool isReadOnly() const; + + void onException(); + + String dumpDescription() const; + + Float64 elapsedSeconds() const { return elapsed.elapsedSeconds(); } + +private: + scope_guard beforeCommit(); + void afterCommit(CSN assigned_csn) noexcept; + bool rollback() noexcept; + void checkIsNotCancelled() const; + + mutable std::mutex mutex; + Stopwatch elapsed; + + /// Usually it's equal to tid.start_csn, but can be changed by SET SNAPSHOT query (for introspection purposes and time-traveling) + CSN snapshot; + std::list::iterator snapshot_in_use_it; + + /// Lists of changes made by transaction + std::unordered_set storages; + std::vector table_read_locks_for_ordinary_db; + DataPartsVector creating_parts; + DataPartsVector removing_parts; + using RunningMutationsList = std::vector>; + RunningMutationsList mutations; + + std::atomic csn; +}; + +using MergeTreeTransactionPtr = std::shared_ptr; + +} diff --git a/src/Interpreters/MergeTreeTransactionHolder.cpp b/src/Interpreters/MergeTreeTransactionHolder.cpp new file mode 100644 index 00000000000..bf63a471282 --- /dev/null +++ b/src/Interpreters/MergeTreeTransactionHolder.cpp @@ -0,0 +1,84 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +MergeTreeTransactionHolder::MergeTreeTransactionHolder(const MergeTreeTransactionPtr & txn_, bool autocommit_ = false, const Context * owned_by_session_context_) + : txn(txn_) + , autocommit(autocommit_) + , owned_by_session_context(owned_by_session_context_) +{ + assert(!txn || txn->getState() == MergeTreeTransaction::RUNNING); + assert(!owned_by_session_context || owned_by_session_context == owned_by_session_context->getSessionContext().get()); +} + +MergeTreeTransactionHolder::MergeTreeTransactionHolder(MergeTreeTransactionHolder && rhs) noexcept +{ + *this = std::move(rhs); +} + +MergeTreeTransactionHolder & MergeTreeTransactionHolder::operator=(MergeTreeTransactionHolder && rhs) noexcept +{ + onDestroy(); + txn = NO_TRANSACTION_PTR; + autocommit = false; + owned_by_session_context = nullptr; + std::swap(txn, rhs.txn); + std::swap(autocommit, rhs.autocommit); + std::swap(owned_by_session_context, rhs.owned_by_session_context); + return *this; +} + +MergeTreeTransactionHolder::~MergeTreeTransactionHolder() +{ + onDestroy(); +} + +void MergeTreeTransactionHolder::onDestroy() noexcept +{ + if (!txn) + return; + if (txn->getState() != 
MergeTreeTransaction::RUNNING) + return; + + if (autocommit && std::uncaught_exceptions() == 0) + { + try + { + TransactionLog::instance().commitTransaction(txn); + return; + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + TransactionLog::instance().rollbackTransaction(txn); +} + +MergeTreeTransactionHolder::MergeTreeTransactionHolder(const MergeTreeTransactionHolder & rhs) +{ + *this = rhs; +} + +MergeTreeTransactionHolder & MergeTreeTransactionHolder::operator=(const MergeTreeTransactionHolder & rhs) // NOLINT +{ + if (rhs.txn && !rhs.owned_by_session_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Tried to copy non-empty MergeTreeTransactionHolder that is not owned by session context. It's a bug"); + assert(!txn); + assert(!autocommit); + assert(!owned_by_session_context); + return *this; +} + +} diff --git a/src/Interpreters/MergeTreeTransactionHolder.h b/src/Interpreters/MergeTreeTransactionHolder.h new file mode 100644 index 00000000000..4e8a196f4d2 --- /dev/null +++ b/src/Interpreters/MergeTreeTransactionHolder.h @@ -0,0 +1,42 @@ +#pragma once +#include + +namespace DB +{ + +class Context; + +class MergeTreeTransaction; +/// TODO maybe replace with raw pointer? It should not be shared, only MergeTreeTransactionHolder can own a transaction object +using MergeTreeTransactionPtr = std::shared_ptr; + +/// Owns a MergeTreeTransactionObject. +/// Rolls back a transaction in dtor if it was not committed. +/// If `autocommit` flag is true, then it commits transaction if dtor is called normally +/// or rolls it back if dtor was called due to an exception. +class MergeTreeTransactionHolder +{ +public: + MergeTreeTransactionHolder() = default; + MergeTreeTransactionHolder(const MergeTreeTransactionPtr & txn_, bool autocommit_, const Context * owned_by_session_context_ = nullptr); + MergeTreeTransactionHolder(MergeTreeTransactionHolder && rhs) noexcept; + MergeTreeTransactionHolder & operator=(MergeTreeTransactionHolder && rhs) noexcept; + ~MergeTreeTransactionHolder(); + + /// NOTE: We cannot make it noncopyable, because we use it as a field of Context. + /// So the following copy constructor and operator does not copy anything, + /// they just leave txn nullptr. + MergeTreeTransactionHolder(const MergeTreeTransactionHolder & rhs); + MergeTreeTransactionHolder & operator=(const MergeTreeTransactionHolder & rhs); + + MergeTreeTransactionPtr getTransaction() const { return txn; } + +private: + void onDestroy() noexcept; + + MergeTreeTransactionPtr txn; + bool autocommit = false; + const Context * owned_by_session_context = nullptr; +}; + +} diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index f46333dc00a..2c03f109fe4 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -802,7 +802,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// e.g. 
ALTER referencing the same table in scalar subquery bool execute_scalar_subqueries = !dry_run; auto syntax_result = TreeRewriter(context).analyze( - all_asts, all_columns, storage, storage->getStorageSnapshot(metadata_snapshot), + all_asts, all_columns, storage, storage->getStorageSnapshot(metadata_snapshot, context), false, true, execute_scalar_subqueries); if (execute_scalar_subqueries && context->hasQueryContext()) diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 5adca8f0e79..03eeb81f14d 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -117,7 +117,9 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"used_formats", std::make_shared(std::make_shared())}, {"used_functions", std::make_shared(std::make_shared())}, {"used_storages", std::make_shared(std::make_shared())}, - {"used_table_functions", std::make_shared(std::make_shared())} + {"used_table_functions", std::make_shared(std::make_shared())}, + + {"transaction_id", getTransactionIDDataType()}, }; } @@ -257,6 +259,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const fill_column(used_storages, column_storage_factory_objects); fill_column(used_table_functions, column_table_function_factory_objects); } + + columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id}); } void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableColumns & columns, size_t & i) diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index f015afb9249..651769cbab6 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -85,6 +86,8 @@ struct QueryLogElement std::shared_ptr profile_counters; std::shared_ptr query_settings; + TransactionID tid; + static std::string name() { return "QueryLog"; } static NamesAndTypesList getNamesAndTypes(); diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 0f8c782463b..f079e41851a 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -202,6 +203,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf query_views_log = createSystemLog(global_context, "system", "query_views_log", config, "query_views_log"); zookeeper_log = createSystemLog(global_context, "system", "zookeeper_log", config, "zookeeper_log"); session_log = createSystemLog(global_context, "system", "session_log", config, "session_log"); + transactions_info_log = createSystemLog( + global_context, "system", "transactions_info_log", config, "transactions_info_log"); processors_profile_log = createSystemLog(global_context, "system", "processors_profile_log", config, "processors_profile_log"); if (query_log) @@ -228,6 +231,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf logs.emplace_back(zookeeper_log.get()); if (session_log) logs.emplace_back(session_log.get()); + if (transactions_info_log) + logs.emplace_back(transactions_info_log.get()); if (processors_profile_log) logs.emplace_back(processors_profile_log.get()); @@ -544,6 +549,7 @@ ASTPtr SystemLog::getCreateTableQuery() return create; } + #define INSTANTIATE_SYSTEM_LOG(ELEMENT) template class SystemLog; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 4ad6a0666bb..b5135e8a73a 100644 --- 
a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -43,6 +43,7 @@ class OpenTelemetrySpanLog; class QueryViewsLog; class ZooKeeperLog; class SessionLog; +class TransactionsInfoLog; class ProcessorsProfileLog; /// System logs should be destroyed in destructor of the last Context and before tables, @@ -71,6 +72,8 @@ struct SystemLogs std::shared_ptr zookeeper_log; /// Login, LogOut and Login failure events std::shared_ptr session_log; + /// Events related to transactions + std::shared_ptr transactions_info_log; /// Used to log processors profiling std::shared_ptr processors_profile_log; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 2ea371d3d03..8fbbdb44c99 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -597,6 +597,16 @@ CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context) query_context->makeQueryContext(); } +CurrentThread::QueryScope::QueryScope(ContextPtr query_context) +{ + if (!query_context->hasQueryContext()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Cannot initialize query scope without query context"); + + CurrentThread::initializeQuery(); + CurrentThread::attachQueryContext(query_context); +} + void CurrentThread::QueryScope::logPeakMemoryUsage() { auto group = CurrentThread::getGroup(); diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp new file mode 100644 index 00000000000..393a8aa848b --- /dev/null +++ b/src/Interpreters/TransactionLog.cpp @@ -0,0 +1,484 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/// It's used in critical places to exit on unexpected exceptions. +/// SIGABRT is usually better that broken state in memory with unpredictable consequences. +#define NOEXCEPT_SCOPE SCOPE_EXIT({ if (std::uncaught_exceptions()) { tryLogCurrentException("NOEXCEPT_SCOPE"); abort(); } }) + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +static void tryWriteEventToSystemLog(Poco::Logger * log, ContextPtr context, + TransactionsInfoLogElement::Type type, const TransactionID & tid, CSN csn = Tx::UnknownCSN) +try +{ + auto system_log = context->getTransactionsInfoLog(); + if (!system_log) + return; + + TransactionsInfoLogElement elem; + elem.type = type; + elem.tid = tid; + elem.csn = csn; + elem.fillCommonFields(nullptr); + system_log->add(elem); +} +catch (...) +{ + tryLogCurrentException(log); +} + + +TransactionLog::TransactionLog() + : log(&Poco::Logger::get("TransactionLog")) +{ + global_context = Context::getGlobalContextInstance(); + global_context->checkTransactionsAreAllowed(); + + zookeeper_path = global_context->getConfigRef().getString("transaction_log.zookeeper_path", "/clickhouse/txn"); + zookeeper_path_log = zookeeper_path + "/log"; + + loadLogFromZooKeeper(); + + updating_thread = ThreadFromGlobalPool(&TransactionLog::runUpdatingThread, this); +} + +TransactionLog::~TransactionLog() +{ + shutdown(); +} + +void TransactionLog::shutdown() +{ + if (stop_flag.exchange(true)) + return; + log_updated_event->set(); + latest_snapshot.notify_all(); + updating_thread.join(); + + std::lock_guard lock{mutex}; + /// This is required to... 
you'll never guess - avoid race condition inside Poco::Logger (Coordination::ZooKeeper::log) + zookeeper.reset(); +} + +ZooKeeperPtr TransactionLog::getZooKeeper() const +{ + std::lock_guard lock{mutex}; + return zookeeper; +} + +UInt64 TransactionLog::deserializeCSN(const String & csn_node_name) +{ + ReadBufferFromString buf{csn_node_name}; + assertString("csn-", buf); + UInt64 res; + readText(res, buf); + assertEOF(buf); + return res; +} + +String TransactionLog::serializeCSN(CSN csn) +{ + return zkutil::getSequentialNodeName("csn-", csn); +} + +TransactionID TransactionLog::deserializeTID(const String & csn_node_content) +{ + TransactionID tid = Tx::EmptyTID; + if (csn_node_content.empty()) + return tid; + + ReadBufferFromString buf{csn_node_content}; + tid = TransactionID::read(buf); + assertEOF(buf); + return tid; +} + +String TransactionLog::serializeTID(const TransactionID & tid) +{ + WriteBufferFromOwnString buf; + TransactionID::write(tid, buf); + return buf.str(); +} + + +void TransactionLog::loadEntries(Strings::const_iterator beg, Strings::const_iterator end) +{ + std::vector> futures; + size_t entries_count = std::distance(beg, end); + if (!entries_count) + return; + + String last_entry = *std::prev(end); + LOG_TRACE(log, "Loading {} entries from {}: {}..{}", entries_count, zookeeper_path_log, *beg, last_entry); + futures.reserve(entries_count); + for (auto it = beg; it != end; ++it) + futures.emplace_back(zookeeper->asyncGet(fs::path(zookeeper_path_log) / *it)); + + std::vector> loaded; + loaded.reserve(entries_count); + auto it = beg; + for (size_t i = 0; i < entries_count; ++i, ++it) + { + auto res = futures[i].get(); + CSN csn = deserializeCSN(*it); + TransactionID tid = deserializeTID(res.data); + loaded.emplace_back(tid.getHash(), CSNEntry{csn, tid}); + LOG_TEST(log, "Got entry {} -> {}", tid, csn); + } + futures.clear(); + + NOEXCEPT_SCOPE; + LockMemoryExceptionInThread lock_memory_tracker(VariableContext::Global); + std::lock_guard lock{mutex}; + for (const auto & entry : loaded) + { + if (entry.first == Tx::EmptyTID.getHash()) + continue; + + tid_to_csn.emplace(entry.first, entry.second); + } + last_loaded_entry = last_entry; + latest_snapshot = loaded.back().second.csn; + local_tid_counter = Tx::MaxReservedLocalTID; +} + +void TransactionLog::loadLogFromZooKeeper() +{ + assert(!zookeeper); + assert(tid_to_csn.empty()); + assert(last_loaded_entry.empty()); + zookeeper = global_context->getZooKeeper(); + + /// We do not write local_tid_counter to disk or zk and maintain it only in memory. + /// Create empty entry to allocate new CSN to safely start counting from the beginning and avoid TID duplication. + /// TODO It's possible to skip this step in come cases (especially for multi-host configuration). 
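A note on the encoding used by serializeCSN/deserializeCSN above: a log entry name is just the fixed "csn-" prefix followed by the numeric CSN, padded by ZooKeeper's sequential-node counter (10 digits is the usual width, which zkutil::getSequentialNodeName presumably mirrors). A minimal standalone sketch of that round trip, using only the standard library instead of ClickHouse's ReadBuffer/WriteBuffer helpers; the *Sketch names are mine:

#include <cassert>
#include <cstdint>
#include <iomanip>
#include <sstream>
#include <stdexcept>
#include <string>

using CSN = uint64_t;

// Sketch of serializeCSN: "csn-" + zero-padded sequential number
// (10-digit padding assumed, matching the usual ZooKeeper sequential suffix).
std::string serializeCSNSketch(CSN csn)
{
    std::ostringstream out;
    out << "csn-" << std::setw(10) << std::setfill('0') << csn;
    return out.str();
}

// Sketch of deserializeCSN: check the "csn-" prefix, then parse the number.
CSN deserializeCSNSketch(const std::string & node_name)
{
    constexpr const char prefix[] = "csn-";
    if (node_name.rfind(prefix, 0) != 0)
        throw std::runtime_error("Unexpected node name: " + node_name);
    return std::stoull(node_name.substr(sizeof(prefix) - 1));
}

int main()
{
    assert(serializeCSNSketch(42) == "csn-0000000042");
    assert(deserializeCSNSketch(serializeCSNSketch(42)) == 42);
}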
+ Coordination::Error code = zookeeper->tryCreate(zookeeper_path_log + "/csn-", "", zkutil::CreateMode::PersistentSequential); + if (code != Coordination::Error::ZOK) + { + /// Log probably does not exist, create it + assert(code == Coordination::Error::ZNONODE); + zookeeper->createAncestors(zookeeper_path_log); + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/tail_ptr", serializeCSN(Tx::MaxReservedCSN), zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path_log, "", zkutil::CreateMode::Persistent)); + + /// Fast-forward sequential counter to skip reserved CSNs + for (size_t i = 0; i <= Tx::MaxReservedCSN; ++i) + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path_log + "/csn-", "", zkutil::CreateMode::PersistentSequential)); + Coordination::Responses res; + code = zookeeper->tryMulti(ops, res); + if (code != Coordination::Error::ZNODEEXISTS) + zkutil::KeeperMultiException::check(code, ops, res); + } + + /// TODO Split log into "subdirectories" to: + /// 1. fetch it more optimal way (avoid listing all CSNs on further incremental updates) + /// 2. simplify log rotation + /// 3. support 64-bit CSNs on top of Apache ZooKeeper (it uses Int32 for sequential numbers) + Strings entries_list = zookeeper->getChildren(zookeeper_path_log, nullptr, log_updated_event); + assert(!entries_list.empty()); + std::sort(entries_list.begin(), entries_list.end()); + loadEntries(entries_list.begin(), entries_list.end()); + assert(!last_loaded_entry.empty()); + assert(latest_snapshot == deserializeCSN(last_loaded_entry)); + local_tid_counter = Tx::MaxReservedLocalTID; + + tail_ptr = deserializeCSN(zookeeper->get(zookeeper_path + "/tail_ptr")); +} + +void TransactionLog::runUpdatingThread() +{ + while (true) + { + try + { + log_updated_event->wait(); + if (stop_flag.load()) + return; + + if (!zookeeper) + { + auto new_zookeeper = global_context->getZooKeeper(); + std::lock_guard lock{mutex}; + zookeeper = new_zookeeper; + } + + loadNewEntries(); + removeOldEntries(); + } + catch (const Coordination::Exception & e) + { + tryLogCurrentException(log); + /// TODO better backoff + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + if (Coordination::isHardwareError(e.code)) + { + std::lock_guard lock{mutex}; + zookeeper.reset(); + } + log_updated_event->set(); + } + catch (...) + { + tryLogCurrentException(log); + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + log_updated_event->set(); + } + } +} + +void TransactionLog::loadNewEntries() +{ + Strings entries_list = zookeeper->getChildren(zookeeper_path_log, nullptr, log_updated_event); + assert(!entries_list.empty()); + std::sort(entries_list.begin(), entries_list.end()); + auto it = std::upper_bound(entries_list.begin(), entries_list.end(), last_loaded_entry); + loadEntries(it, entries_list.end()); + assert(last_loaded_entry == entries_list.back()); + assert(latest_snapshot == deserializeCSN(last_loaded_entry)); + latest_snapshot.notify_all(); +} + +void TransactionLog::removeOldEntries() +{ + /// Try to update tail pointer. It's (almost) safe to set it to the oldest snapshot + /// because if a transaction released snapshot, then CSN is already written into metadata. + /// Why almost? Because on server startup we do not have the oldest snapshot (it's simply equal to the latest one), + /// but it's possible that some CSNs are not written into data parts (and we will write them during startup). 
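Because the zero-padded csn-XXXXXXXXXX names sort lexicographically in the same order as their numeric CSNs, loadNewEntries above can find the not-yet-loaded suffix with a single std::upper_bound over the sorted child list. A small self-contained illustration of that idea (the entry names and the last_loaded_entry value are made up):

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

int main()
{
    // Sorted children of the log node, as a getChildren() + std::sort would produce them.
    std::vector<std::string> entries = {"csn-0000000017", "csn-0000000018", "csn-0000000019", "csn-0000000020"};

    // Name of the newest entry already loaded (analogue of last_loaded_entry).
    std::string last_loaded_entry = "csn-0000000018";

    // Everything strictly greater than last_loaded_entry is new.
    auto it = std::upper_bound(entries.begin(), entries.end(), last_loaded_entry);
    std::vector<std::string> new_entries(it, entries.end());

    assert((new_entries == std::vector<std::string>{"csn-0000000019", "csn-0000000020"}));
}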
+ if (!global_context->isServerCompletelyStarted()) + return; + + /// Also similar problem is possible if some table was not attached during startup (for example, if table is detached permanently). + /// Also we write CSNs into data parts without fsync, so it's theoretically possible that we wrote CSN, finished transaction, + /// removed its entry from the log, but after that server restarts and CSN is not actually saved to metadata on disk. + /// We should store a bit more entries in ZK and keep outdated entries for a while. + + /// TODO we will need a bit more complex logic for multiple hosts + Coordination::Stat stat; + CSN old_tail_ptr = deserializeCSN(zookeeper->get(zookeeper_path + "/tail_ptr", &stat)); + CSN new_tail_ptr = getOldestSnapshot(); + if (new_tail_ptr < old_tail_ptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected tail_ptr {}, oldest snapshot is {}, it's a bug", old_tail_ptr, new_tail_ptr); + else if (new_tail_ptr == old_tail_ptr) + return; + + /// (it's not supposed to fail with ZBADVERSION while there is only one host) + LOG_TRACE(log, "Updating tail_ptr from {} to {}", old_tail_ptr, new_tail_ptr); + zookeeper->set(zookeeper_path + "/tail_ptr", serializeCSN(new_tail_ptr), stat.version); + tail_ptr.store(new_tail_ptr); + + /// Now we can find and remove old entries + TIDMap tids; + { + std::lock_guard lock{mutex}; + tids = tid_to_csn; + } + + /// TODO support batching + std::vector removed_entries; + CSN latest_entry_csn = latest_snapshot.load(); + for (const auto & elem : tids) + { + /// Definitely not safe to remove + if (new_tail_ptr <= elem.second.tid.start_csn) + continue; + + /// Keep at least one node (the latest one we fetched) + if (elem.second.csn == latest_entry_csn) + continue; + + LOG_TEST(log, "Removing entry {} -> {}", elem.second.tid, elem.second.csn); + auto code = zookeeper->tryRemove(zookeeper_path_log + "/" + serializeCSN(elem.second.csn)); + if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNONODE) + removed_entries.push_back(elem.first); + } + + std::lock_guard lock{mutex}; + for (const auto & tid_hash : removed_entries) + tid_to_csn.erase(tid_hash); +} + +CSN TransactionLog::getLatestSnapshot() const +{ + return latest_snapshot.load(); +} + +MergeTreeTransactionPtr TransactionLog::beginTransaction() +{ + MergeTreeTransactionPtr txn; + { + std::lock_guard lock{running_list_mutex}; + CSN snapshot = latest_snapshot.load(); + LocalTID ltid = 1 + local_tid_counter.fetch_add(1); + txn = std::make_shared(snapshot, ltid, ServerUUID::get()); + bool inserted = running_list.try_emplace(txn->tid.getHash(), txn).second; + if (!inserted) + throw Exception(ErrorCodes::LOGICAL_ERROR, "I's a bug: TID {} {} exists", txn->tid.getHash(), txn->tid); + txn->snapshot_in_use_it = snapshots_in_use.insert(snapshots_in_use.end(), snapshot); + } + + LOG_TEST(log, "Beginning transaction {} ({})", txn->tid, txn->tid.getHash()); + tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::BEGIN, txn->tid); + + return txn; +} + +CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn) +{ + /// Some precommit checks, may throw + auto committing_lock = txn->beforeCommit(); + + CSN new_csn; + if (txn->isReadOnly()) + { + /// Don't need to allocate CSN in ZK for readonly transactions, it's safe to use snapshot/start_csn as "commit" timestamp + LOG_TEST(log, "Closing readonly transaction {}", txn->tid); + new_csn = txn->snapshot; + tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::COMMIT, 
txn->tid, new_csn); + } + else + { + LOG_TEST(log, "Committing transaction {}", txn->dumpDescription()); + /// TODO handle connection loss + /// TODO support batching + auto current_zookeeper = getZooKeeper(); + String path_created = current_zookeeper->create(zookeeper_path_log + "/csn-", serializeTID(txn->tid), zkutil::CreateMode::PersistentSequential); /// Commit point + NOEXCEPT_SCOPE; + + /// FIXME Transactions: Sequential node numbers in ZooKeeper are Int32, but 31 bit is not enough for production use + /// (overflow is possible in a several weeks/months of active usage) + new_csn = deserializeCSN(path_created.substr(zookeeper_path_log.size() + 1)); + + LOG_INFO(log, "Transaction {} committed with CSN={}", txn->tid, new_csn); + tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::COMMIT, txn->tid, new_csn); + + /// Wait for committed changes to become actually visible, so the next transaction in this session will see the changes + /// TODO it's optional, add a setting for this + auto current_latest_snapshot = latest_snapshot.load(); + while (current_latest_snapshot < new_csn && !stop_flag) + { + latest_snapshot.wait(current_latest_snapshot); + current_latest_snapshot = latest_snapshot.load(); + } + } + + /// Write allocated CSN, so we will be able to cleanup log in ZK. This method is noexcept. + txn->afterCommit(new_csn); + + { + /// Finally we can remove transaction from the list and release the snapshot + std::lock_guard lock{running_list_mutex}; + bool removed = running_list.erase(txn->tid.getHash()); + if (!removed) + throw Exception(ErrorCodes::LOGICAL_ERROR, "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); + snapshots_in_use.erase(txn->snapshot_in_use_it); + } + + return new_csn; +} + +void TransactionLog::rollbackTransaction(const MergeTreeTransactionPtr & txn) noexcept +{ + LOG_TRACE(log, "Rolling back transaction {}{}", txn->tid, + std::uncaught_exceptions() ? fmt::format(" due to uncaught exception (code: {})", getCurrentExceptionCode()) : ""); + + if (!txn->rollback()) + { + /// Transaction was cancelled concurrently, it's already rolled back. 
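The wait loop at the end of the commit path above is the C++20 std::atomic wait/notify pattern: the committing thread blocks on latest_snapshot until the updating thread, having fetched the freshly written log entry, bumps the value and calls notify_all(). A minimal sketch of the same pattern with stand-in names rather than the real TransactionLog members:

#include <atomic>
#include <cstdint>
#include <thread>

std::atomic<uint64_t> latest_snapshot{10};

// Committing side: block until the snapshot catches up with the CSN we just allocated.
void waitUntilVisible(uint64_t new_csn)
{
    uint64_t current = latest_snapshot.load();
    while (current < new_csn)
    {
        latest_snapshot.wait(current);   // sleeps only while the value still equals `current`
        current = latest_snapshot.load();
    }
}

// Updating-thread side: publish a newer snapshot and wake up waiters.
void publishSnapshot(uint64_t csn)
{
    latest_snapshot.store(csn);
    latest_snapshot.notify_all();
}

int main()
{
    std::thread waiter(waitUntilVisible, 11);
    publishSnapshot(11);
    waiter.join();
}

Since atomic::wait returns immediately when the value already differs from its argument, a publish that lands between the load and the wait cannot be lost; the shutdown path above relies on the same property by setting stop_flag and calling notify_all().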
+ assert(txn->csn == Tx::RolledBackCSN); + return; + } + + { + std::lock_guard lock{running_list_mutex}; + bool removed = running_list.erase(txn->tid.getHash()); + if (!removed) + abort(); + snapshots_in_use.erase(txn->snapshot_in_use_it); + } + + tryWriteEventToSystemLog(log, global_context, TransactionsInfoLogElement::ROLLBACK, txn->tid); +} + +MergeTreeTransactionPtr TransactionLog::tryGetRunningTransaction(const TIDHash & tid) +{ + std::lock_guard lock{running_list_mutex}; + auto it = running_list.find(tid); + if (it == running_list.end()) + return NO_TRANSACTION_PTR; + return it->second; +} + +CSN TransactionLog::getCSN(const TransactionID & tid) +{ + /// Avoid creation of the instance if transactions are not actually involved + if (tid == Tx::PrehistoricTID) + return Tx::PrehistoricCSN; + return instance().getCSNImpl(tid.getHash()); +} + +CSN TransactionLog::getCSN(const TIDHash & tid) +{ + /// Avoid creation of the instance if transactions are not actually involved + if (tid == Tx::PrehistoricTID.getHash()) + return Tx::PrehistoricCSN; + return instance().getCSNImpl(tid); +} + +CSN TransactionLog::getCSNImpl(const TIDHash & tid_hash) const +{ + assert(tid_hash); + assert(tid_hash != Tx::EmptyTID.getHash()); + + std::lock_guard lock{mutex}; + auto it = tid_to_csn.find(tid_hash); + if (it != tid_to_csn.end()) + return it->second.csn; + + return Tx::UnknownCSN; +} + +void TransactionLog::assertTIDIsNotOutdated(const TransactionID & tid) +{ + if (tid == Tx::PrehistoricTID) + return; + + /// Ensure that we are not trying to get CSN for TID that was already removed from the log + CSN tail = instance().tail_ptr.load(); + if (tail <= tid.start_csn) + return; + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get CSN for too old TID {}, current tail_ptr is {}, probably it's a bug", tid, tail); +} + +CSN TransactionLog::getOldestSnapshot() const +{ + std::lock_guard lock{running_list_mutex}; + if (snapshots_in_use.empty()) + return getLatestSnapshot(); + return snapshots_in_use.front(); +} + +TransactionLog::TransactionsList TransactionLog::getTransactionsList() const +{ + std::lock_guard lock{running_list_mutex}; + return running_list; +} + +} diff --git a/src/Interpreters/TransactionLog.h b/src/Interpreters/TransactionLog.h new file mode 100644 index 00000000000..86584a74c68 --- /dev/null +++ b/src/Interpreters/TransactionLog.h @@ -0,0 +1,192 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/// We want to create a TransactionLog object lazily and avoid creation if it's not needed. +/// But we also want to call shutdown() in a specific place to avoid race conditions. +/// We cannot simply use return-static-variable pattern, +/// because a call to shutdown() may construct unnecessary object in this case. 
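To make the trade-off described above concrete, here is a stripped-down sketch of the same shape of lazy singleton: an atomic raw pointer gives a lock-free fast path for instance(), while shutdownIfAny() only inspects the mutex-guarded holder and never constructs the object, which a plain function-local static could not guarantee. The LazySingletonSketch/Service names are illustrative, not part of the patch:

#include <atomic>
#include <cassert>
#include <memory>
#include <mutex>

template <typename Derived>
class LazySingletonSketch
{
public:
    static Derived & instance()
    {
        if (Derived * ptr = instance_raw_ptr.load())   // fast path, no lock once created
            return *ptr;
        std::lock_guard lock{instance_mutex};          // slow path, first call only
        if (!instance_holder)
        {
            instance_holder = std::make_shared<Derived>();
            instance_raw_ptr = instance_holder.get();
        }
        return *instance_holder;
    }

    static void shutdownIfAny()
    {
        std::lock_guard lock{instance_mutex};
        if (instance_holder)        // never constructs Derived just to shut it down
            instance_holder->shutdown();
    }

private:
    static inline std::atomic<Derived *> instance_raw_ptr{nullptr};
    static inline std::shared_ptr<Derived> instance_holder;
    static inline std::mutex instance_mutex;
};

struct Service
{
    static inline int constructed = 0;
    Service() { ++constructed; }
    void shutdown() {}
};

int main()
{
    LazySingletonSketch<Service>::shutdownIfAny();   // no instance yet: nothing is constructed
    assert(Service::constructed == 0);
    LazySingletonSketch<Service>::instance();        // first use constructs the object
    assert(Service::constructed == 1);
    LazySingletonSketch<Service>::shutdownIfAny();   // now it actually calls Service::shutdown()
}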
+template +class SingletonHelper : private boost::noncopyable +{ +public: + static Derived & instance() + { + Derived * ptr = instance_raw_ptr.load(); + if (likely(ptr)) + return *ptr; + + return createInstanceOrThrow(); + } + + static void shutdownIfAny() + { + std::lock_guard lock{instance_mutex}; + if (instance_holder) + instance_holder->shutdown(); + } + +private: + static Derived & createInstanceOrThrow(); + + static inline std::atomic instance_raw_ptr; + /// It was supposed to be std::optional, but gcc fails to compile it for some reason + static inline std::shared_ptr instance_holder; + static inline std::mutex instance_mutex; +}; + +class TransactionsInfoLog; +using TransactionsInfoLogPtr = std::shared_ptr; +using ZooKeeperPtr = std::shared_ptr; + +/// This class maintains transaction log in ZooKeeper and a list of currently running transactions in memory. +/// +/// Each transaction has unique ID (TID, see details below). +/// TransactionID is allocated when transaction begins. +/// +/// We use TransactionID to associate changes (created/removed data parts) with transaction that has made/is going to make these changes. +/// To commit a transaction we create sequential node "/path_to_log/log/csn-" in ZK and write TID into this node. +/// Allocated sequential number is a commit timestamp or Commit Sequence Number (CSN). It indicates a (logical) point in time +/// when transaction is committed and all its changes became visible. So we have total order of all changes. +/// +/// Also CSNs are used as snapshots: all changes that were made by a transaction that was committed with a CSN less or equal than some_csn +/// are visible in some_csn snapshot. +/// +/// TransactionID consists of three parts: (start_csn, local_tid, host_id) +/// - start_csn is the newest CSN that existed when the transaction was started and also it's snapshot that is visible for this transaction +/// - local_tid is local sequential number of the transaction, each server allocates local_tids independently without requests to ZK +/// - host_id is persistent UUID of host that has started the transaction, it's kind of tie-breaker that makes ID unique across all servers +/// +/// To check if some transaction is committed or not we fetch "csn-xxxxxx" nodes from ZK and construct TID -> CSN mapping, +/// so for committed transactions we know commit timestamps. +/// However, if we did not find a mapping for some TID, it means one of the following cases: +/// 1. Transaction is not committed (yet) +/// 2. Transaction is rolled back (quite similar to the first case, but it will never be committed) +/// 3. Transactions was committed a long time ago and we removed its entry from the log +/// To distinguish the third case we store a "tail pointer" in "/path_to_log/tail_ptr". It's a CSN such that it's safe to remove from log +/// entries with tid.start_csn < tail_ptr, because CSNs for those TIDs are already written into data parts +/// and we will not do a CSN lookup for those TIDs anymore. 
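To make the three "no mapping found" outcomes above concrete, here is a toy resolution routine over a plain map standing in for the TID-to-CSN cache, with a tail pointer used to tell "not committed yet or rolled back" apart from "committed long ago and possibly cleaned up" (the real code treats the latter as an error during lookups, because such CSNs must already be written into part metadata). All types and names here are simplified stand-ins, not the real TransactionLog interface:

#include <cassert>
#include <cstdint>
#include <optional>
#include <unordered_map>

using CSN = uint64_t;
using TIDHash = uint64_t;

struct Tid { CSN start_csn; TIDHash hash; };

enum class TxStatus { Committed, UnknownOrRolledBack, PossiblyCleanedUp };

struct Resolution { TxStatus status; std::optional<CSN> csn; };

Resolution resolve(const Tid & tid,
                   const std::unordered_map<TIDHash, CSN> & tid_to_csn,
                   CSN tail_ptr)
{
    if (auto it = tid_to_csn.find(tid.hash); it != tid_to_csn.end())
        return {TxStatus::Committed, it->second};           // entry is still in the log

    if (tid.start_csn < tail_ptr)
        return {TxStatus::PossiblyCleanedUp, std::nullopt};  // case 3: old TID, its entry may have been
                                                             // removed; the CSN lives in part metadata

    return {TxStatus::UnknownOrRolledBack, std::nullopt};    // cases 1/2: not committed (yet) or rolled back
}

int main()
{
    std::unordered_map<TIDHash, CSN> log = {{100, 25}};
    assert(resolve({20, 100}, log, 15).status == TxStatus::Committed);
    assert(resolve({20, 200}, log, 15).status == TxStatus::UnknownOrRolledBack);
    assert(resolve({10, 300}, log, 15).status == TxStatus::PossiblyCleanedUp);
}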
+/// +/// (however, transactions involving multiple hosts and/or ReplicatedMergeTree tables are currently not supported) +class TransactionLog final : public SingletonHelper +{ +public: + + TransactionLog(); + + ~TransactionLog(); + + void shutdown(); + + /// Returns the newest snapshot available for reading + CSN getLatestSnapshot() const; + /// Returns the oldest snapshot that is visible for some running transaction + CSN getOldestSnapshot() const; + + /// Allocates TID, returns new transaction object + MergeTreeTransactionPtr beginTransaction(); + + /// Tries to commit transaction. Returns Commit Sequence Number. + /// Throws if transaction was concurrently killed or if some precommit check failed. + /// May throw if ZK connection is lost. Transaction status is unknown in this case. + CSN commitTransaction(const MergeTreeTransactionPtr & txn); + + /// Releases locks that were acquired by transaction, releases snapshot, removes transaction from the list of active transactions. + /// Normally it should not throw, but if it does for some reason (global memory limit exceeded, disk failure, etc) + /// then we should terminate server and reinitialize it to avoid corruption of data structures. That's why it's noexcept. + void rollbackTransaction(const MergeTreeTransactionPtr & txn) noexcept; + + /// Returns CSN if transaction with specified ID was committed and UnknownCSN if it was not. + /// Returns PrehistoricCSN for PrehistoricTID without creating a TransactionLog instance as a special case. + static CSN getCSN(const TransactionID & tid); + static CSN getCSN(const TIDHash & tid); + + /// Ensures that getCSN returned UnknownCSN because transaction is not committed and not because entry was removed from the log. + static void assertTIDIsNotOutdated(const TransactionID & tid); + + /// Returns a pointer to transaction object if it's running or nullptr. + MergeTreeTransactionPtr tryGetRunningTransaction(const TIDHash & tid); + + using TransactionsList = std::unordered_map; + /// Returns a copy of the list of running transactions. + TransactionsList getTransactionsList() const; + +private: + void loadLogFromZooKeeper(); + void runUpdatingThread(); + + void loadEntries(Strings::const_iterator beg, Strings::const_iterator end); + void loadNewEntries(); + void removeOldEntries(); + + static UInt64 deserializeCSN(const String & csn_node_name); + static String serializeCSN(CSN csn); + static TransactionID deserializeTID(const String & csn_node_content); + static String serializeTID(const TransactionID & tid); + + ZooKeeperPtr getZooKeeper() const; + + CSN getCSNImpl(const TIDHash & tid_hash) const; + + ContextPtr global_context; + Poco::Logger * log; + + /// The newest snapshot available for reading + std::atomic latest_snapshot; + + /// Local part of TransactionID number. We reset this counter for each new snapshot. + std::atomic local_tid_counter; + + mutable std::mutex mutex; + /// Mapping from TransactionID to CSN for recently committed transactions. + /// Allows to check if some transaction is committed. + struct CSNEntry + { + CSN csn; + TransactionID tid; + }; + using TIDMap = std::unordered_map; + TIDMap tid_to_csn; + + mutable std::mutex running_list_mutex; + /// Transactions that are currently processed + TransactionsList running_list; + /// Ordered list of snapshots that are currently used by some transactions. Needed for background cleanup.
+ std::list snapshots_in_use; + + ZooKeeperPtr zookeeper; + String zookeeper_path; + + String zookeeper_path_log; + /// Name of the newest entry that was loaded from log in ZK + String last_loaded_entry; + /// The oldest CSN such that we store in log entries with TransactionIDs containing this CSN. + std::atomic tail_ptr = Tx::UnknownCSN; + + zkutil::EventPtr log_updated_event = std::make_shared(); + + std::atomic_bool stop_flag = false; + ThreadFromGlobalPool updating_thread; +}; + +template +Derived & SingletonHelper::createInstanceOrThrow() +{ + std::lock_guard lock{instance_mutex}; + if (!instance_holder) + { + instance_holder = std::make_shared(); + instance_raw_ptr = instance_holder.get(); + } + return *instance_holder; +} + +} diff --git a/src/Interpreters/TransactionVersionMetadata.cpp b/src/Interpreters/TransactionVersionMetadata.cpp new file mode 100644 index 00000000000..ac02f29661d --- /dev/null +++ b/src/Interpreters/TransactionVersionMetadata.cpp @@ -0,0 +1,431 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SERIALIZATION_ERROR; + extern const int LOGICAL_ERROR; + extern const int CANNOT_PARSE_TEXT; +} + +inline static CSN getCSNAndAssert(TIDHash tid_hash, std::atomic & csn, const TransactionID * tid = nullptr) +{ + CSN maybe_csn = TransactionLog::getCSN(tid_hash); + if (maybe_csn) + return maybe_csn; + + /// Either transaction is not committed (yet) or it was committed and then the CSN entry was cleaned up from the log. + /// We should load CSN again to distinguish the second case. + /// If entry was cleaned up, then CSN is already stored in VersionMetadata and we will get it. + /// And for the first case we will get UnknownCSN again. + maybe_csn = csn.load(); + if (maybe_csn) + return maybe_csn; + + if (tid) + TransactionLog::assertTIDIsNotOutdated(*tid); + + return Tx::UnknownCSN; +} + +VersionMetadata::VersionMetadata() +{ + /// It would be better to make it static, but static loggers do not work for some reason (initialization order?) 
+ log = &Poco::Logger::get("VersionMetadata"); +} + +/// It can be used for introspection purposes only +TransactionID VersionMetadata::getRemovalTID() const +{ + TIDHash removal_lock = removal_tid_lock.load(); + if (removal_lock) + { + if (removal_lock == Tx::PrehistoricTID.getHash()) + return Tx::PrehistoricTID; + if (auto txn = TransactionLog::instance().tryGetRunningTransaction(removal_lock)) + return txn->tid; + } + + if (removal_csn.load(std::memory_order_relaxed)) + { + /// removal_tid cannot be changed since we have removal_csn, so it's readonly + return removal_tid; + } + + return Tx::EmptyTID; +} + +void VersionMetadata::lockRemovalTID(const TransactionID & tid, const TransactionInfoContext & context) +{ + LOG_TEST(log, "Trying to lock removal_tid by {}, table: {}, part: {}", tid, context.table.getNameForLogs(), context.part_name); + TIDHash locked_by = 0; + if (tryLockRemovalTID(tid, context, &locked_by)) + return; + + String part_desc; + if (context.covering_part.empty()) + part_desc = context.part_name; + else + part_desc = fmt::format("{} (covered by {})", context.part_name, context.covering_part); + throw Exception(ErrorCodes::SERIALIZATION_ERROR, + "Serialization error: " + "Transaction {} tried to remove data part {} from {}, " + "but it's locked by another transaction (TID: {}, TIDH: {}) which is currently removing this part.", + tid, part_desc, context.table.getNameForLogs(), getRemovalTID(), locked_by); +} + +bool VersionMetadata::tryLockRemovalTID(const TransactionID & tid, const TransactionInfoContext & context, TIDHash * locked_by_id) +{ + assert(!tid.isEmpty()); + assert(!creation_tid.isEmpty()); + TIDHash removal_lock_value = tid.getHash(); + TIDHash expected_removal_lock_value = 0; + bool locked = removal_tid_lock.compare_exchange_strong(expected_removal_lock_value, removal_lock_value); + if (!locked) + { + if (tid == Tx::PrehistoricTID && expected_removal_lock_value == Tx::PrehistoricTID.getHash()) + { + /// Don't need to lock part for queries without transaction + LOG_TEST(log, "Assuming removal_tid is locked by {}, table: {}, part: {}", tid, context.table.getNameForLogs(), context.part_name); + return true; + } + + if (locked_by_id) + *locked_by_id = expected_removal_lock_value; + return false; + } + + removal_tid = tid; + tryWriteEventToSystemLog(log, TransactionsInfoLogElement::LOCK_PART, tid, context); + return true; +} + +void VersionMetadata::unlockRemovalTID(const TransactionID & tid, const TransactionInfoContext & context) +{ + LOG_TEST(log, "Unlocking removal_tid by {}, table: {}, part: {}", tid, context.table.getNameForLogs(), context.part_name); + assert(!tid.isEmpty()); + TIDHash removal_lock_value = tid.getHash(); + TIDHash locked_by = removal_tid_lock.load(); + + auto throw_cannot_unlock = [&]() + { + auto locked_by_txn = TransactionLog::instance().tryGetRunningTransaction(locked_by); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unlock removal_tid, it's a bug. Current: {} {}, actual: {} {}", + removal_lock_value, tid, locked_by, locked_by_txn ? 
locked_by_txn->tid : Tx::EmptyTID); + }; + + if (locked_by != removal_lock_value) + throw_cannot_unlock(); + + removal_tid = Tx::EmptyTID; + bool unlocked = removal_tid_lock.compare_exchange_strong(locked_by, 0); + if (!unlocked) + throw_cannot_unlock(); + + tryWriteEventToSystemLog(log, TransactionsInfoLogElement::UNLOCK_PART, tid, context); +} + +bool VersionMetadata::isRemovalTIDLocked() const +{ + return removal_tid_lock.load() != 0; +} + +void VersionMetadata::setCreationTID(const TransactionID & tid, TransactionInfoContext * context) +{ + /// NOTE ReplicatedMergeTreeBlockOutputStream may add one part multiple times + assert(creation_tid.isEmpty() || creation_tid == tid); + creation_tid = tid; + if (context) + tryWriteEventToSystemLog(log, TransactionsInfoLogElement::ADD_PART, tid, *context); +} + +bool VersionMetadata::isVisible(const MergeTreeTransaction & txn) +{ + return isVisible(txn.getSnapshot(), txn.tid); +} + +bool VersionMetadata::isVisible(CSN snapshot_version, TransactionID current_tid) +{ + assert(!creation_tid.isEmpty()); + CSN creation = creation_csn.load(std::memory_order_relaxed); + TIDHash removal_lock = removal_tid_lock.load(std::memory_order_relaxed); + CSN removal = removal_csn.load(std::memory_order_relaxed); + + [[maybe_unused]] bool had_creation_csn = creation; + [[maybe_unused]] bool had_removal_tid = removal_lock; + [[maybe_unused]] bool had_removal_csn = removal; + assert(!had_removal_csn || had_removal_tid); + assert(!had_removal_csn || had_creation_csn); + assert(creation == Tx::UnknownCSN || creation == Tx::PrehistoricCSN || Tx::MaxReservedCSN < creation); + assert(removal == Tx::UnknownCSN || removal == Tx::PrehistoricCSN || Tx::MaxReservedCSN < removal); + + /// Special snapshot for introspection purposes + if (unlikely(snapshot_version == Tx::EverythingVisibleCSN)) + return true; + + /// Fast path: + + /// Part is definitely not visible if: + /// - creation was committed after we took the snapshot + /// - removal was committed before we took the snapshot + /// - current transaction is removing it + if (creation && snapshot_version < creation) + return false; + if (removal && removal <= snapshot_version) + return false; + if (!current_tid.isEmpty() && removal_lock && removal_lock == current_tid.getHash()) + return false; + + /// Otherwise, part is definitely visible if: + /// - creation was committed before we took the snapshot and nobody tried to remove the part + /// - creation was committed before and removal was committed after + /// - current transaction is creating it + if (creation && creation <= snapshot_version && !removal_lock) + return true; + if (creation && creation <= snapshot_version && removal && snapshot_version < removal) + return true; + if (!current_tid.isEmpty() && creation_tid == current_tid) + return true; + + /// End of fast path. + + /// Data part has creation_tid/removal_tid, but does not have creation_csn/removal_csn. + /// It means that some transaction is creating/removing the part right now or has done it recently + /// and we don't know if it was already committed or not. + assert(!had_creation_csn || (had_removal_tid && !had_removal_csn)); + assert(current_tid.isEmpty() || (creation_tid != current_tid && removal_lock != current_tid.getHash())); + + /// Before doing CSN lookup, let's check some extra conditions. 
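Once both CSNs are known, the fast-path rules above reduce to a pure function of three numbers: a version is visible in a snapshot iff its creation was committed at or before the snapshot and its removal (if any) was committed after it. A reduced sketch of only that committed-CSN case; the real isVisible additionally handles in-flight TIDs and the removal lock, as the surrounding code shows:

#include <cassert>
#include <cstdint>

using CSN = uint64_t;
constexpr CSN UnknownCSN = 0;   // stand-in for Tx::UnknownCSN: "not committed / not removed yet"

// Visibility of one object version in a snapshot, assuming both CSNs are already resolved.
bool isVisibleSketch(CSN creation_csn, CSN removal_csn, CSN snapshot)
{
    if (creation_csn == UnknownCSN || snapshot < creation_csn)
        return false;                        // created after the snapshot (or creation not committed)
    if (removal_csn != UnknownCSN && removal_csn <= snapshot)
        return false;                        // removed at or before the snapshot
    return true;                             // created before, removal (if any) committed after
}

int main()
{
    // Part created at CSN 100 and removed at CSN 200.
    assert(!isVisibleSketch(100, 200, 99));         // snapshot taken before creation
    assert( isVisibleSketch(100, 200, 150));        // between creation and removal
    assert(!isVisibleSketch(100, 200, 200));        // a snapshot at the removal CSN no longer sees the part
    assert( isVisibleSketch(100, UnknownCSN, 150)); // still-active part
}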
+ /// If snapshot_version <= some_tid.start_csn, then changes of the transaction with some_tid + /// are definitely not visible for us (because the transaction can be committed with greater CSN only), + /// so we don't need to check if it was committed. + if (snapshot_version <= creation_tid.start_csn) + return false; + + /// Check if creation_tid/removal_tid transactions are committed and write CSNs + /// TODO Transactions: we probably need more optimizations here + /// to avoid some CSN lookups or make the lookups cheaper. + /// NOTE: Old enough committed parts always have written CSNs, + /// so we can determine their visibility through fast path. + /// But for long-running writing transactions we will always do + /// CNS lookup and get 0 (UnknownCSN) until the transaction is committed/rolled back. + creation = getCSNAndAssert(creation_tid.getHash(), creation_csn, &creation_tid); + if (!creation) + { + return false; /// Part creation is not committed yet + } + + /// We don't need to check if CSNs are already written or not, + /// because once written CSN cannot be changed, so it's safe to overwrite it (with the same value). + creation_csn.store(creation, std::memory_order_relaxed); + + if (removal_lock) + { + removal = getCSNAndAssert(removal_lock, removal_csn); + if (removal) + removal_csn.store(removal, std::memory_order_relaxed); + } + + return creation <= snapshot_version && (!removal || snapshot_version < removal); +} + +bool VersionMetadata::canBeRemoved() +{ + if (creation_tid == Tx::PrehistoricTID) + { + /// Avoid access to Transaction log if transactions are not involved + + TIDHash removal_lock = removal_tid_lock.load(std::memory_order_relaxed); + if (!removal_lock) + return false; + + if (removal_lock == Tx::PrehistoricTID.getHash()) + return true; + } + + return canBeRemovedImpl(TransactionLog::instance().getOldestSnapshot()); +} + +bool VersionMetadata::canBeRemovedImpl(CSN oldest_snapshot_version) +{ + CSN creation = creation_csn.load(std::memory_order_relaxed); + /// We can safely remove part if its creation was rolled back + if (creation == Tx::RolledBackCSN) + return true; + + if (!creation) + { + /// Cannot remove part if its creation not committed yet + creation = getCSNAndAssert(creation_tid.getHash(), creation_csn, &creation_tid); + if (creation) + creation_csn.store(creation, std::memory_order_relaxed); + else + return false; + } + + /// Part is probably visible for some transactions (part is too new or the oldest snapshot is too old) + if (oldest_snapshot_version < creation) + return false; + + TIDHash removal_lock = removal_tid_lock.load(std::memory_order_relaxed); + /// Part is active + if (!removal_lock) + return false; + + CSN removal = removal_csn.load(std::memory_order_relaxed); + if (!removal) + { + /// Part removal is not committed yet + removal = getCSNAndAssert(removal_lock, removal_csn); + if (removal) + removal_csn.store(removal, std::memory_order_relaxed); + else + return false; + } + + /// We can safely remove part if all running transactions were started after part removal was committed + return removal <= oldest_snapshot_version; +} + +#define CREATION_TID_STR "creation_tid: " +#define CREATION_CSN_STR "creation_csn: " +#define REMOVAL_TID_STR "removal_tid: " +#define REMOVAL_CSN_STR "removal_csn: " + + +void VersionMetadata::writeCSN(WriteBuffer & buf, WhichCSN which_csn, bool internal /* = false*/) const +{ + if (which_csn == CREATION) + { + if (CSN creation = creation_csn.load()) + { + writeCString("\n" CREATION_CSN_STR, buf); + 
writeText(creation, buf); + } + else if (!internal) + throw Exception(ErrorCodes::LOGICAL_ERROR, "writeCSN called for creation_csn = 0, it's a bug"); + } + else /// if (which_csn == REMOVAL) + { + if (CSN removal = removal_csn.load()) + { + writeCString("\n" REMOVAL_CSN_STR, buf); + writeText(removal, buf); + } + else if (!internal) + throw Exception(ErrorCodes::LOGICAL_ERROR, "writeCSN called for removal_csn = 0, it's a bug"); + } +} + +void VersionMetadata::writeRemovalTID(WriteBuffer & buf, bool clear) const +{ + writeCString("\n" REMOVAL_TID_STR, buf); + if (clear) + TransactionID::write(Tx::EmptyTID, buf); + else + TransactionID::write(removal_tid, buf); +} + +void VersionMetadata::write(WriteBuffer & buf) const +{ + writeCString("version: 1", buf); + writeCString("\n" CREATION_TID_STR, buf); + TransactionID::write(creation_tid, buf); + writeCSN(buf, CREATION, /* internal */ true); + + if (removal_tid_lock) + { + assert(!removal_tid.isEmpty()); + assert(removal_tid.getHash() == removal_tid_lock); + writeRemovalTID(buf); + writeCSN(buf, REMOVAL, /* internal */ true); + } +} + +void VersionMetadata::read(ReadBuffer & buf) +{ + constexpr size_t size = sizeof(CREATION_TID_STR) - 1; + static_assert(sizeof(CREATION_CSN_STR) - 1 == size); + static_assert(sizeof(REMOVAL_TID_STR) - 1 == size); + static_assert(sizeof(REMOVAL_CSN_STR) - 1 == size); + + assertString("version: 1", buf); + assertString("\n" CREATION_TID_STR, buf); + creation_tid = TransactionID::read(buf); + if (buf.eof()) + return; + + String name; + name.resize(size); + + auto read_csn = [&]() + { + UInt64 val; + readText(val, buf); + return val; + }; + + while (!buf.eof()) + { + assertChar('\n', buf); + buf.readStrict(name.data(), size); + + if (name == CREATION_CSN_STR) + { + assert(!creation_csn); + creation_csn = read_csn(); + } + else if (name == REMOVAL_TID_STR) + { + /// NOTE Metadata file may actually contain multiple creation TIDs, we need the last one. + removal_tid = TransactionID::read(buf); + if (!removal_tid.isEmpty()) + removal_tid_lock = removal_tid.getHash(); + } + else if (name == REMOVAL_CSN_STR) + { + if (removal_tid.isEmpty()) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Found removal_csn in metadata file, but removal_tid is {}", removal_tid); + assert(!removal_csn); + removal_csn = read_csn(); + } + else + { + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Got unexpected content: {}", name); + } + } +} + +String VersionMetadata::toString(bool one_line) const +{ + WriteBufferFromOwnString buf; + write(buf); + String res = buf.str(); + if (one_line) + std::replace(res.begin(), res.end(), '\n', ' '); + return res; +} + + +DataTypePtr getTransactionIDDataType() +{ + DataTypes types; + types.push_back(std::make_shared()); + types.push_back(std::make_shared()); + types.push_back(std::make_shared()); + return std::make_shared(std::move(types)); +} + +} diff --git a/src/Interpreters/TransactionVersionMetadata.h b/src/Interpreters/TransactionVersionMetadata.h new file mode 100644 index 00000000000..18ac445cc29 --- /dev/null +++ b/src/Interpreters/TransactionVersionMetadata.h @@ -0,0 +1,81 @@ +#pragma once +#include +#include + +namespace Poco +{ +class Logger; +} + +namespace DB +{ + +/// This structure allows to pass more information about a part that transaction is trying to create/remove. +/// It's useful for logging and for exception messages. 
+struct TransactionInfoContext +{ + /// To which table a part belongs + StorageID table = StorageID::createEmpty(); + /// Name of a part that transaction is trying to create/remove + String part_name; + /// Optional: name of part that covers `part_name` if transaction is trying to remove `part_name` + String covering_part; + + TransactionInfoContext(StorageID id, String part) : table(std::move(id)), part_name(std::move(part)) {} +}; + +/// This structure contains metadata of an object (currently it's used for data parts in MergeTree only) +/// that allows to determine when and by which transaction it has been created/removed +struct VersionMetadata +{ + /// ID of transaction that has created/is trying to create this object + TransactionID creation_tid = Tx::EmptyTID; + /// ID of transaction that has removed/is trying to remove this object + TransactionID removal_tid = Tx::EmptyTID; + + /// Hash of removal_tid, used to lock an object for removal + std::atomic removal_tid_lock = 0; + + /// CSN of transaction that has created this object + std::atomic creation_csn = Tx::UnknownCSN; + /// CSN of transaction that has removed this object + std::atomic removal_csn = Tx::UnknownCSN; + + /// Checks if an object is visible for a transaction or not. + bool isVisible(const MergeTreeTransaction & txn); + bool isVisible(CSN snapshot_version, TransactionID current_tid = Tx::EmptyTID); + + TransactionID getCreationTID() const { return creation_tid; } + TransactionID getRemovalTID() const; + + /// Locks an object for removal, throws if it's already locked by a concurrent transaction + bool tryLockRemovalTID(const TransactionID & tid, const TransactionInfoContext & context, TIDHash * locked_by_id = nullptr); + void lockRemovalTID(const TransactionID & tid, const TransactionInfoContext & context); + /// Unlocks an object for removal (when transaction is rolling back) + void unlockRemovalTID(const TransactionID & tid, const TransactionInfoContext & context); + + bool isRemovalTIDLocked() const; + + /// It can be called only from MergeTreeTransaction or on server startup + void setCreationTID(const TransactionID & tid, TransactionInfoContext * context); + + /// Checks if it's safe to remove an outdated version of an object + bool canBeRemoved(); + bool canBeRemovedImpl(CSN oldest_snapshot_version); + + void write(WriteBuffer & buf) const; + void read(ReadBuffer & buf); + + enum WhichCSN { CREATION, REMOVAL }; + void writeCSN(WriteBuffer & buf, WhichCSN which_csn, bool internal = false) const; + void writeRemovalTID(WriteBuffer & buf, bool clear = false) const; + + String toString(bool one_line = true) const; + + Poco::Logger * log; + VersionMetadata(); +}; + +DataTypePtr getTransactionIDDataType(); + +} diff --git a/src/Interpreters/TransactionsInfoLog.cpp b/src/Interpreters/TransactionsInfoLog.cpp new file mode 100644 index 00000000000..0498ee00e9e --- /dev/null +++ b/src/Interpreters/TransactionsInfoLog.cpp @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +NamesAndTypesList TransactionsInfoLogElement::getNamesAndTypes() +{ + auto type_enum = std::make_shared( + DataTypeEnum8::Values + { + {"Begin", static_cast(BEGIN)}, + {"Commit", static_cast(COMMIT)}, + {"Rollback", static_cast(ROLLBACK)}, + + {"AddPart", static_cast(ADD_PART)}, + {"LockPart", static_cast(LOCK_PART)}, + {"UnlockPart", static_cast(UNLOCK_PART)}, + }); + + return + { + {"type", std::move(type_enum)}, + {"event_date",
std::make_shared()}, + {"event_time", std::make_shared(6)}, + {"thread_id", std::make_shared()}, + + {"query_id", std::make_shared()}, + {"tid", getTransactionIDDataType()}, + {"tid_hash", std::make_shared()}, + + {"csn", std::make_shared()}, + + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"uuid", std::make_shared()}, + {"part", std::make_shared()}, + }; +} + +void TransactionsInfoLogElement::fillCommonFields(const TransactionInfoContext * context) +{ + event_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + thread_id = getThreadId(); + + query_id = CurrentThread::getQueryId().toString(); + + if (!context) + return; + + table = context->table; + part_name = context->part_name; +} + +void TransactionsInfoLogElement::appendToBlock(MutableColumns & columns) const +{ + assert(type != UNKNOWN); + size_t i = 0; + + columns[i++]->insert(type); + auto event_time_seconds = event_time / 1000000; + columns[i++]->insert(DateLUT::instance().toDayNum(event_time_seconds).toUnderType()); + columns[i++]->insert(event_time); + columns[i++]->insert(thread_id); + + columns[i++]->insert(query_id); + columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id}); + columns[i++]->insert(tid.getHash()); + + columns[i++]->insert(csn); + + columns[i++]->insert(table.database_name); + columns[i++]->insert(table.table_name); + columns[i++]->insert(table.uuid); + columns[i++]->insert(part_name); +} + + +void tryWriteEventToSystemLog(Poco::Logger * log, + TransactionsInfoLogElement::Type type, const TransactionID & tid, + const TransactionInfoContext & context) +try +{ + auto system_log = Context::getGlobalContextInstance()->getTransactionsInfoLog(); + if (!system_log) + return; + + TransactionsInfoLogElement elem; + elem.type = type; + elem.tid = tid; + elem.fillCommonFields(&context); + system_log->add(elem); +} +catch (...) 
+{ + tryLogCurrentException(log); +} + +} diff --git a/src/Interpreters/TransactionsInfoLog.h b/src/Interpreters/TransactionsInfoLog.h new file mode 100644 index 00000000000..f595413a729 --- /dev/null +++ b/src/Interpreters/TransactionsInfoLog.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +struct TransactionInfoContext; + +struct TransactionsInfoLogElement +{ + enum Type + { + UNKNOWN = 0, + + BEGIN = 1, + COMMIT = 2, + ROLLBACK = 3, + + ADD_PART = 10, + LOCK_PART = 11, + UNLOCK_PART = 12, + }; + + Type type = UNKNOWN; + Decimal64 event_time = 0; + UInt64 thread_id; + + String query_id; + TransactionID tid = Tx::EmptyTID; + + /// For COMMIT events + CSN csn = Tx::UnknownCSN; + + /// For *_PART events + StorageID table = StorageID::createEmpty(); + String part_name; + + static std::string name() { return "TransactionsInfoLog"; } + static NamesAndTypesList getNamesAndTypes(); + static NamesAndAliases getNamesAndAliases() { return {}; } + void appendToBlock(MutableColumns & columns) const; + + void fillCommonFields(const TransactionInfoContext * context = nullptr); +}; + +class TransactionsInfoLog : public SystemLog +{ + using SystemLog::SystemLog; +}; + + +void tryWriteEventToSystemLog(Poco::Logger * log, TransactionsInfoLogElement::Type type, + const TransactionID & tid, const TransactionInfoContext & context); + +} diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index f0279bafca2..28bcbcf06dc 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -63,6 +63,9 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, ContextPtr context, c BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, AccessRightsElements && query_requires_access) { + if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported"); + /// Remove FORMAT and INTO OUTFILE if exists ASTPtr query_ptr = query_ptr_->clone(); ASTQueryWithOutput::resetOutputASTIfExist(*query_ptr); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index a3349f12f8f..a3232b798e5 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include #include @@ -49,6 +51,7 @@ #include #include #include +#include #include #include @@ -85,6 +88,7 @@ namespace ErrorCodes { extern const int INTO_OUTFILE_NOT_ALLOWED; extern const int QUERY_WAS_CANCELLED; + extern const int INVALID_TRANSACTION; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; } @@ -176,10 +180,15 @@ static void logQuery(const String & query, ContextPtr context, bool internal) if (!comment.empty()) comment = fmt::format(" (comment: {})", comment); - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}){} {}", + String transaction_info; + if (auto txn = context->getCurrentTransaction()) + transaction_info = fmt::format(" (TID: {}, TIDH: {})", txn->tid, txn->tid.getHash()); + + LOG_DEBUG(&Poco::Logger::get("executeQuery"), "(from {}{}{}){}{} {}", client_info.current_address.toString(), (current_user != "default" ? ", user: " + current_user : ""), (!initial_query_id.empty() && current_query_id != initial_query_id ? 
", initial_query_id: " + initial_query_id : std::string()), + transaction_info, comment, joinLines(query)); @@ -294,6 +303,9 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr if (elem.log_comment.size() > settings.max_query_size) elem.log_comment.resize(settings.max_query_size); + if (auto txn = context->getCurrentTransaction()) + elem.tid = txn->tid; + if (settings.calculate_text_stack_trace) setExceptionStackTrace(elem); logException(context, elem); @@ -423,11 +435,18 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end); + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + if (auto txn = context->getCurrentTransaction()) + { + assert(txn->getState() != MergeTreeTransaction::COMMITTED); + if (txn->getState() == MergeTreeTransaction::ROLLED_BACK && !ast->as() && !ast->as()) + throw Exception(ErrorCodes::INVALID_TRANSACTION, "Cannot execute query: transaction is rolled back"); + } + /// Interpret SETTINGS clauses as early as possible (before invoking the corresponding interpreter), /// to allow settings to take effect. if (const auto * select_query = ast->as()) @@ -629,11 +648,18 @@ static std::tuple executeQueryImpl( const auto & table_id = insert_query->table_id; if (!table_id.empty()) context->setInsertionTable(table_id); + + if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Async inserts inside transactions are not supported"); } else { interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); + if (context->getCurrentTransaction() && !interpreter->supportsTransactions() && + context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported for this type of query ({})", ast->getID()); + if (!interpreter->ignoreQuota()) { quota = context->getQuota(); @@ -724,6 +750,9 @@ static std::tuple executeQueryImpl( elem.client_info = client_info; + if (auto txn = context->getCurrentTransaction()) + elem.tid = txn->tid; + bool log_queries = settings.log_queries && !internal; /// Log into system table start of query execution, if need. @@ -945,6 +974,9 @@ static std::tuple executeQueryImpl( log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), quota(quota), status_info_to_query_log] () mutable { + if (auto txn = context->getCurrentTransaction()) + txn->onException(); + if (quota) quota->used(QuotaType::ERRORS, 1, /* check_exceeded = */ false); @@ -1001,6 +1033,9 @@ static std::tuple executeQueryImpl( } catch (...) { + if (auto txn = context->getCurrentTransaction()) + txn->onException(); + if (!internal) { if (query_for_logging.empty()) diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index abed9803a7b..3f169a93bad 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -19,6 +19,7 @@ public: QueryPipeline, /// 'EXPLAIN PIPELINE ...' QueryEstimates, /// 'EXPLAIN ESTIMATE ...' TableOverride, /// 'EXPLAIN TABLE OVERRIDE ...' 
+ CurrentTransaction, /// 'EXPLAIN CURRENT TRANSACTION' }; explicit ASTExplainQuery(ExplainKind kind_) : kind(kind_) {} @@ -111,6 +112,7 @@ private: case QueryPipeline: return "EXPLAIN PIPELINE"; case QueryEstimates: return "EXPLAIN ESTIMATE"; case TableOverride: return "EXPLAIN TABLE OVERRIDE"; + case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION"; } __builtin_unreachable(); diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index 40e14c918ff..3fa6a6ed1a9 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -94,33 +94,48 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s << quoteString(compression->as().value.safeGet()); } - if (select) - { - settings.ostr << " "; - select->formatImpl(settings, state, frame); - } - else if (watch) - { - settings.ostr << " "; - watch->formatImpl(settings, state, frame); - } - else - { - if (!format.empty()) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORMAT " << (settings.hilite ? hilite_none : "") << format; - } - else if (!infile) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : ""); - } - } - if (settings_ast) { settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "SETTINGS " << (settings.hilite ? hilite_none : ""); settings_ast->formatImpl(settings, state, frame); } + + /// Compatibility for INSERT w/o SETTINGS to format in oneline, i.e.: + /// + /// INSERT INTO foo VALUES + /// + /// But + /// + /// INSERT INTO foo + /// SETTINGS max_threads=1 + /// VALUES + /// + char delim = settings_ast ? settings.nl_or_ws : ' '; + + if (select) + { + settings.ostr << delim; + select->formatImpl(settings, state, frame); + } + else if (watch) + { + settings.ostr << delim; + watch->formatImpl(settings, state, frame); + } + + if (!select && !watch) + { + if (!format.empty()) + { + settings.ostr << delim + << (settings.hilite ? hilite_keyword : "") << "FORMAT " << (settings.hilite ? hilite_none : "") << format; + } + else if (!infile) + { + settings.ostr << delim + << (settings.hilite ? hilite_keyword : "") << "VALUES" << (settings.hilite ? 
hilite_none : ""); + } + } } void ASTInsertQuery::updateTreeHashImpl(SipHash & hash_state) const diff --git a/src/Parsers/ASTKillQueryQuery.cpp b/src/Parsers/ASTKillQueryQuery.cpp index 71c3011dd2c..8bf99312544 100644 --- a/src/Parsers/ASTKillQueryQuery.cpp +++ b/src/Parsers/ASTKillQueryQuery.cpp @@ -24,6 +24,9 @@ void ASTKillQueryQuery::formatQueryImpl(const FormatSettings & settings, FormatS case Type::PartMoveToShard: settings.ostr << "PART_MOVE_TO_SHARD"; break; + case Type::Transaction: + settings.ostr << "TRANSACTION"; + break; } formatOnCluster(settings); diff --git a/src/Parsers/ASTKillQueryQuery.h b/src/Parsers/ASTKillQueryQuery.h index 6ff12bcba93..95be3ec6309 100644 --- a/src/Parsers/ASTKillQueryQuery.h +++ b/src/Parsers/ASTKillQueryQuery.h @@ -14,6 +14,7 @@ public: Query, /// KILL QUERY Mutation, /// KILL MUTATION PartMoveToShard, /// KILL PART_MOVE_TO_SHARD + Transaction, /// KILL TRANSACTION }; Type type = Type::Query; diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 22488e35e12..600525f9abe 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -28,6 +28,7 @@ public: #if USE_EMBEDDED_COMPILER DROP_COMPILED_EXPRESSION_CACHE, #endif + DROP_FILESYSTEM_CACHE, STOP_LISTEN_QUERIES, START_LISTEN_QUERIES, RESTART_REPLICAS, @@ -88,6 +89,7 @@ public: String volume; String disk; UInt64 seconds{}; + String filesystem_cache_path; String getID(char) const override { return "SYSTEM query"; } diff --git a/src/Parsers/ASTTransactionControl.cpp b/src/Parsers/ASTTransactionControl.cpp new file mode 100644 index 00000000000..3ff29d9e43e --- /dev/null +++ b/src/Parsers/ASTTransactionControl.cpp @@ -0,0 +1,32 @@ +#include +#include +#include + +namespace DB +{ + +void ASTTransactionControl::formatImpl(const FormatSettings & format /*state*/, FormatState &, FormatStateStacked /*frame*/) const +{ + switch (action) + { + case BEGIN: + format.ostr << (format.hilite ? hilite_keyword : "") << "BEGIN TRANSACTION" << (format.hilite ? hilite_none : ""); + break; + case COMMIT: + format.ostr << (format.hilite ? hilite_keyword : "") << "COMMIT" << (format.hilite ? hilite_none : ""); + break; + case ROLLBACK: + format.ostr << (format.hilite ? hilite_keyword : "") << "ROLLBACK" << (format.hilite ? hilite_none : ""); + break; + case SET_SNAPSHOT: + format.ostr << (format.hilite ? hilite_keyword : "") << "SET TRANSACTION SNAPSHOT " << (format.hilite ? hilite_none : "") << snapshot; + break; + } +} + +void ASTTransactionControl::updateTreeHashImpl(SipHash & hash_state) const +{ + hash_state.update(action); +} + +} diff --git a/src/Parsers/ASTTransactionControl.h b/src/Parsers/ASTTransactionControl.h new file mode 100644 index 00000000000..06f578ff138 --- /dev/null +++ b/src/Parsers/ASTTransactionControl.h @@ -0,0 +1,32 @@ +#pragma once +#include + +namespace DB +{ + +/// Common AST for TCL queries +class ASTTransactionControl : public IAST +{ +public: + enum QueryType + { + BEGIN, + COMMIT, + ROLLBACK, + SET_SNAPSHOT, + }; + + QueryType action; + + UInt64 snapshot; /// For SET TRANSACTION SNAPSHOT ... 
+ + ASTTransactionControl(QueryType action_) : action(action_) {} + + String getID(char /*delimiter*/) const override { return "ASTTransactionControl"; } + ASTPtr clone() const override { return std::make_shared(*this); } + + void formatImpl(const FormatSettings & format, FormatState & /*state*/, FormatStateStacked /*frame*/) const override; + void updateTreeHashImpl(SipHash & hash_state) const override; +}; + +} diff --git a/src/Parsers/ParserExplainQuery.cpp b/src/Parsers/ParserExplainQuery.cpp index e072f6a14d7..71c49a020cc 100644 --- a/src/Parsers/ParserExplainQuery.cpp +++ b/src/Parsers/ParserExplainQuery.cpp @@ -22,6 +22,7 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_plan("PLAN"); ParserKeyword s_estimates("ESTIMATE"); ParserKeyword s_table_override("TABLE OVERRIDE"); + ParserKeyword s_current_transaction("CURRENT TRANSACTION"); if (s_explain.ignore(pos, expected)) { @@ -39,6 +40,8 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected kind = ASTExplainQuery::ExplainKind::QueryEstimates; //-V1048 else if (s_table_override.ignore(pos, expected)) kind = ASTExplainQuery::ExplainKind::TableOverride; + else if (s_current_transaction.ignore(pos, expected)) + kind = ASTExplainQuery::ExplainKind::CurrentTransaction; } else return false; @@ -58,11 +61,11 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserCreateTableQuery create_p; ParserSelectWithUnionQuery select_p; - ParserInsertQuery insert_p(end); + ParserInsertQuery insert_p(end, allow_settings_after_format_in_insert); ASTPtr query; if (kind == ASTExplainQuery::ExplainKind::ParsedAST) { - ParserQuery p(end); + ParserQuery p(end, allow_settings_after_format_in_insert); if (p.parse(pos, query, expected)) explain_query->setExplainedQuery(std::move(query)); else @@ -79,6 +82,10 @@ bool ParserExplainQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected explain_query->setTableFunction(table_function); explain_query->setTableOverride(table_override); } + else if (kind == ASTExplainQuery::ExplainKind::CurrentTransaction) + { + /// Nothing to parse + } else if (select_p.parse(pos, query, expected) || create_p.parse(pos, query, expected) || insert_p.parse(pos, query, expected)) diff --git a/src/Parsers/ParserExplainQuery.h b/src/Parsers/ParserExplainQuery.h index ba30e97a58f..1a415a04dde 100644 --- a/src/Parsers/ParserExplainQuery.h +++ b/src/Parsers/ParserExplainQuery.h @@ -10,11 +10,15 @@ class ParserExplainQuery : public IParserBase { protected: const char * end; + bool allow_settings_after_format_in_insert; const char * getName() const override { return "EXPLAIN"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - explicit ParserExplainQuery(const char* end_) : end(end_) {} + explicit ParserExplainQuery(const char* end_, bool allow_settings_after_format_in_insert_) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} }; } diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 44db07278c2..b0ca361155f 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -130,8 +130,17 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } } - Pos before_values = pos; + /// Read SETTINGS if they are defined + if (s_settings.ignore(pos, expected)) + { + /// Settings are written like SET query, so parse them with ParserSetQuery + ParserSetQuery 
parser_settings(true); + if (!parser_settings.parse(pos, settings_ast, expected)) + return false; + } + String format_str; + Pos before_values = pos; /// VALUES or FORMAT or SELECT or WITH or WATCH. /// After FROM INFILE we expect FORMAT, SELECT, WITH or nothing. @@ -177,9 +186,21 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } - /// Read SETTINGS if they are defined - if (s_settings.ignore(pos, expected)) + /// Read SETTINGS after FORMAT. + /// + /// Note, that part of SETTINGS can be interpreted as values, + /// hence it is done only under option. + /// + /// Refs: https://github.com/ClickHouse/ClickHouse/issues/35100 + if (allow_settings_after_format_in_insert && s_settings.ignore(pos, expected)) { + if (settings_ast) + throw Exception("You have SETTINGS before and after FORMAT, " + "this is not allowed. " + "Consider switching to SETTINGS before FORMAT " + "and disable allow_settings_after_format_in_insert.", + ErrorCodes::SYNTAX_ERROR); + /// Settings are written like SET query, so parse them with ParserSetQuery ParserSetQuery parser_settings(true); if (!parser_settings.parse(pos, settings_ast, expected)) diff --git a/src/Parsers/ParserInsertQuery.h b/src/Parsers/ParserInsertQuery.h index f98e433551d..0d7ce25e09d 100644 --- a/src/Parsers/ParserInsertQuery.h +++ b/src/Parsers/ParserInsertQuery.h @@ -26,11 +26,15 @@ class ParserInsertQuery : public IParserBase { private: const char * end; + bool allow_settings_after_format_in_insert; const char * getName() const override { return "INSERT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - explicit ParserInsertQuery(const char * end_) : end(end_) {} + explicit ParserInsertQuery(const char * end_, bool allow_settings_after_format_in_insert_) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} }; /** Insert accepts an identifier and an asterisk with variants. 
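The two ParserInsertQuery hunks above move the pre-FORMAT SETTINGS parsing earlier and, only when allow_settings_after_format_in_insert is enabled, additionally accept SETTINGS after FORMAT, rejecting a query that carries both clauses. As a reading aid, here is a minimal standalone C++ sketch of that placement rule; it is not ClickHouse code, uses none of the real parser classes, and its whitespace tokenization and the sample queries (table foo, max_threads=1) are assumptions made purely for illustration.

// Standalone sketch of the SETTINGS-placement rule from ParserInsertQuery above.
// SETTINGS before FORMAT is always parsed; SETTINGS after FORMAT is parsed only when
// allow_settings_after_format_in_insert is enabled (otherwise it is left to be read as
// inserted data); having both clauses with the option enabled is an error.
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

enum class SettingsPlacement { None, BeforeFormat, AfterFormat };

SettingsPlacement classifySettingsPlacement(const std::string & query, bool allow_settings_after_format_in_insert)
{
    std::vector<std::string> tokens;
    std::istringstream stream(query);
    for (std::string token; stream >> token;)
        tokens.push_back(token);

    bool seen_format = false;
    bool before = false;
    bool after = false;
    for (const auto & token : tokens)
    {
        if (token == "FORMAT")
            seen_format = true;
        else if (token == "SETTINGS")
        {
            if (!seen_format)
                before = true;
            else if (allow_settings_after_format_in_insert)
            {
                if (before)
                    throw std::runtime_error("SETTINGS before and after FORMAT is not allowed");
                after = true;
            }
            // With the option disabled, SETTINGS after FORMAT is intentionally ignored here:
            // the real parser would read it as part of the inserted data.
        }
    }
    if (after)
        return SettingsPlacement::AfterFormat;
    if (before)
        return SettingsPlacement::BeforeFormat;
    return SettingsPlacement::None;
}

int main()
{
    std::cout << static_cast<int>(classifySettingsPlacement(
        "INSERT INTO foo SETTINGS max_threads=1 FORMAT CSV", false)) << '\n';   // 1: BeforeFormat
    std::cout << static_cast<int>(classifySettingsPlacement(
        "INSERT INTO foo FORMAT CSV SETTINGS max_threads=1", true)) << '\n';    // 2: AfterFormat
    std::cout << static_cast<int>(classifySettingsPlacement(
        "INSERT INTO foo FORMAT CSV SETTINGS max_threads=1", false)) << '\n';   // 0: None (read as data)
    return 0;
}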
diff --git a/src/Parsers/ParserKillQueryQuery.cpp b/src/Parsers/ParserKillQueryQuery.cpp index bc895406c9f..0b1b37e61bf 100644 --- a/src/Parsers/ParserKillQueryQuery.cpp +++ b/src/Parsers/ParserKillQueryQuery.cpp @@ -18,6 +18,7 @@ bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expect ParserKeyword p_query{"QUERY"}; ParserKeyword p_mutation{"MUTATION"}; ParserKeyword p_part_move_to_shard{"PART_MOVE_TO_SHARD"}; + ParserKeyword p_transaction{"TRANSACTION"}; ParserKeyword p_on{"ON"}; ParserKeyword p_test{"TEST"}; ParserKeyword p_sync{"SYNC"}; @@ -34,6 +35,8 @@ bool ParserKillQueryQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expect query->type = ASTKillQueryQuery::Type::Mutation; else if (p_part_move_to_shard.ignore(pos, expected)) query->type = ASTKillQueryQuery::Type::PartMoveToShard; + else if (p_transaction.ignore(pos, expected)) + query->type = ASTKillQueryQuery::Type::Transaction; else return false; diff --git a/src/Parsers/ParserQuery.cpp b/src/Parsers/ParserQuery.cpp index 7677efd9415..eaea5dd0f5f 100644 --- a/src/Parsers/ParserQuery.cpp +++ b/src/Parsers/ParserQuery.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -30,8 +31,8 @@ namespace DB bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserQueryWithOutput query_with_output_p(end); - ParserInsertQuery insert_p(end); + ParserQueryWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserInsertQuery insert_p(end, allow_settings_after_format_in_insert); ParserUseQuery use_p; ParserSetQuery set_p; ParserSystemQuery system_p; @@ -46,6 +47,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserGrantQuery grant_p; ParserSetRoleQuery set_role_p; ParserExternalDDLQuery external_ddl_p; + ParserTransactionControl transaction_control_p; ParserBackupQuery backup_p; bool res = query_with_output_p.parse(pos, node, expected) @@ -64,6 +66,7 @@ bool ParserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || drop_access_entity_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected) || external_ddl_p.parse(pos, node, expected) + || transaction_control_p.parse(pos, node, expected) || backup_p.parse(pos, node, expected); return res; diff --git a/src/Parsers/ParserQuery.h b/src/Parsers/ParserQuery.h index be72a436be8..a2d4e6e04df 100644 --- a/src/Parsers/ParserQuery.h +++ b/src/Parsers/ParserQuery.h @@ -10,12 +10,16 @@ class ParserQuery : public IParserBase { private: const char * end; + bool allow_settings_after_format_in_insert; const char * getName() const override { return "Query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - explicit ParserQuery(const char * end_) : end(end_) {} + explicit ParserQuery(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} }; } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index f1e007948f9..6041f986a49 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -49,7 +49,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserShowCreateAccessEntityQuery show_create_access_entity_p; ParserShowGrantsQuery show_grants_p; ParserShowPrivilegesQuery show_privileges_p; - ParserExplainQuery explain_p(end); + ParserExplainQuery explain_p(end, 
allow_settings_after_format_in_insert); ASTPtr query; diff --git a/src/Parsers/ParserQueryWithOutput.h b/src/Parsers/ParserQueryWithOutput.h index 1fd7bec1eea..dba420a077a 100644 --- a/src/Parsers/ParserQueryWithOutput.h +++ b/src/Parsers/ParserQueryWithOutput.h @@ -12,10 +12,16 @@ class ParserQueryWithOutput : public IParserBase { protected: const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "Query with output"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + public: - explicit ParserQueryWithOutput(const char * end_) : end(end_) {} + explicit ParserQueryWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} }; } diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index d50de5f700d..76d6a299857 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -53,6 +53,10 @@ bool ParserSetQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!s_set.ignore(pos, expected)) return false; + + /// Parse SET TRANSACTION ... queries using ParserTransactionControl + if (ParserKeyword{"TRANSACTION"}.check(pos, expected)) + return false; } SettingsChanges changes; diff --git a/src/Parsers/ParserTransactionControl.cpp b/src/Parsers/ParserTransactionControl.cpp new file mode 100644 index 00000000000..da593170002 --- /dev/null +++ b/src/Parsers/ParserTransactionControl.cpp @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserTransactionControl::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTTransactionControl::QueryType action; + UInt64 snapshot = 0; + + if (ParserKeyword("BEGIN TRANSACTION").ignore(pos, expected)) + action = ASTTransactionControl::BEGIN; + else if (ParserKeyword("COMMIT").ignore(pos, expected)) + action = ASTTransactionControl::COMMIT; + else if (ParserKeyword("ROLLBACK").ignore(pos, expected)) + action = ASTTransactionControl::ROLLBACK; + else if (ParserKeyword("SET TRANSACTION SNAPSHOT").ignore(pos, expected)) + { + action = ASTTransactionControl::SET_SNAPSHOT; + ASTPtr ast; + if (!ParserNumber{}.parse(pos, ast, expected)) + return false; + + const auto & snapshot_num = ast->as()->value; + if (!snapshot_num.tryGet(snapshot)) + return false; + } + else + return false; + + auto ast = std::make_shared(action); + ast->snapshot = snapshot; + node = ast; + return true; +} + +} diff --git a/src/Parsers/ParserTransactionControl.h b/src/Parsers/ParserTransactionControl.h new file mode 100644 index 00000000000..157c088624c --- /dev/null +++ b/src/Parsers/ParserTransactionControl.h @@ -0,0 +1,14 @@ +#pragma once +#include + +namespace DB +{ + +class ParserTransactionControl : public IParserBase +{ +public: + const char * getName() const override { return "TCL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index ed09d648477..af8c9dc58a6 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -386,7 +386,8 @@ std::pair splitMultipartQuery( const std::string & queries, std::vector & queries_list, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + bool allow_settings_after_format_in_insert) { ASTPtr ast; @@ -394,7 +395,7 @@ std::pair splitMultipartQuery( const char * pos = begin; /// parser moves pos 
from begin to the end of current query const char * end = begin + queries.size(); - ParserQuery parser(end); + ParserQuery parser(end, allow_settings_after_format_in_insert); queries_list.clear(); diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index d8d7426872b..cc077bbdab2 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -61,6 +61,7 @@ std::pair splitMultipartQuery( const std::string & queries, std::vector & queries_list, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + bool allow_settings_after_format_in_insert); } diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 7cdb0644a9d..2e045466ae8 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -66,9 +67,32 @@ static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, c result.emplace_back(name, type); } -IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_, bool allow_bools_as_numbers_) - : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_), allow_bools_as_numbers(allow_bools_as_numbers_) +IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, bool allow_bools_as_numbers_) + : ISchemaReader(in_), max_rows_to_read(format_settings.max_rows_to_read_for_schema_inference), allow_bools_as_numbers(allow_bools_as_numbers_) { + if (!format_settings.column_names_for_schema_inference.empty()) + { + /// column_names_for_schema_inference is a string in format 'column1,column2,column3,...' + boost::split(column_names, format_settings.column_names_for_schema_inference, boost::is_any_of(",")); + for (auto & column_name : column_names) + { + std::string col_name_trimmed = boost::trim_copy(column_name); + if (!col_name_trimmed.empty()) + column_name = col_name_trimmed; + } + } +} + +IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_, bool allow_bools_as_numbers_) + : IRowSchemaReader(in_, format_settings, allow_bools_as_numbers_) +{ + default_type = default_type_; +} + +IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, const DataTypes & default_types_, bool allow_bools_as_numbers_) + : IRowSchemaReader(in_, format_settings, allow_bools_as_numbers_) +{ + default_types = default_types_; } NamesAndTypesList IRowSchemaReader::readSchema() @@ -90,7 +114,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() if (!new_data_types[i]) continue; - chooseResultType(data_types[i], new_data_types[i], allow_bools_as_numbers, default_type, std::to_string(i + 1), row); + chooseResultType(data_types[i], new_data_types[i], allow_bools_as_numbers, getDefaultType(i), std::to_string(i + 1), row); } } @@ -115,12 +139,21 @@ NamesAndTypesList IRowSchemaReader::readSchema() for (size_t i = 0; i != data_types.size(); ++i) { /// Check that we could determine the type of this column. 
- checkTypeAndAppend(result, data_types[i], column_names[i], default_type, max_rows_to_read); + checkTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), max_rows_to_read); } return result; } +DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const +{ + if (default_type) + return default_type; + if (column < default_types.size() && default_types[column]) + return default_types[column]; + return nullptr; +} + IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_, bool allow_bools_as_numbers_) : ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_), allow_bools_as_numbers(allow_bools_as_numbers_) { diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 7e809d3d963..a9697374bc0 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -31,12 +31,17 @@ protected: /// Base class for schema inference for formats that read data row by row. /// It reads data row by row (up to max_rows_to_read), determines types of columns /// for each row and compare them with types from the previous rows. If some column -/// contains values with different types in different rows, the default type will be -/// used for this column or the exception will be thrown (if default type is not set). +/// contains values with different types in different rows, the default type +/// (from argument default_type_) will be used for this column or the exception +/// will be thrown (if default type is not set). If different columns have different +/// default types, you can provide them by default_types_ argument. class IRowSchemaReader : public ISchemaReader { public: - IRowSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr, bool allow_bools_as_numbers_ = false); + IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, bool allow_bools_as_numbers_ = false); + IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, DataTypePtr default_type_, bool allow_bools_as_numbers_ = false); + IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, const DataTypes & default_types_, bool allow_bools_as_numbers_ = false); + NamesAndTypesList readSchema() override; protected: @@ -49,8 +54,11 @@ protected: void setColumnNames(const std::vector & names) { column_names = names; } private: + + DataTypePtr getDefaultType(size_t column) const; size_t max_rows_to_read; DataTypePtr default_type; + DataTypes default_types; bool allow_bools_as_numbers; std::vector column_names; }; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 37a107ae367..792ebd09392 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -3,6 +3,7 @@ #if USE_ARROW #include +#include #include #include #include @@ -171,8 +172,9 @@ NamesAndTypesList ArrowSchemaReader::readSchema() schema = createFileReader(in, format_settings, is_stopped)->schema(); } - auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, stream ? "ArrowStream" : "Arrow"); - return header.getNamesAndTypesList(); + auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( + *schema, stream ? 
"ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference); + return getNamesAndRecursivelyNullableTypes(header); } void registerInputFormatArrow(FormatFactory & factory) @@ -202,13 +204,13 @@ void registerArrowSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "Arrow", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, false, settings); }); factory.registerSchemaReader( "ArrowStream", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, true, settings); });} diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 0a72e561e4e..c792d828e44 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -26,11 +27,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. @@ -329,12 +332,17 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( const std::string & format_name, bool is_nullable, std::unordered_map> & dictionary_values, - bool read_ints_as_dates) + bool read_ints_as_dates, + bool allow_null_type, + bool skip_columns_with_unsupported_types, + bool & skipped) { if (!is_nullable && arrow_column->null_count() && arrow_column->type()->id() != arrow::Type::LIST && arrow_column->type()->id() != arrow::Type::MAP && arrow_column->type()->id() != arrow::Type::STRUCT) { - auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates); + auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; auto nullmap_column = readByteMapFromArrowColumn(arrow_column); auto nullable_type = std::make_shared(std::move(nested_column.type)); auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); @@ -379,7 +387,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::MAP: { auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; + auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); const auto * tuple_column = assert_cast(nested_column.column.get()); @@ -391,7 +402,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::LIST: { auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, 
skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); auto array_column = ColumnArray::create(nested_column.column, offsets_column); auto array_type = std::make_shared(nested_column.type); @@ -416,7 +429,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( for (int i = 0; i != arrow_struct_type->num_fields(); ++i) { auto nested_arrow_column = std::make_shared(nested_arrow_columns[i]); - auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates); + auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); + if (skipped) + return {}; tuple_elements.emplace_back(std::move(element.column)); tuple_types.emplace_back(std::move(element.type)); tuple_names.emplace_back(std::move(element.name)); @@ -439,7 +454,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( dict_array.emplace_back(dict_chunk.dictionary()); } auto arrow_dict_column = std::make_shared(dict_array); - auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates, allow_null_type, skip_columns_with_unsupported_types, skipped); /// We should convert read column to ColumnUnique. auto tmp_lc_column = DataTypeLowCardinality(dict_column.type).createColumn(); @@ -469,9 +484,33 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( # undef DISPATCH // TODO: read JSON as a string? // TODO: read UUID as a string? + case arrow::Type::NA: + { + if (allow_null_type) + { + auto type = std::make_shared(); + auto column = ColumnNothing::create(arrow_column->length()); + return {std::move(column), type, column_name}; + } + [[fallthrough]]; + } default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, - "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name); + { + if (skip_columns_with_unsupported_types) + { + skipped = true; + return {}; + } + + throw Exception( + ErrorCodes::UNKNOWN_TYPE, + "Unsupported {} type '{}' of an input column '{}'. 
If it happens during schema inference and you want to skip columns with " + "unsupported types, you can enable setting input_format_{}_skip_columns_with_unsupported_types_in_schema_inference", + format_name, + arrow_column->type()->name(), + column_name, + boost::algorithm::to_lower_copy(format_name)); + } } } @@ -485,8 +524,9 @@ static void checkStatus(const arrow::Status & status, const String & column_name throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()}; } + Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( - const arrow::Schema & schema, const std::string & format_name, const Block * hint_header, bool ignore_case) + const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types, const Block * hint_header, bool ignore_case) { ColumnsWithTypeAndName sample_columns; std::unordered_set nested_table_names; @@ -512,9 +552,14 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( arrow::ArrayVector array_vector = {arrow_array}; auto arrow_column = std::make_shared(array_vector); std::unordered_map> dict_values; - ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false); - - sample_columns.emplace_back(std::move(sample_column)); + bool skipped = false; + bool allow_null_type = false; + if (hint_header && hint_header->has(field->name()) && hint_header->getByName(field->name()).type->isNullable()) + allow_null_type = true; + ColumnWithTypeAndName sample_column = readColumnFromArrowColumn( + arrow_column, field->name(), format_name, false, dict_values, false, allow_null_type, skip_columns_with_unsupported_types, skipped); + if (!skipped) + sample_columns.emplace_back(std::move(sample_column)); } return Block(std::move(sample_columns)); } @@ -559,6 +604,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & UInt64 num_rows = name_to_column_ptr.begin()->second->length(); columns_list.reserve(header.rows()); std::unordered_map nested_tables; + bool skipped = false; for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) { const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); @@ -582,7 +628,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & { std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; ColumnsWithTypeAndName cols - = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)}; + = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)}; Block block(cols); nested_tables[search_nested_table_name] = std::make_shared(Nested::flatten(block)); } @@ -615,7 +661,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & else { auto arrow_column = name_to_column_ptr[search_column_name]; - column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true); + column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true, true, false, skipped); } try @@ -642,7 +688,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const { std::vector missing_columns; - auto block_from_arrow = arrowSchemaToCHHeader(schema, 
format_name, &header, case_insensitive_matching); + auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, false, &header, case_insensitive_matching); auto flatten_block_from_arrow = Nested::flatten(block_from_arrow); for (size_t i = 0, columns = header.columns(); i < columns; ++i) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 0a712326941..695e14b7bba 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -38,7 +38,11 @@ public: /// Transform arrow schema to ClickHouse header. If hint_header is provided, /// we will skip columns in schema that are not in hint_header. static Block arrowSchemaToCHHeader( - const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr, bool ignore_case = false); + const arrow::Schema & schema, + const std::string & format_name, + bool skip_columns_with_unsupported_types = false, + const Block * hint_header = nullptr, + bool ignore_case = false); private: const Block & header; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index a372df41344..29429650c19 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -924,12 +924,12 @@ void registerInputFormatAvro(FormatFactory & factory) void registerAvroSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Avro", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("Avro", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, false, settings); }); - factory.registerSchemaReader("AvroConfluent", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("AvroConfluent", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, true, settings); }); diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index 6918220feb4..d3de2fbf494 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -95,7 +95,7 @@ void BinaryFormatReader::skipField(size_t file_column) } BinaryWithNamesAndTypesSchemaReader::BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) - : FormatWithNamesAndTypesSchemaReader(in_, 0, true, true, &reader), reader(in_, format_settings_) + : FormatWithNamesAndTypesSchemaReader(in_, format_settings_, true, true, &reader), reader(in_, format_settings_) { } @@ -119,7 +119,7 @@ void registerInputFormatRowBinary(FormatFactory & factory) void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("RowBinaryWithNamesAndTypes", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("RowBinaryWithNamesAndTypes", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); }); diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 216ec6b295a..f246d5c0a35 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include @@ -259,16 +258,15 @@ bool 
CSVFormatReader::readField( } -CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_, ContextPtr context_) +CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_) : FormatWithNamesAndTypesSchemaReader( in_, - format_setting_.max_rows_to_read_for_schema_inference, + format_setting_, with_names_, with_types_, &reader, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV)) , reader(in_, format_setting_) - , context(context_) { } @@ -279,7 +277,7 @@ DataTypes CSVSchemaReader::readRowAndGetDataTypes() return {}; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV, context); + return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV); } @@ -382,9 +380,9 @@ void registerCSVSchemaReader(FormatFactory & factory) { auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerSchemaReader(format_name, [with_names, with_types](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader(format_name, [with_names, with_types](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, with_names, with_types, settings, context); + return std::make_shared(buf, with_names, with_types, settings); }); }; diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index ad9f6c4e492..ee45264d573 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -74,13 +74,12 @@ public: class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader { public: - CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_, ContextPtr context_); + CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_); private: DataTypes readRowAndGetDataTypes() override; CSVFormatReader reader; - ContextPtr context; }; } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index d2e0d6e21a9..74c5fb1945a 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -289,17 +289,16 @@ void CustomSeparatedFormatReader::setReadBuffer(ReadBuffer & in_) } CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( - ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_, ContextPtr context_) + ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_) : FormatWithNamesAndTypesSchemaReader( buf, - format_setting_.max_rows_to_read_for_schema_inference, + format_setting_, with_names_, with_types_, &reader, getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule)) , buf(in_) , reader(buf, ignore_spaces_, updateFormatSettings(format_setting_)) - , context(context_) { } @@ -315,7 +314,7 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes() first_row = false; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), context); + return 
determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); } void registerInputFormatCustomSeparated(FormatFactory & factory) @@ -343,9 +342,9 @@ void registerCustomSeparatedSchemaReader(FormatFactory & factory) { auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerSchemaReader(format_name, [with_names, with_types, ignore_spaces](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader(format_name, [with_names, with_types, ignore_spaces](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, with_names, with_types, ignore_spaces, settings, context); + return std::make_shared(buf, with_names, with_types, ignore_spaces, settings); }); }; diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index a2f4509d307..d9e62a1b8e9 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -92,14 +92,13 @@ private: class CustomSeparatedSchemaReader : public FormatWithNamesAndTypesSchemaReader { public: - CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_, ContextPtr context_); + CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_); private: DataTypes readRowAndGetDataTypes() override; PeekableReadBuffer buf; CustomSeparatedFormatReader reader; - ContextPtr context; bool first_row = true; }; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 0496e3e41a8..854aefc7562 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -181,15 +181,10 @@ bool JSONCompactEachRowFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & return true; } -JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_) +JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( + ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( - in_, - format_settings_.max_rows_to_read_for_schema_inference, - with_names_, - with_types_, - &reader, - nullptr, - format_settings_.json.read_bools_as_numbers) + in_, format_settings_, with_names_, with_types_, &reader, nullptr, format_settings_.json.read_bools_as_numbers) , reader(in_, yield_strings_, format_settings_) { } @@ -239,7 +234,7 @@ void registerJSONCompactEachRowSchemaReader(FormatFactory & factory) { auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerSchemaReader(format_name, [=](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader(format_name, [=](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, with_names, with_types, json_strings, settings); }); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 2eb5b143107..674bad5a13d 100644 --- 
a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -387,12 +387,12 @@ void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory void registerJSONEachRowSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("JSONEachRow", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("JSONEachRow", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_unique(buf, false, settings); }); - factory.registerSchemaReader("JSONStringsEachRow", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("JSONStringsEachRow", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_unique(buf, true, settings); }); diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 722cedbab30..48502e7af3a 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -414,7 +414,7 @@ void MsgPackRowInputFormat::setReadBuffer(ReadBuffer & in_) } MsgPackSchemaReader::MsgPackSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) - : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns) + : IRowSchemaReader(buf, format_settings_), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns) { if (!number_of_columns) throw Exception(ErrorCodes::BAD_ARGUMENTS, "You must specify setting input_format_msgpack_number_of_columns to extract table schema from MsgPack data"); @@ -535,7 +535,7 @@ void registerInputFormatMsgPack(FormatFactory & factory) void registerMsgPackSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("MsgPack", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("MsgPack", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); }); diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index bd95cfd6376..c1dc60022f5 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -133,7 +133,7 @@ void registerOutputFormatNative(FormatFactory & factory) void registerNativeSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &, ContextPtr) + factory.registerSchemaReader("Native", [](ReadBuffer & buf, const FormatSettings &) { return std::make_shared(buf); }); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 8b8426132de..333129aee81 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -3,6 +3,7 @@ #if USE_ORC #include +#include #include #include #include @@ -183,8 +184,9 @@ NamesAndTypesList ORCSchemaReader::readSchema() std::shared_ptr schema; std::atomic is_stopped = 0; getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped); - auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "ORC"); - return header.getNamesAndTypesList(); + auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( + *schema, "ORC", 
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); + return getNamesAndRecursivelyNullableTypes(header); } void registerInputFormatORC(FormatFactory & factory) @@ -205,7 +207,7 @@ void registerORCSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "ORC", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 69e51e0dad2..af16d30bcfe 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -4,6 +4,7 @@ #if USE_PARQUET #include +#include #include #include #include @@ -176,8 +177,9 @@ NamesAndTypesList ParquetSchemaReader::readSchema() std::shared_ptr schema; std::atomic is_stopped = 0; getFileReaderAndSchema(in, file_reader, schema, format_settings, is_stopped); - auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, "Parquet"); - return header.getNamesAndTypesList(); + auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( + *schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference); + return getNamesAndRecursivelyNullableTypes(header); } void registerInputFormatParquet(FormatFactory & factory) @@ -198,7 +200,7 @@ void registerParquetSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "Parquet", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); } diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 4754b70d375..f18b6b0aaab 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -128,15 +128,14 @@ void RegexpRowInputFormat::setReadBuffer(ReadBuffer & in_) IInputFormat::setReadBuffer(*buf); } -RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, ContextPtr context_) +RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IRowSchemaReader( buf, - format_settings_.max_rows_to_read_for_schema_inference, + format_settings_, getDefaultDataTypeForEscapingRule(format_settings_.regexp.escaping_rule)) , format_settings(format_settings_) , field_extractor(format_settings) , buf(in_) - , context(context_) { } @@ -152,7 +151,7 @@ DataTypes RegexpSchemaReader::readRowAndGetDataTypes() for (size_t i = 0; i != field_extractor.getMatchedFieldsSize(); ++i) { String field(field_extractor.getField(i)); - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule, context)); + data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule)); } return data_types; @@ -203,9 +202,9 @@ void registerFileSegmentationEngineRegexp(FormatFactory & factory) void registerRegexpSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Regexp", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader("Regexp", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, settings, context); + return std::make_shared(buf, settings); }); } diff --git 
a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 04f24bbb3e4..3cc6a3192fd 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -76,7 +76,7 @@ private: class RegexpSchemaReader : public IRowSchemaReader { public: - RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, ContextPtr context_); + RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); private: DataTypes readRowAndGetDataTypes() override; @@ -85,7 +85,6 @@ private: const FormatSettings format_settings; RegexpFieldExtractor field_extractor; PeekableReadBuffer buf; - ContextPtr context; }; } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index ceea174c0e8..26c7d1aced5 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -283,7 +283,7 @@ void registerInputFormatTSKV(FormatFactory & factory) } void registerTSKVSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("TSKV", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader("TSKV", [](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, settings); }); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index bb844ec68ea..b6c9438a57c 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -235,7 +235,7 @@ TabSeparatedSchemaReader::TabSeparatedSchemaReader( ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( in_, - format_settings_.max_rows_to_read_for_schema_inference, + format_settings_, with_names_, with_types_, &reader, @@ -280,7 +280,7 @@ void registerTSVSchemaReader(FormatFactory & factory) { auto register_func = [&](const String & format_name, bool with_names, bool with_types) { - factory.registerSchemaReader(format_name, [with_names, with_types, is_raw](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + factory.registerSchemaReader(format_name, [with_names, with_types, is_raw](ReadBuffer & buf, const FormatSettings & settings) { return std::make_shared(buf, with_names, with_types, is_raw, settings); }); diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 06d6ba06bcc..df4d49b172c 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -453,14 +453,12 @@ TemplateSchemaReader::TemplateSchemaReader( const ParsedTemplateFormatString & format_, const ParsedTemplateFormatString & row_format_, std::string row_between_delimiter, - const FormatSettings & format_settings_, - ContextPtr context_) - : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference) + const FormatSettings & format_settings_) + : IRowSchemaReader(buf, format_settings_, getDefaultDataTypeForEscapingRules(row_format_.escaping_rules)) , buf(in_) , format(format_) , row_format(row_format_) , format_settings(format_settings_) - , context(context_) , format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings) { 
setColumnNames(row_format.column_names); @@ -489,7 +487,7 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes() format_settings.csv.delimiter = row_format.delimiters[i + 1].empty() ? format_settings.csv.delimiter : row_format.delimiters[i + 1].front(); field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings); - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i], context)); + data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i])); } format_reader.skipRowEndDelimiter(); @@ -564,12 +562,12 @@ void registerTemplateSchemaReader(FormatFactory & factory) { for (bool ignore_spaces : {false, true}) { - factory.registerSchemaReader(ignore_spaces ? "TemplateIgnoreSpaces" : "Template", [ignore_spaces](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader(ignore_spaces ? "TemplateIgnoreSpaces" : "Template", [ignore_spaces](ReadBuffer & buf, const FormatSettings & settings) { size_t index = 0; auto idx_getter = [&](const String &) -> std::optional { return index++; }; auto row_format = fillRowFormat(settings, idx_getter, false); - return std::make_shared(buf, ignore_spaces, fillResultSetFormat(settings), row_format, settings.template_settings.row_between_delimiter, settings, context); + return std::make_shared(buf, ignore_spaces, fillResultSetFormat(settings), row_format, settings.template_settings.row_between_delimiter, settings); }); } } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index b5ced707ace..ab7043f057e 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -116,8 +116,7 @@ public: const ParsedTemplateFormatString & format_, const ParsedTemplateFormatString & row_format_, std::string row_between_delimiter, - const FormatSettings & format_settings_, - ContextPtr context_); + const FormatSettings & format_settings_); DataTypes readRowAndGetDataTypes() override; @@ -126,7 +125,6 @@ private: const ParsedTemplateFormatString format; const ParsedTemplateFormatString row_format; FormatSettings format_settings; - ContextPtr context; TemplateFormatReader format_reader; bool first_row = true; }; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index bf8feb077ed..e8b4c69bd19 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -571,8 +572,8 @@ void ValuesBlockInputFormat::setReadBuffer(ReadBuffer & in_) IInputFormat::setReadBuffer(*buf); } -ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, ContextPtr context_) - : IRowSchemaReader(buf, format_settings_.max_rows_to_read_for_schema_inference), buf(in_), context(context_) +ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : IRowSchemaReader(buf, format_settings_), buf(in_), format_settings(format_settings_) { } @@ -589,38 +590,25 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes() return {}; assertChar('(', buf); - PeekableReadBufferCheckpoint checkpoint(buf); - skipToNextRow(&buf, 0, 1); - buf.makeContinuousMemoryFromCheckpointToPos(); - buf.rollbackToCheckpoint(); - - Tokens 
tokens(buf.position(), buf.buffer().end()); - IParser::Pos token_iterator(tokens, context->getSettingsRef().max_parser_depth); - + skipWhitespaceIfAny(buf); DataTypes data_types; - bool finish = false; - while (!finish) + String value; + while (!buf.eof() && *buf.position() != ')') { - Expected expected; - ASTPtr ast; + if (!data_types.empty()) + { + skipWhitespaceIfAny(buf); + assertChar(',', buf); + skipWhitespaceIfAny(buf); + } - bool parsed = parser.parse(token_iterator, ast, expected); - /// Consider delimiter after value (',' or ')') as part of expression - parsed &= token_iterator->type == TokenType::Comma || token_iterator->type == TokenType::ClosingRoundBracket; - - if (!parsed) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse expression here: {}, token: {}", - String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())), String(token_iterator.get().begin, token_iterator.get().end)); - - std::pair result = evaluateConstantExpression(ast, context); - data_types.push_back(generalizeDataType(result.second)); - - if (token_iterator->type == TokenType::ClosingRoundBracket) - finish = true; - ++token_iterator; - buf.position() = const_cast(token_iterator->begin); + readQuotedFieldIntoString(value, buf); + auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); + data_types.push_back(std::move(type)); } + assertChar(')', buf); + skipWhitespaceIfAny(buf); if (!buf.eof() && *buf.position() == ',') ++buf.position(); @@ -642,9 +630,9 @@ void registerInputFormatValues(FormatFactory & factory) void registerValuesSchemaReader(FormatFactory & factory) { - factory.registerSchemaReader("Values", [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr context) + factory.registerSchemaReader("Values", [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_shared(buf, settings, context); + return std::make_shared(buf, settings); }); } diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h index e1521955472..77967181566 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h @@ -97,13 +97,13 @@ private: class ValuesSchemaReader : public IRowSchemaReader { public: - ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings, ContextPtr context_); + ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings); private: DataTypes readRowAndGetDataTypes() override; PeekableReadBuffer buf; - ContextPtr context; + const FormatSettings format_settings; ParserExpression parser; bool first_row = true; }; diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 4e67007c943..2bc8409c70e 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -293,13 +293,13 @@ void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_) FormatWithNamesAndTypesSchemaReader::FormatWithNamesAndTypesSchemaReader( ReadBuffer & in_, - size_t max_rows_to_read_, + const FormatSettings & format_settings, bool with_names_, bool with_types_, FormatWithNamesAndTypesReader * format_reader_, DataTypePtr default_type_, bool allow_bools_as_numbers_) - : IRowSchemaReader(in_, max_rows_to_read_, default_type_, allow_bools_as_numbers_), with_names(with_names_), with_types(with_types_), 
format_reader(format_reader_) + : IRowSchemaReader(in_, format_settings, default_type_, allow_bools_as_numbers_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_) { } diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index 8fbd426112c..6f64c6ad4a2 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -124,7 +124,7 @@ class FormatWithNamesAndTypesSchemaReader : public IRowSchemaReader public: FormatWithNamesAndTypesSchemaReader( ReadBuffer & in, - size_t max_rows_to_read_, + const FormatSettings & format_settings, bool with_names_, bool with_types_, FormatWithNamesAndTypesReader * format_reader_, diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 110d4308236..f4a30a9fee7 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -563,7 +563,7 @@ void RemoteQueryExecutor::sendExternalTables() { SelectQueryInfo query_info; auto metadata_snapshot = cur->getInMemoryMetadataPtr(); - auto storage_snapshot = cur->getStorageSnapshot(metadata_snapshot); + auto storage_snapshot = cur->getStorageSnapshot(metadata_snapshot, context); QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage( context, QueryProcessingStage::Complete, storage_snapshot, query_info); diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index eeaf5b32a92..7578f8afc1d 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -868,7 +868,7 @@ namespace query_text = std::move(*(query_info.mutable_query())); const char * begin = query_text.data(); const char * end = begin + query_text.size(); - ParserQuery parser(end); + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); ast = parseQuery(parser, begin, end, "", settings.max_query_size, settings.max_parser_depth); /// Choose input format. 
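Both server entry points (the gRPC handler above and the PostgreSQL handler below) now thread the allow_settings_after_format_in_insert setting into query parsing. A minimal sketch of the resulting pattern, assuming a standalone context; the INSERT text and local variables are invented for illustration, while the calls mirror the hunks themselves:

    /// With the setting enabled, SETTINGS may follow FORMAT in an INSERT query.
    const String query_text = "INSERT INTO t FORMAT CSV SETTINGS max_insert_threads = 4";
    const char * begin = query_text.data();
    const char * end = begin + query_text.size();
    ParserQuery parser(end, settings.allow_settings_after_format_in_insert);
    ASTPtr ast = parseQuery(parser, begin, end, "", settings.max_query_size, settings.max_parser_depth);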
diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 04e43ed63aa..489c47b3c31 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -275,7 +275,10 @@ void PostgreSQLHandler::processQuery() const auto & settings = session->sessionContext()->getSettingsRef(); std::vector queries; - auto parse_res = splitMultipartQuery(query->query, queries, settings.max_query_size, settings.max_parser_depth); + auto parse_res = splitMultipartQuery(query->query, queries, + settings.max_query_size, + settings.max_parser_depth, + settings.allow_settings_after_format_in_insert); if (!parse_res.second) throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index d7c732aee02..47490aae75b 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -679,7 +679,7 @@ bool StorageFileLog::streamToViews() throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist", ErrorCodes::LOGICAL_ERROR); auto metadata_snapshot = getInMemoryMetadataPtr(); - auto storage_snapshot = getStorageSnapshot(metadata_snapshot); + auto storage_snapshot = getStorageSnapshot(metadata_snapshot, getContext()); auto max_streams_number = std::min(filelog_settings->max_threads.value, file_infos.file_names.size()); /// No files to parse diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 17e9e55455c..013f7e97682 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -161,6 +161,11 @@ public: /// Returns true if the storage supports reading of subcolumns of complex types. virtual bool supportsSubcolumns() const { return false; } + /// Returns true if the storage supports transactions for SELECT, INSERT and ALTER queries. + /// Storage may throw an exception later if some query kind is not fully supported. + /// This method can return true for readonly engines that return the same rows for reading (such as SystemNumbers) + virtual bool supportsTransactions() const { return false; } + /// Returns true if the storage supports storing of dynamic subcolumns. /// For now it makes sense only for data type Object. virtual bool supportsDynamicSubcolumns() const { return false; } @@ -483,6 +488,16 @@ public: throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + virtual void waitForMutation(const String & /*mutation_id*/) + { + throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + + virtual void setMutationCSN(const String & /*mutation_id*/, UInt64 /*csn*/) + { + throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + /// Cancel a part move to shard. virtual CancellationCode killPartMoveToShard(const UUID & /*task_uuid*/) { @@ -553,11 +568,6 @@ public: /// Similar to above but checks for DETACH. It's only used for DICTIONARIES. virtual void checkTableCanBeDetached() const {} - /// Checks that Partition could be dropped right now - /// Otherwise - throws an exception with detailed information. - /// We do not use mutex because it is not very important that the size could change during the operation. - virtual void checkPartitionCanBeDropped(const ASTPtr & /*partition*/) {} - /// Returns true if Storage may store some data on disk. 
/// NOTE: may not be equivalent to !getDataPaths().empty() virtual bool storesDataOnDisk() const { return false; } @@ -610,15 +620,15 @@ public: virtual std::optional lifetimeBytes() const { return {}; } /// Creates a storage snapshot from given metadata. - virtual StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const + virtual StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const { return std::make_shared(*this, metadata_snapshot); } /// Creates a storage snapshot from given metadata and columns, which are used in query. - virtual StorageSnapshotPtr getStorageSnapshotForQuery(const StorageMetadataPtr & metadata_snapshot, const ASTPtr & /*query*/) const + virtual StorageSnapshotPtr getStorageSnapshotForQuery(const StorageMetadataPtr & metadata_snapshot, const ASTPtr & /*query*/, ContextPtr query_context) const { - return getStorageSnapshot(metadata_snapshot); + return getStorageSnapshot(metadata_snapshot, query_context); } private: diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 71a494c93fa..542eb392d01 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -647,7 +647,7 @@ bool StorageKafka::streamToViews() CurrentMetrics::Increment metric_increment{CurrentMetrics::KafkaBackgroundReads}; ProfileEvents::increment(ProfileEvents::KafkaBackgroundReads); - auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr()); + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); // Create an INSERT query for streaming data auto insert = std::make_shared(); diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 4e7dcc60696..0dcccc33266 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -573,6 +573,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( auto volume = std::make_shared("volume_" + part_name, disk, 0); MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared(data, part_name, volume); + new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); for (auto i = 0ul; i < projections; ++i) { @@ -601,7 +602,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( metadata_snapshot->projections.get(projection_name).metadata, block.getNamesAndTypesList(), {}, - CompressionCodecFactory::instance().get("NONE", {})); + CompressionCodecFactory::instance().get("NONE", {}), + NO_TRANSACTION_PTR); part_out.write(block); part_out.finalizePart(new_projection_part, false); @@ -625,7 +627,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( MergedBlockOutputStream part_out( new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, - CompressionCodecFactory::instance().get("NONE", {})); + CompressionCodecFactory::instance().get("NONE", {}), NO_TRANSACTION_PTR); part_out.write(block); part_out.finalizePart(new_data_part, false); @@ -753,6 +755,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( assertEOF(in); auto volume = std::make_shared("volume_" + part_name, disk, 0); MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, volume, part_relative_path); + new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); new_data_part->is_temp = true; new_data_part->modification_time = time(nullptr); new_data_part->loadColumnsChecksumsIndexes(true, false); @@ 
-842,6 +845,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( assertEOF(in); MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, volume, part_relative_path); + new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); new_data_part->is_temp = true; new_data_part->modification_time = time(nullptr); new_data_part->loadColumnsChecksumsIndexes(true, false); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 0ed96f5dda4..d704d8fc435 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -29,6 +28,8 @@ #include #include #include +#include +#include namespace CurrentMetrics @@ -65,6 +66,12 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +static std::unique_ptr<ReadBufferFromFileBase> openForReading(const DiskPtr & disk, const String & path) +{ + size_t file_size = disk->getFileSize(path); + return disk->readFile(path, ReadSettings().adjustBufferSize(file_size), file_size); +} + void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const PartMetadataManagerPtr & manager) { auto metadata_snapshot = data.getInMemoryMetadataPtr(); @@ -466,6 +473,7 @@ SerializationPtr IMergeTreeDataPart::getSerialization(const NameAndTypePair & co void IMergeTreeDataPart::removeIfNeeded() { + assert(assertHasValidVersionMetadata()); if (!is_temp && state != State::DeleteOnDestroy) return; @@ -790,10 +798,14 @@ NameSet IMergeTreeDataPart::getFileNamesWithoutChecksums() const NameSet result = {"checksums.txt", "columns.txt"}; String default_codec_path = fs::path(getFullRelativePath()) / DEFAULT_COMPRESSION_CODEC_FILE_NAME; + String txn_version_path = fs::path(getFullRelativePath()) / TXN_VERSION_METADATA_FILE_NAME; if (volume->getDisk()->exists(default_codec_path)) result.emplace(DEFAULT_COMPRESSION_CODEC_FILE_NAME); + if (volume->getDisk()->exists(txn_version_path)) + result.emplace(TXN_VERSION_METADATA_FILE_NAME); + return result; } @@ -1223,6 +1235,218 @@ void IMergeTreeDataPart::loadColumns(bool require) setSerializationInfos(infos); } +void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) const +{ + TransactionID expected_tid = txn ? txn->tid : Tx::PrehistoricTID; + if (version.creation_tid != expected_tid) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "CreationTID of part {} (table {}) is set to unexpected value {}, it's a bug. Current transaction: {}", + name, storage.getStorageID().getNameForLogs(), version.creation_tid, txn ?
txn->dumpDescription() : ""); + + assert(!txn || storage.supportsTransactions()); + assert(!txn || volume->getDisk()->exists(fs::path(getFullRelativePath()) / TXN_VERSION_METADATA_FILE_NAME)); +} + +void IMergeTreeDataPart::storeVersionMetadata() const +{ + if (!wasInvolvedInTransaction()) + return; + + LOG_TEST(storage.log, "Writing version for {} (creation: {}, removal {})", name, version.creation_tid, version.removal_tid); + assert(storage.supportsTransactions()); + + if (!isStoredOnDisk()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transactions are not supported for in-memory parts (table: {}, part: {})", + storage.getStorageID().getNameForLogs(), name); + + String version_file_name = fs::path(getFullRelativePath()) / TXN_VERSION_METADATA_FILE_NAME; + String tmp_version_file_name = version_file_name + ".tmp"; + DiskPtr disk = volume->getDisk(); + { + /// TODO IDisk interface does not allow to open file with O_EXCL flag (for DiskLocal), + /// so we create empty file at first (expecting that createFile throws if file already exists) + /// and then overwrite it. + disk->createFile(tmp_version_file_name); + auto out = disk->writeFile(tmp_version_file_name, 256, WriteMode::Rewrite); + version.write(*out); + out->finalize(); + out->sync(); + } + + SyncGuardPtr sync_guard; + if (storage.getSettings()->fsync_part_directory) + sync_guard = disk->getDirectorySyncGuard(getFullRelativePath()); + disk->replaceFile(tmp_version_file_name, version_file_name); +} + +void IMergeTreeDataPart::appendCSNToVersionMetadata(VersionMetadata::WhichCSN which_csn) const +{ + assert(!version.creation_tid.isEmpty()); + assert(!(which_csn == VersionMetadata::WhichCSN::CREATION && version.creation_tid.isPrehistoric())); + assert(!(which_csn == VersionMetadata::WhichCSN::CREATION && version.creation_csn == 0)); + assert(!(which_csn == VersionMetadata::WhichCSN::REMOVAL && (version.removal_tid.isPrehistoric() || version.removal_tid.isEmpty()))); + assert(!(which_csn == VersionMetadata::WhichCSN::REMOVAL && version.removal_csn == 0)); + assert(isStoredOnDisk()); + + /// Small enough appends to file are usually atomic, + /// so we append new metadata instead of rewriting file to reduce number of fsyncs. + /// We don't need to do fsync when writing CSN, because in case of hard restart + /// we will be able to restore CSN from transaction log in Keeper. + + String version_file_name = fs::path(getFullRelativePath()) / TXN_VERSION_METADATA_FILE_NAME; + DiskPtr disk = volume->getDisk(); + auto out = disk->writeFile(version_file_name, 256, WriteMode::Append); + version.writeCSN(*out, which_csn); + out->finalize(); +} + +void IMergeTreeDataPart::appendRemovalTIDToVersionMetadata(bool clear) const +{ + assert(!version.creation_tid.isEmpty()); + assert(version.removal_csn == 0); + assert(!version.removal_tid.isEmpty()); + assert(isStoredOnDisk()); + + if (version.creation_tid.isPrehistoric() && !clear) + { + /// Metadata file probably does not exist, because it was not written on part creation, because it was created without a transaction. + /// Let's create it (if needed). Concurrent writes are not possible, because creation_csn is prehistoric and we own removal_tid_lock. 
+ storeVersionMetadata(); + return; + } + + if (clear) + LOG_TEST(storage.log, "Clearing removal TID for {} (creation: {}, removal {})", name, version.creation_tid, version.removal_tid); + else + LOG_TEST(storage.log, "Appending removal TID for {} (creation: {}, removal {})", name, version.creation_tid, version.removal_tid); + + String version_file_name = fs::path(getFullRelativePath()) / TXN_VERSION_METADATA_FILE_NAME; + DiskPtr disk = volume->getDisk(); + auto out = disk->writeFile(version_file_name, 256, WriteMode::Append); + version.writeRemovalTID(*out, clear); + out->finalize(); + + /// fsync is not required when we clearing removal TID, because after hard restart we will fix metadata + if (!clear) + out->sync(); +} + +void IMergeTreeDataPart::loadVersionMetadata() const +try +{ + String version_file_name = fs::path(getFullRelativePath()) / TXN_VERSION_METADATA_FILE_NAME; + String tmp_version_file_name = version_file_name + ".tmp"; + DiskPtr disk = volume->getDisk(); + + auto remove_tmp_file = [&]() + { + auto last_modified = disk->getLastModified(tmp_version_file_name); + auto buf = openForReading(disk, tmp_version_file_name); + String content; + readStringUntilEOF(content, *buf); + LOG_WARNING(storage.log, "Found file {} that was last modified on {}, has size {} and the following content: {}", + tmp_version_file_name, last_modified.epochTime(), content.size(), content); + disk->removeFile(tmp_version_file_name); + }; + + if (disk->exists(version_file_name)) + { + auto buf = openForReading(disk, version_file_name); + version.read(*buf); + if (disk->exists(tmp_version_file_name)) + remove_tmp_file(); + return; + } + + /// Four (?) cases are possible: + /// 1. Part was created without transactions. + /// 2. Version metadata file was not renamed from *.tmp on part creation. + /// 3. Version metadata were written to *.tmp file, but hard restart happened before fsync. + /// 4. Fsyncs in storeVersionMetadata() work incorrectly. + + if (!disk->exists(tmp_version_file_name)) + { + /// Case 1. + /// We do not have version metadata and transactions history for old parts, + /// so let's consider that such parts were created by some ancient transaction + /// and were committed with some prehistoric CSN. + /// NOTE It might be Case 3, but version metadata file is written on part creation before other files, + /// so it's not Case 3 if part is not broken. + version.setCreationTID(Tx::PrehistoricTID, nullptr); + version.creation_csn = Tx::PrehistoricCSN; + return; + } + + /// Case 2. + /// Content of *.tmp file may be broken, just use fake TID. + /// Transaction was not committed if *.tmp file was not renamed, so we should complete rollback by removing part. 
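The four cases above are distinguishable because of the order in which the new methods touch txn_version.txt: the file is first written under a *.tmp name, fsynced and renamed, and afterwards only appended to. A rough sketch of that order (not part of the patch; the wrapper function is invented, error handling and locking are omitted):

    /// Hypothetical illustration of the intended write order for txn_version.txt.
    void illustrateVersionMetadataLifecycle(const IMergeTreeDataPart & part)
    {
        part.storeVersionMetadata();                                           /// on creation: write *.tmp, fsync, rename
        part.appendCSNToVersionMetadata(VersionMetadata::WhichCSN::CREATION);  /// after the creating transaction commits (no fsync, the CSN can be restored from the log)
        part.appendRemovalTIDToVersionMetadata();                              /// when a transaction locks the part for removal (fsynced)
        part.appendCSNToVersionMetadata(VersionMetadata::WhichCSN::REMOVAL);   /// after the removing transaction commits
    }

So a missing file means a pre-transaction part, and a leftover *.tmp means the creating transaction never reached the rename, which is why the part is rolled back below.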
+ version.setCreationTID(Tx::DummyTID, nullptr); + version.creation_csn = Tx::RolledBackCSN; + remove_tmp_file(); +} +catch (Exception & e) +{ + e.addMessage("While loading version metadata from table {} part {}", storage.getStorageID().getNameForLogs(), name); + throw; +} + +bool IMergeTreeDataPart::wasInvolvedInTransaction() const +{ + assert(!version.creation_tid.isEmpty() || (state == State::Temporary /* && std::uncaught_exceptions() */)); + bool created_by_transaction = !version.creation_tid.isPrehistoric(); + bool removed_by_transaction = version.isRemovalTIDLocked() && version.removal_tid_lock != Tx::PrehistoricTID.getHash(); + return created_by_transaction || removed_by_transaction; +} + +bool IMergeTreeDataPart::assertHasValidVersionMetadata() const +{ + /// We don't have many tests with server restarts and it's really inconvenient to write such tests. + /// So we use debug assertions to ensure that part version is written correctly. + /// This method is not supposed to be called in release builds. + + if (isProjectionPart()) + return true; + + if (!wasInvolvedInTransaction()) + return true; + + if (!isStoredOnDisk()) + return false; + + if (part_is_probably_removed_from_disk) + return true; + + DiskPtr disk = volume->getDisk(); + if (!disk->exists(getFullRelativePath())) + return true; + + String content; + String version_file_name = fs::path(getFullRelativePath()) / TXN_VERSION_METADATA_FILE_NAME; + try + { + auto buf = openForReading(disk, version_file_name); + readStringUntilEOF(content, *buf); + ReadBufferFromString str_buf{content}; + VersionMetadata file; + file.read(str_buf); + bool valid_creation_tid = version.creation_tid == file.creation_tid; + bool valid_removal_tid = version.removal_tid == file.removal_tid || version.removal_tid == Tx::PrehistoricTID; + bool valid_creation_csn = version.creation_csn == file.creation_csn || version.creation_csn == Tx::RolledBackCSN; + bool valid_removal_csn = version.removal_csn == file.removal_csn || version.removal_csn == Tx::PrehistoricCSN; + if (!valid_creation_tid || !valid_removal_tid || !valid_creation_csn || !valid_removal_csn) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Invalid version metadata file"); + return true; + } + catch (...) 
+ { + WriteBufferFromOwnString expected; + version.write(expected); + tryLogCurrentException(storage.log, fmt::format("File {} contains:\n{}\nexpected:\n{}", version_file_name, content, expected.str())); + return false; + } +} + + void IMergeTreeDataPart::appendFilesOfColumns(Strings & files) { files.push_back("columns.txt"); @@ -1337,6 +1561,9 @@ void IMergeTreeDataPart::initializePartMetadataManager() void IMergeTreeDataPart::remove() const { + assert(assertHasValidVersionMetadata()); + part_is_probably_removed_from_disk = true; + std::optional keep_shared_data = keepSharedDataInDecoupledStorage(); if (!keep_shared_data.has_value()) return; @@ -1444,6 +1671,7 @@ void IMergeTreeDataPart::remove() const request.emplace_back(fs::path(to) / DEFAULT_COMPRESSION_CODEC_FILE_NAME, true); request.emplace_back(fs::path(to) / DELETE_ON_DESTROY_MARKER_FILE_NAME, true); + request.emplace_back(fs::path(to) / TXN_VERSION_METADATA_FILE_NAME, true); disk->removeSharedFiles(request, *keep_shared_data); disk->removeDirectory(to); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 694be50d168..19df88c5466 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,7 @@ class IMergeTreeReader; class IMergeTreeDataPartWriter; class MarkCache; class UncompressedCache; +class MergeTreeTransaction; /// Description of the data part. class IMergeTreeDataPart : public std::enable_shared_from_this @@ -327,6 +329,8 @@ public: CompressionCodecPtr default_codec; + mutable VersionMetadata version; + /// For data in RAM ('index') UInt64 getIndexSizeInBytes() const; UInt64 getIndexSizeInAllocatedBytes() const; @@ -414,6 +418,8 @@ public: /// (number of rows, number of rows with default values, etc). static inline constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; + static inline constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; + /// One of part files which is used to check how many references (I'd like /// to say hardlinks, but it will confuse even more) we have for the part /// for zero copy replication. Sadly it's very complex. @@ -435,12 +441,38 @@ public: /// Required for distinguish different copies of the same part on remote FS. String getUniqueId() const; + /// Ensures that creation_tid was correctly set after part creation. + void assertHasVersionMetadata(MergeTreeTransaction * txn) const; + + /// [Re]writes file with transactional metadata on disk + void storeVersionMetadata() const; + + /// Appends the corresponding CSN to file on disk (without fsync) + void appendCSNToVersionMetadata(VersionMetadata::WhichCSN which_csn) const; + + /// Appends removal TID to file on disk (with fsync) + void appendRemovalTIDToVersionMetadata(bool clear = false) const; + + /// Loads transactional metadata from disk + void loadVersionMetadata() const; + + /// Returns true if part was created or removed by a transaction + bool wasInvolvedInTransaction() const; + + /// Moar hardening: this method is supposed to be used for debug assertions + bool assertHasValidVersionMetadata() const; + + /// Return hardlink count for part. + /// Required for keep data on remote FS when part has shadow copies. 
+ UInt32 getNumberOfRefereneces() const; + /// Get checksums of metadata file in part directory IMergeTreeDataPart::uint128 getActualChecksumByFile(const String & file_path) const; /// Check metadata in cache is consistent with actual metadata on disk(if use_metadata_cache is true) std::unordered_map checkMetadata() const; + protected: /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk @@ -551,6 +583,9 @@ private: CompressionCodecPtr detectDefaultCompressionCodec() const; mutable State state{State::Temporary}; + + /// This ugly flag is needed for debug assertions only + mutable bool part_is_probably_removed_from_disk = false; }; using MergeTreeDataPartState = IMergeTreeDataPart::State; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 68ffb42a90a..9459849b90a 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -227,7 +227,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() future_merged_part, settings); - transaction_ptr = std::make_unique(storage); + transaction_ptr = std::make_unique(storage, NO_TRANSACTION_RAW); stopwatch_ptr = std::make_unique(); merge_task = storage.merger_mutator.mergePartsToTemporaryPart( @@ -241,7 +241,8 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() reserved_space, entry.deduplicate, entry.deduplicate_by_columns, - storage.merging_params); + storage.merging_params, + NO_TRANSACTION_PTR); /// Adjust priority @@ -264,7 +265,7 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite /// Task is not needed merge_task.reset(); - storage.merger_mutator.renameMergedTemporaryPart(part, parts, transaction_ptr.get()); + storage.merger_mutator.renameMergedTemporaryPart(part, parts, NO_TRANSACTION_PTR, transaction_ptr.get()); try { diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index 64136205157..c420cbca12b 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -39,10 +39,13 @@ struct MergeMutateSelectedEntry FutureMergedMutatedPartPtr future_part; CurrentlyMergingPartsTaggerPtr tagger; MutationCommandsConstPtr commands; - MergeMutateSelectedEntry(FutureMergedMutatedPartPtr future_part_, CurrentlyMergingPartsTaggerPtr tagger_, MutationCommandsConstPtr commands_) + MergeTreeTransactionPtr txn; + MergeMutateSelectedEntry(FutureMergedMutatedPartPtr future_part_, CurrentlyMergingPartsTaggerPtr tagger_, + MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR) : future_part(future_part_) , tagger(std::move(tagger_)) , commands(commands_) + , txn(txn_) {} }; diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 14e43b2897e..0146ce4c7b3 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -107,14 +107,15 @@ void MergePlainMergeTreeTask::prepare() merge_mutate_entry->tagger->reserved_space, deduplicate, deduplicate_by_columns, - storage.merging_params); + storage.merging_params, + txn); } void MergePlainMergeTreeTask::finish() { new_part = merge_task->getFuture().get(); - storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, nullptr); + storage.merger_mutator.renameMergedTemporaryPart(new_part, 
future_part->parts, txn, nullptr); write_part_log({}); } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index 2c36386d32d..0f6d38d2cbf 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -39,6 +39,12 @@ public: StorageID getStorageID() override; UInt64 getPriority() override { return priority; } + void setCurrentTransaction(MergeTreeTransactionHolder && txn_holder_, MergeTreeTransactionPtr && txn_) + { + txn_holder = std::move(txn_holder_); + txn = std::move(txn_); + } + private: void prepare(); @@ -73,6 +79,9 @@ private: std::function write_part_log; IExecutableTask::TaskResultCallback task_result_callback; MergeTaskPtr merge_task{nullptr}; + + MergeTreeTransactionHolder txn_holder; + MergeTreeTransactionPtr txn; }; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e8241ffe080..e3146f169a7 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -230,7 +230,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() case MergeAlgorithm::Vertical : { ctx->rows_sources_file = createTemporaryFile(ctx->tmp_disk->getPath()); - ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->writeFile(fileName(ctx->rows_sources_file->path())); + ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->writeFile(fileName(ctx->rows_sources_file->path()), DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, global_ctx->context->getWriteSettings()); ctx->rows_sources_write_buf = std::make_unique(*ctx->rows_sources_uncompressed_write_buf); MergeTreeDataPartInMemory::ColumnToSize local_merged_column_to_size; @@ -260,8 +260,10 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->merging_columns, MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), ctx->compression_codec, + global_ctx->txn, /*reset_columns=*/ true, - ctx->blocks_are_granules_size); + ctx->blocks_are_granules_size, + global_ctx->context->getWriteSettings()); global_ctx->rows_written = 0; ctx->initial_reservation = global_ctx->space_reservation ? 
global_ctx->space_reservation->getSize() : 0; @@ -592,6 +594,7 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c projection_merging_params, global_ctx->new_data_part.get(), ".proj", + NO_TRANSACTION_PTR, global_ctx->data, global_ctx->mutator, global_ctx->merges_blocker, diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 04da9ad77c4..efab102bfe6 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -60,6 +60,7 @@ public: MergeTreeData::MergingParams merging_params_, const IMergeTreeDataPart * parent_part_, String suffix_, + MergeTreeTransactionPtr txn, MergeTreeData * data_, MergeTreeDataMergerMutator * mutator_, ActionBlocker * merges_blocker_, @@ -83,6 +84,7 @@ public: global_ctx->mutator = std::move(mutator_); global_ctx->merges_blocker = std::move(merges_blocker_); global_ctx->ttl_merges_blocker = std::move(ttl_merges_blocker_); + global_ctx->txn = std::move(txn); auto prepare_stage_ctx = std::make_shared(); @@ -164,6 +166,8 @@ private: std::promise promise{}; IMergedBlockOutputStream::WrittenOffsetColumns written_offset_columns{}; + + MergeTreeTransactionPtr txn; }; using GlobalRuntimeContextPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 478c1570a23..a541822d6c1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -30,8 +30,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -49,8 +51,6 @@ #include #include #include -#include -#include #include #include #include @@ -305,7 +305,7 @@ MergeTreeData::MergeTreeData( format_version = min_format_version; if (!version_file.second->isReadOnly()) { - auto buf = version_file.second->writeFile(version_file.first); + auto buf = version_file.second->writeFile(version_file.first, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, context_->getWriteSettings()); writeIntText(format_version.toUnderType(), *buf); if (getContext()->getSettingsRef().fsync_metadata) buf->sync(); @@ -1320,51 +1320,155 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) for (auto & part : duplicate_parts_to_remove) part->remove(); + auto deactivate_part = [&] (DataPartIteratorByStateAndInfo it) + { + + (*it)->remove_time.store((*it)->modification_time, std::memory_order_relaxed); + auto creation_csn = (*it)->version.creation_csn.load(std::memory_order_relaxed); + if (creation_csn != Tx::RolledBackCSN && creation_csn != Tx::PrehistoricCSN && !(*it)->version.isRemovalTIDLocked()) + { + /// It's possible that covering part was created without transaction, + /// but if covered part was created with transaction (i.e. creation_tid is not prehistoric), + /// then it must have removal tid in metadata file. + throw Exception(ErrorCodes::LOGICAL_ERROR, "Data part {} is Outdated and has creation TID {} and CSN {}, " + "but does not have removal tid. 
It's a bug or a result of manual intervention.", + (*it)->name, (*it)->version.creation_tid, creation_csn); + } + modifyPartState(it, DataPartState::Outdated); + removePartContributionToDataVolume(*it); + }; + + /// All parts are in "Active" state after loading + assert(std::find_if(data_parts_by_state_and_info.begin(), data_parts_by_state_and_info.end(), + [](const auto & part) + { + return part->getState() != DataPartState::Active; + }) == data_parts_by_state_and_info.end()); + + bool have_parts_with_version_metadata = false; + auto iter = data_parts_by_state_and_info.begin(); + while (iter != data_parts_by_state_and_info.end() && (*iter)->getState() == DataPartState::Active) + { + const DataPartPtr & part = *iter; + part->loadVersionMetadata(); + VersionMetadata & version = part->version; + if (part->wasInvolvedInTransaction()) + { + have_parts_with_version_metadata = true; + } + else + { + ++iter; + continue; + } + + /// Check if CSNs were witten after committing transaction, update and write if needed. + bool version_updated = false; + assert(!version.creation_tid.isEmpty()); + if (!part->version.creation_csn) + { + auto min = TransactionLog::getCSN(version.creation_tid); + if (!min) + { + /// Transaction that created this part was not committed. Remove part. + TransactionLog::assertTIDIsNotOutdated(version.creation_tid); + min = Tx::RolledBackCSN; + } + LOG_TRACE(log, "Will fix version metadata of {} after unclean restart: part has creation_tid={}, setting creation_csn={}", + part->name, version.creation_tid, min); + version.creation_csn = min; + version_updated = true; + } + if (!version.removal_tid.isEmpty() && !part->version.removal_csn) + { + auto max = TransactionLog::getCSN(version.removal_tid); + if (max) + { + LOG_TRACE(log, "Will fix version metadata of {} after unclean restart: part has removal_tid={}, setting removal_csn={}", + part->name, version.removal_tid, max); + version.removal_csn = max; + } + else + { + TransactionLog::assertTIDIsNotOutdated(version.removal_tid); + /// Transaction that tried to remove this part was not committed. Clear removal_tid. + LOG_TRACE(log, "Will fix version metadata of {} after unclean restart: clearing removal_tid={}", + part->name, version.removal_tid); + version.unlockRemovalTID(version.removal_tid, TransactionInfoContext{getStorageID(), part->name}); + } + version_updated = true; + } + + /// Sanity checks + bool csn_order = !version.removal_csn || version.creation_csn <= version.removal_csn || version.removal_csn == Tx::PrehistoricCSN; + bool min_start_csn_order = version.creation_tid.start_csn <= version.creation_csn; + bool max_start_csn_order = version.removal_tid.start_csn <= version.removal_csn; + bool creation_csn_known = version.creation_csn; + if (!csn_order || !min_start_csn_order || !max_start_csn_order || !creation_csn_known) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} has invalid version metadata: {}", part->name, version.toString()); + + if (version_updated) + part->storeVersionMetadata(); + + /// Deactivate part if creation was not committed or if removal was. + if (version.creation_csn == Tx::RolledBackCSN || version.removal_csn) + { + auto next_it = std::next(iter); + deactivate_part(iter); + iter = next_it; + } + else + { + ++iter; + } + } + + if (have_parts_with_version_metadata) + transactions_enabled.store(true); + /// Delete from the set of current parts those parts that are covered by another part (those parts that /// were merged), but that for some reason are still not deleted from the filesystem. 
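Once creation and removal CSNs have been repaired as above, per-query filtering reduces to comparing them with the query snapshot. A simplified sketch of that rule (not the real VersionMetadata::isVisible, which additionally handles parts created or removed by the current transaction and locked removal TIDs; the helper name is invented):

    /// A part is visible in a snapshot if its creation committed at or before the snapshot
    /// and its removal, if any, committed only after the snapshot.
    static bool isVisibleSimplified(CSN creation_csn, CSN removal_csn, CSN snapshot)
    {
        bool created = creation_csn != 0 && creation_csn <= snapshot;
        bool removed = removal_csn != 0 && removal_csn <= snapshot;
        return created && !removed;
    }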
/// Deletion of files will be performed later in the clearOldParts() method. - if (data_parts_indexes.size() >= 2) + auto active_parts_range = getDataPartsStateRange(DataPartState::Active); + auto prev_it = active_parts_range.begin(); + auto end_it = active_parts_range.end(); + + bool less_than_two_active_parts = prev_it == end_it || std::next(prev_it) == end_it; + + if (!less_than_two_active_parts) { - /// Now all parts are committed, so data_parts_by_state_and_info == committed_parts_range - auto prev_jt = data_parts_by_state_and_info.begin(); - auto curr_jt = std::next(prev_jt); + (*prev_it)->assertState({DataPartState::Active}); + auto curr_it = std::next(prev_it); - auto deactivate_part = [&] (DataPartIteratorByStateAndInfo it) + while (curr_it != data_parts_by_state_and_info.end() && (*curr_it)->getState() == DataPartState::Active) { - (*it)->remove_time.store((*it)->modification_time, std::memory_order_relaxed); - modifyPartState(it, DataPartState::Outdated); - removePartContributionToDataVolume(*it); - }; + (*curr_it)->assertState({DataPartState::Active}); - (*prev_jt)->assertState({DataPartState::Active}); - - while (curr_jt != data_parts_by_state_and_info.end() && (*curr_jt)->getState() == DataPartState::Active) - { /// Don't consider data parts belonging to different partitions. - if ((*curr_jt)->info.partition_id != (*prev_jt)->info.partition_id) + if ((*curr_it)->info.partition_id != (*prev_it)->info.partition_id) { - ++prev_jt; - ++curr_jt; + ++prev_it; + ++curr_it; continue; } - if ((*curr_jt)->contains(**prev_jt)) + if ((*curr_it)->contains(**prev_it)) { - deactivate_part(prev_jt); - prev_jt = curr_jt; - ++curr_jt; + deactivate_part(prev_it); + prev_it = curr_it; + ++curr_it; } - else if ((*prev_jt)->contains(**curr_jt)) + else if ((*prev_it)->contains(**curr_it)) { - auto next = std::next(curr_jt); - deactivate_part(curr_jt); - curr_jt = next; + auto next = std::next(curr_it); + deactivate_part(curr_it); + curr_it = next; } else { - ++prev_jt; - ++curr_jt; + ++prev_it; + ++curr_it; } } } @@ -1482,12 +1586,20 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) { const DataPartPtr & part = *it; + /// Do not remove outdated part if it may be visible for some transaction + if (!part->version.canBeRemoved()) + continue; + auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); - if (part.unique() && /// Grab only parts that are not used by anyone (SELECTs for example). - ((part_remove_time < now && - now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds()) || force - || isInMemoryPart(part))) /// Remove in-memory parts immediately to not store excessive data in RAM + /// Grab only parts that are not used by anyone (SELECTs for example). 
+ if (!part.unique()) + continue; + + if ((part_remove_time < now && now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds()) + || force + || isInMemoryPart(part) /// Remove in-memory parts immediately to not store excessive data in RAM + || (part->version.creation_csn == Tx::RolledBackCSN && getSettings()->remove_rolled_back_parts_immediately)) { parts_to_delete.emplace_back(it); } @@ -1578,7 +1690,7 @@ void MergeTreeData::flushAllInMemoryPartsIfNeeded() return; auto metadata_snapshot = getInMemoryMetadataPtr(); - DataPartsVector parts = getDataPartsVector(); + DataPartsVector parts = getDataPartsVectorForInternalUsage(); for (const auto & part : parts) { if (auto part_in_memory = asInMemoryPart(part)) @@ -1641,7 +1753,7 @@ void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts_to_re size_t MergeTreeData::clearOldWriteAheadLogs() { - DataPartsVector parts = getDataPartsVector(); + DataPartsVector parts = getDataPartsVectorForInternalUsage(); std::vector> all_block_numbers_on_disk; std::vector> block_numbers_on_disk; @@ -1706,14 +1818,19 @@ size_t MergeTreeData::clearEmptyParts() return 0; size_t cleared_count = 0; - auto parts = getDataPartsVector(); + auto parts = getDataPartsVectorForInternalUsage(); for (const auto & part : parts) { - if (part->rows_count == 0) - { - dropPartNoWaitNoThrow(part->name); - ++cleared_count; - } + if (part->rows_count != 0) + continue; + + /// Do not try to drop uncommitted parts. + if (!part->version.getCreationTID().isPrehistoric() && !part->version.isVisible(TransactionLog::instance().getLatestSnapshot())) + continue; + + LOG_TRACE(log, "Will drop empty part {}", part->name); + dropPartNoWaitNoThrow(part->name); + ++cleared_count; } return cleared_count; } @@ -2215,7 +2332,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - for (const auto & part : getDataPartsVector()) + for (const auto & part : getDataPartsVectorForInternalUsage()) { bool at_least_one_column_rest = false; for (const auto & column : part->getColumns()) @@ -2500,6 +2617,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( bool MergeTreeData::renameTempPartAndAdd( MutableDataPartPtr & part, + MergeTreeTransaction * txn, SimpleIncrement * increment, Transaction * out_transaction, MergeTreeDeduplicationLog * deduplication_log, @@ -2512,7 +2630,7 @@ bool MergeTreeData::renameTempPartAndAdd( DataPartsVector covered_parts; { auto lock = lockParts(); - if (!renameTempPartAndReplace(part, increment, out_transaction, lock, &covered_parts, deduplication_log, deduplication_token)) + if (!renameTempPartAndReplace(part, txn, increment, out_transaction, lock, &covered_parts, deduplication_log, deduplication_token)) return false; } if (!covered_parts.empty()) @@ -2525,6 +2643,7 @@ bool MergeTreeData::renameTempPartAndAdd( bool MergeTreeData::renameTempPartAndReplace( MutableDataPartPtr & part, + MergeTreeTransaction * txn, SimpleIncrement * increment, Transaction * out_transaction, std::unique_lock & lock, @@ -2536,6 +2655,9 @@ bool MergeTreeData::renameTempPartAndReplace( throw Exception("MergeTreeData::Transaction for one table cannot be used with another. 
It is a bug.", ErrorCodes::LOGICAL_ERROR); + if (txn) + transactions_enabled.store(true); + part->assertState({DataPartState::Temporary}); MergeTreePartInfo part_info = part->info; @@ -2578,7 +2700,6 @@ bool MergeTreeData::renameTempPartAndReplace( DataPartPtr covering_part; DataPartsVector covered_parts = getActivePartsToReplace(part_info, part_name, covering_part, lock); - DataPartsVector covered_parts_in_memory; if (covering_part) { @@ -2614,6 +2735,9 @@ bool MergeTreeData::renameTempPartAndReplace( part->renameTo(part_name, true); auto part_it = data_parts_indexes.insert(part).first; + /// FIXME Transactions: it's not the best place for checking and setting removal_tid, + /// because it's too optimistic. We should lock removal_tid of covered parts at the beginning of operation. + MergeTreeTransaction::addNewPartAndRemoveCovered(shared_from_this(), part, covered_parts, txn); if (out_transaction) { @@ -2669,7 +2793,8 @@ bool MergeTreeData::renameTempPartAndReplace( } MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( - MutableDataPartPtr & part, SimpleIncrement * increment, Transaction * out_transaction, MergeTreeDeduplicationLog * deduplication_log) + MutableDataPartPtr & part, MergeTreeTransaction * txn, SimpleIncrement * increment, + Transaction * out_transaction, MergeTreeDeduplicationLog * deduplication_log) { if (out_transaction && &out_transaction->data != this) throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", @@ -2678,18 +2803,25 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( DataPartsVector covered_parts; { auto lock = lockParts(); - renameTempPartAndReplace(part, increment, out_transaction, lock, &covered_parts, deduplication_log); + renameTempPartAndReplace(part, txn, increment, out_transaction, lock, &covered_parts, deduplication_log); } return covered_parts; } -void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock) +void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const MergeTreeData::DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock) + { + if (txn) + transactions_enabled.store(true); + auto remove_time = clear_without_timeout ? 
0 : time(nullptr); bool removed_active_part = false; for (const DataPartPtr & part : remove) { + if (part->version.creation_csn != Tx::RolledBackCSN) + MergeTreeTransaction::removeOldPart(shared_from_this(), part, txn); + if (part->getState() == IMergeTreeDataPart::State::Active) { removePartContributionToColumnAndSecondaryIndexSizes(part); @@ -2714,7 +2846,6 @@ void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVect void MergeTreeData::removePartsFromWorkingSetImmediatelyAndSetTemporaryState(const DataPartsVector & remove) { auto lock = lockParts(); - bool removed_active_part = false; for (const auto & part : remove) { @@ -2722,19 +2853,16 @@ void MergeTreeData::removePartsFromWorkingSetImmediatelyAndSetTemporaryState(con if (it_part == data_parts_by_info.end()) throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR); - if (part->getState() == IMergeTreeDataPart::State::Active) - removed_active_part = true; + assert(part->getState() == IMergeTreeDataPart::State::PreActive); modifyPartState(part, IMergeTreeDataPart::State::Temporary); /// Erase immediately data_parts_indexes.erase(it_part); } - - if (removed_active_part) - resetObjectColumnsFromActiveParts(lock); } -void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock) +void MergeTreeData::removePartsFromWorkingSet( + MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock) { auto lock = (acquired_lock) ? DataPartsLock() : lockParts(); @@ -2746,11 +2874,12 @@ void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bo part->assertState({DataPartState::PreActive, DataPartState::Active, DataPartState::Outdated}); } - removePartsFromWorkingSet(remove, clear_without_timeout, lock); + removePartsFromWorkingSet(txn, remove, clear_without_timeout, lock); } -MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout, - DataPartsLock & lock) +MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet( + MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, + bool clear_without_timeout, DataPartsLock & lock) { DataPartsVector parts_to_remove; @@ -2815,15 +2944,34 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(c part->name, drop_range.getPartName()); } - if (part->getState() != DataPartState::Deleting) - parts_to_remove.emplace_back(part); + if (part->getState() == DataPartState::Deleting) + continue; + + /// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice + if (txn) + { + if (!part->version.isVisible(*txn)) + continue; + } + + parts_to_remove.emplace_back(part); } - removePartsFromWorkingSet(parts_to_remove, clear_without_timeout, lock); + removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock); return parts_to_remove; } +void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLock * acquired_lock) +{ + auto lock = (acquired_lock) ? 
DataPartsLock() : lockParts(); //-V1018 + if (part->getState() == DataPartState::Active) + return; + addPartContributionToColumnAndSecondaryIndexSizes(part); + addPartContributionToDataVolume(part); + modifyPartState(part, DataPartState::Active); +} + void MergeTreeData::forgetPartAndMoveToDetached(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered) { if (prefix.empty()) @@ -3238,9 +3386,23 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & return getActiveContainingPart(part_info); } -MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(MergeTreeData::DataPartState state, const String & partition_id) const +MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartition(ContextPtr local_context, const String & partition_id) const { - DataPartStateAndPartitionID state_with_partition{state, partition_id}; + if (const auto * txn = local_context->getCurrentTransaction().get()) + { + DataPartStateAndPartitionID active_parts{MergeTreeDataPartState::Active, partition_id}; + DataPartStateAndPartitionID outdated_parts{MergeTreeDataPartState::Outdated, partition_id}; + DataPartsVector res; + { + auto lock = lockParts(); + res.insert(res.end(), data_parts_by_state_and_info.lower_bound(active_parts), data_parts_by_state_and_info.upper_bound(active_parts)); + res.insert(res.end(), data_parts_by_state_and_info.lower_bound(outdated_parts), data_parts_by_state_and_info.upper_bound(outdated_parts)); + } + filterVisibleDataParts(res, txn->getSnapshot(), txn->tid); + return res; + } + + DataPartStateAndPartitionID state_with_partition{MergeTreeDataPartState::Active, partition_id}; auto lock = lockParts(); return DataPartsVector( @@ -3248,19 +3410,37 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(Merg data_parts_by_state_and_info.upper_bound(state_with_partition)); } -MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartitions(MergeTreeData::DataPartState state, const std::unordered_set & partition_ids) const +MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartitions(ContextPtr local_context, const std::unordered_set & partition_ids) const { - auto lock = lockParts(); + auto txn = local_context->getCurrentTransaction(); DataPartsVector res; - for (const auto & partition_id : partition_ids) { - DataPartStateAndPartitionID state_with_partition{state, partition_id}; - insertAtEnd( - res, - DataPartsVector( - data_parts_by_state_and_info.lower_bound(state_with_partition), - data_parts_by_state_and_info.upper_bound(state_with_partition))); + auto lock = lockParts(); + for (const auto & partition_id : partition_ids) + { + DataPartStateAndPartitionID active_parts{MergeTreeDataPartState::Active, partition_id}; + insertAtEnd( + res, + DataPartsVector( + data_parts_by_state_and_info.lower_bound(active_parts), + data_parts_by_state_and_info.upper_bound(active_parts))); + + if (txn) + { + DataPartStateAndPartitionID outdated_parts{MergeTreeDataPartState::Active, partition_id}; + + insertAtEnd( + res, + DataPartsVector( + data_parts_by_state_and_info.lower_bound(outdated_parts), + data_parts_by_state_and_info.upper_bound(outdated_parts))); + } + } } + + if (txn) + filterVisibleDataParts(res, txn->getSnapshot(), txn->tid); + return res; } @@ -3292,6 +3472,8 @@ static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) part->loadColumnsChecksumsIndexes(false, true); part->modification_time = 
disk->getLastModified(full_part_path).epochTime(); + disk->removeFileIfExists(fs::path(full_part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME); + disk->removeFileIfExists(fs::path(full_part_path) / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME); } void MergeTreeData::calculateColumnAndSecondaryIndexSizesImpl() @@ -3398,16 +3580,16 @@ void MergeTreeData::checkAlterPartitionIsPossible( } } -void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition) +void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context) { DataPartsVector parts_to_remove; const auto * partition_ast = partition->as(); if (partition_ast && partition_ast->all) - parts_to_remove = getDataPartsVector(); + parts_to_remove = getVisibleDataPartsVector(local_context); else { - const String partition_id = getPartitionIDFromQuery(partition, getContext()); - parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + const String partition_id = getPartitionIDFromQuery(partition, local_context); + parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id); } UInt64 partition_size = 0; @@ -3446,7 +3628,7 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART); } else - parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); auto disk = getStoragePolicy()->getDiskByName(name); parts.erase(std::remove_if(parts.begin(), parts.end(), [&](auto part_ptr) @@ -3488,7 +3670,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART); } else - parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); auto volume = getStoragePolicy()->getVolumeByName(name); if (!volume) @@ -3560,7 +3742,7 @@ Pipe MergeTreeData::alterPartition( } else { - checkPartitionCanBeDropped(command.partition); + checkPartitionCanBeDropped(command.partition, query_context); dropPartition(command.partition, command.detach, query_context); } } @@ -3609,7 +3791,7 @@ Pipe MergeTreeData::alterPartition( case PartitionCommand::REPLACE_PARTITION: { if (command.replace) - checkPartitionCanBeDropped(command.partition); + checkPartitionCanBeDropped(command.partition, query_context); String from_database = query_context->resolveDatabase(command.from_database); auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, query_context); replacePartitionFrom(from_storage, command.partition, command.replace, query_context); @@ -3668,9 +3850,9 @@ BackupEntries MergeTreeData::backupData(ContextPtr local_context, const ASTs & p { DataPartsVector data_parts; if (partitions.empty()) - data_parts = getDataPartsVector(); + data_parts = getVisibleDataPartsVector(local_context); else - data_parts = getDataPartsVectorInPartitions(MergeTreeDataPartState::Active, getPartitionIDsFromQuery(partitions, local_context)); + data_parts = getVisibleDataPartsVectorInPartitions(local_context, getPartitionIDsFromQuery(partitions, local_context)); return backupDataParts(data_parts); } @@ -3798,17 +3980,20 @@ private: for (const String & filename : filenames) { - auto backup_entry 
= backup->readFile(data_path_in_backup + part_name + "/" + filename); + auto backup_entry = backup->readFile(fs::path(data_path_in_backup) / part_name / filename); auto read_buffer = backup_entry->getReadBuffer(); - auto write_buffer = disk->writeFile(temp_part_dir + "/" + filename); + auto write_buffer = disk->writeFile(fs::path(temp_part_dir) / filename); copyData(*read_buffer, *write_buffer); reservation->update(reservation->getSize() - backup_entry->getSize()); } auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); auto part = storage->createPart(part_name, part_info, single_disk_volume, relative_temp_part_dir); + /// TODO Transactions: Decide what to do with version metadata (if any). Let's just remove it for now. + disk->removeFileIfExists(fs::path(temp_part_dir) / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME); + part->version.setCreationTID(Tx::PrehistoricTID, nullptr); part->loadColumnsChecksumsIndexes(false, true); - storage->renameTempPartAndAdd(part, increment); + storage->renameTempPartAndAdd(part, NO_TRANSACTION_RAW, increment); return {}; } @@ -3928,6 +4113,86 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc return partition_id; } + +DataPartsVector MergeTreeData::getVisibleDataPartsVector(ContextPtr local_context) const +{ + DataPartsVector res; + if (const auto * txn = local_context->getCurrentTransaction().get()) + { + res = getDataPartsVectorForInternalUsage({DataPartState::Active, DataPartState::Outdated}); + filterVisibleDataParts(res, txn->getSnapshot(), txn->tid); + } + else + { + res = getDataPartsVectorForInternalUsage(); + } + return res; +} + +DataPartsVector MergeTreeData::getVisibleDataPartsVectorUnlocked(ContextPtr local_context, const DataPartsLock & lock) const +{ + DataPartsVector res; + if (const auto * txn = local_context->getCurrentTransaction().get()) + { + res = getDataPartsVectorForInternalUsage({DataPartState::Active, DataPartState::Outdated}, lock); + filterVisibleDataParts(res, txn->getSnapshot(), txn->tid); + } + else + { + res = getDataPartsVectorForInternalUsage({DataPartState::Active}, lock); + } + return res; +} + +MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVector(const MergeTreeTransactionPtr & txn) const +{ + DataPartsVector res; + if (txn) + { + res = getDataPartsVectorForInternalUsage({DataPartState::Active, DataPartState::Outdated}); + filterVisibleDataParts(res, txn->getSnapshot(), txn->tid); + } + else + { + res = getDataPartsVectorForInternalUsage(); + } + return res; +} + +MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVector(CSN snapshot_version, TransactionID current_tid) const +{ + auto res = getDataPartsVectorForInternalUsage({DataPartState::Active, DataPartState::Outdated}); + filterVisibleDataParts(res, snapshot_version, current_tid); + return res; +} + +void MergeTreeData::filterVisibleDataParts(DataPartsVector & maybe_visible_parts, CSN snapshot_version, TransactionID current_tid) const +{ + [[maybe_unused]] size_t total_size = maybe_visible_parts.size(); + + auto need_remove_pred = [snapshot_version, ¤t_tid] (const DataPartPtr & part) -> bool + { + return !part->version.isVisible(snapshot_version, current_tid); + }; + + auto new_end_it = std::remove_if(maybe_visible_parts.begin(), maybe_visible_parts.end(), need_remove_pred); + maybe_visible_parts.erase(new_end_it, maybe_visible_parts.end()); + [[maybe_unused]] size_t visible_size = maybe_visible_parts.size(); + + + auto get_part_names = [&maybe_visible_parts]() -> Strings + { + 
Strings visible_part_names; + for (const auto & p : maybe_visible_parts) + visible_part_names.push_back(p->name); + return visible_part_names; + }; + + LOG_TEST(log, "Got {} parts (of {}) visible in snapshot {} (TID {}): {}", + visible_size, total_size, snapshot_version, current_tid, fmt::join(get_part_names(), ", ")); +} + + std::unordered_set MergeTreeData::getPartitionIDsFromQuery(const ASTs & asts, ContextPtr local_context) const { std::unordered_set partition_ids; @@ -3958,7 +4223,7 @@ std::set MergeTreeData::getPartitionIdsAffectedByCommands( } -MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorUnlocked( +MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorForInternalUsage( const DataPartStates & affordable_states, const DataPartsLock & /*lock*/, DataPartStateVector * out_states, @@ -4005,13 +4270,13 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorUnlocked( return res; } -MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector( +MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorForInternalUsage( const DataPartStates & affordable_states, DataPartStateVector * out_states, bool require_projection_parts) const { auto lock = lockParts(); - return getDataPartsVectorUnlocked(affordable_states, lock, out_states, require_projection_parts); + return getDataPartsVectorForInternalUsage(affordable_states, lock, out_states, require_projection_parts); } MergeTreeData::DataPartsVector @@ -4371,14 +4636,14 @@ MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affo return res; } -MergeTreeData::DataParts MergeTreeData::getDataParts() const +MergeTreeData::DataParts MergeTreeData::getDataPartsForInternalUsage() const { return getDataParts({DataPartState::Active}); } -MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector() const +MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorForInternalUsage() const { - return getDataPartsVector({DataPartState::Active}); + return getDataPartsVectorForInternalUsage({DataPartState::Active}); } MergeTreeData::DataPartPtr MergeTreeData::getAnyPartInPartition( @@ -4422,7 +4687,19 @@ void MergeTreeData::Transaction::rollback() buf << "."; LOG_DEBUG(data.log, "Undoing transaction.{}", buf.str()); - data.removePartsFromWorkingSet( + if (!txn) + { + auto lock = data.lockParts(); + for (const auto & part : precommitted_parts) + { + DataPartPtr covering_part; + DataPartsVector covered_parts = data.getActivePartsToReplace(part->info, part->name, covering_part, lock); + for (auto & covered : covered_parts) + covered->version.unlockRemovalTID(Tx::PrehistoricTID, TransactionInfoContext{data.getStorageID(), covered->name}); + } + } + + data.removePartsFromWorkingSet(txn, DataPartsVector(precommitted_parts.begin(), precommitted_parts.end()), /* clear_without_timeout = */ true); } @@ -5184,7 +5461,9 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (const StorageReplicatedMergeTree * replicated = dynamic_cast(this)) max_added_blocks = std::make_shared(replicated->getMaxAddedBlocks()); } - auto parts = getDataPartsVector(); + + const auto & snapshot_data = assert_cast(*storage_snapshot->data); + const auto & parts = snapshot_data.parts; // If minmax_count_projection is a valid candidate, check its completeness. 
if (minmax_count_projection_candidate) @@ -5411,7 +5690,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, - const StorageMetadataPtr & metadata_snapshot) + const StorageMetadataPtr & metadata_snapshot, + const MergeTreeTransactionPtr & txn) { /// Check that the storage policy contains the disk where the src_part is located. bool does_storage_policy_allow_same_disk = false; @@ -5429,6 +5709,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( ErrorCodes::BAD_ARGUMENTS); String dst_part_name = src_part->getNewName(dst_part_info); + assert(!tmp_part_prefix.empty()); String tmp_dst_part_name = tmp_part_prefix + dst_part_name; auto reservation = reserveSpace(src_part->getBytesOnDisk(), src_part->volume->getDisk()); @@ -5449,12 +5730,18 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( } LOG_DEBUG(log, "Cloning part {} to {}", fullPath(disk, src_part_path), fullPath(disk, dst_part_path)); - localBackup(disk, src_part_path, dst_part_path); + localBackup(disk, src_part_path, dst_part_path, /* make_source_readonly */ false); disk->removeFileIfExists(fs::path(dst_part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME); + disk->removeFileIfExists(fs::path(dst_part_path) / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME); auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); auto dst_data_part = createPart(dst_part_name, dst_part_info, single_disk_volume, tmp_dst_part_name); + /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. + TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; + dst_data_part->version.setCreationTID(tid, nullptr); + dst_data_part->storeVersionMetadata(); + dst_data_part->is_temp = true; dst_data_part->loadColumnsChecksumsIndexes(require_part_metadata, true); @@ -5503,7 +5790,7 @@ void MergeTreeData::reportBrokenPart(MergeTreeData::DataPartPtr & data_part) con if (data_part->volume && data_part->volume->getDisk()->isBroken()) { auto disk = data_part->volume->getDisk(); - auto parts = getDataParts(); + auto parts = getDataPartsForInternalUsage(); LOG_WARNING(log, "Scanning parts to recover on broken disk {}.", disk->getName() + "@" + disk->getPath()); for (const auto & part : parts) { @@ -5578,7 +5865,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( const String shadow_path = "shadow/"; /// Acquire a snapshot of active data parts to prevent removing while doing backup. - const auto data_parts = getDataParts(); + const auto data_parts = getVisibleDataPartsVector(local_context); String backup_name = (!with_name.empty() ? 
escapeForFileName(with_name) : toString(increment)); String backup_path = fs::path(shadow_path) / backup_name / ""; @@ -6321,12 +6608,12 @@ void MergeTreeData::updateObjectColumns(const DataPartPtr & part, const DataPart DB::updateObjectColumns(object_columns, part->getColumns()); } -StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const +StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const { auto snapshot_data = std::make_unique(); auto lock = lockParts(); - snapshot_data->parts = getDataPartsVectorUnlocked({DataPartState::Active}, lock); + snapshot_data->parts = getVisibleDataPartsVectorUnlocked(query_context, lock); return std::make_shared(*this, metadata_snapshot, object_columns, std::move(snapshot_data)); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 44736fe2cc5..5ce5f30f0dc 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -49,6 +49,7 @@ class MutationCommands; class Context; using PartitionIdToMaxBlock = std::unordered_map; struct JobAndPool; +class MergeTreeTransaction; struct ZeroCopyLock; /// Auxiliary struct holding information about the future merged or mutated part. @@ -247,7 +248,7 @@ public: class Transaction : private boost::noncopyable { public: - explicit Transaction(MergeTreeData & data_) : data(data_) {} + Transaction(MergeTreeData & data_, MergeTreeTransaction * txn_) : data(data_), txn(txn_) {} DataPartsVector commit(MergeTreeData::DataPartsLock * acquired_parts_lock = nullptr); @@ -276,6 +277,7 @@ public: friend class MergeTreeData; MergeTreeData & data; + MergeTreeTransaction * txn; DataParts precommitted_parts; void clear() { precommitted_parts.clear(); } @@ -436,7 +438,7 @@ public: DataPartsVector parts; }; - StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; /// Load the set of data parts from disk. Call once - immediately after the object is created. void loadDataParts(bool skip_sanity_checks); @@ -445,10 +447,11 @@ public: Int64 getMaxBlockNumber() const; + /// Returns a copy of the list so that the caller shouldn't worry about locks. 
DataParts getDataParts(const DataPartStates & affordable_states) const; - DataPartsVector getDataPartsVectorUnlocked( + DataPartsVector getDataPartsVectorForInternalUsage( const DataPartStates & affordable_states, const DataPartsLock & lock, DataPartStateVector * out_states = nullptr, @@ -456,31 +459,28 @@ public: /// Returns sorted list of the parts with specified states /// out_states will contain snapshot of each part state - DataPartsVector getDataPartsVector( - const DataPartStates & affordable_states, - DataPartStateVector * out_states = nullptr, - bool require_projection_parts = false) const; + DataPartsVector getDataPartsVectorForInternalUsage( + const DataPartStates & affordable_states, DataPartStateVector * out_states = nullptr, bool require_projection_parts = false) const; + /// Returns absolutely all parts (and snapshot of their states) DataPartsVector getAllDataPartsVector( DataPartStateVector * out_states = nullptr, bool require_projection_parts = false) const; - /// Returns all detached parts - DetachedPartsInfo getDetachedParts() const; + /// Returns parts in Active state + DataParts getDataPartsForInternalUsage() const; + DataPartsVector getDataPartsVectorForInternalUsage() const; - static void validateDetachedPartName(const String & name); + void filterVisibleDataParts(DataPartsVector & maybe_visible_parts, CSN snapshot_version, TransactionID current_tid) const; - void dropDetached(const ASTPtr & partition, bool part, ContextPtr local_context); + /// Returns parts that visible with current snapshot + DataPartsVector getVisibleDataPartsVector(ContextPtr local_context) const; + DataPartsVector getVisibleDataPartsVectorUnlocked(ContextPtr local_context, const DataPartsLock & lock) const; + DataPartsVector getVisibleDataPartsVector(const MergeTreeTransactionPtr & txn) const; + DataPartsVector getVisibleDataPartsVector(CSN snapshot_version, TransactionID current_tid) const; - MutableDataPartsVector tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part, - ContextPtr context, PartsTemporaryRename & renamed_parts); - - /// Returns Active parts - DataParts getDataParts() const; - DataPartsVector getDataPartsVector() const; - - /// Returns a committed part with the given name or a part containing it. If there is no such part, returns nullptr. + /// Returns a part in Active state with the given name or a part containing it. If there is no such part, returns nullptr. DataPartPtr getActiveContainingPart(const String & part_name) const; DataPartPtr getActiveContainingPart(const MergeTreePartInfo & part_info) const; DataPartPtr getActiveContainingPart(const MergeTreePartInfo & part_info, DataPartState state, DataPartsLock & lock) const; @@ -490,8 +490,8 @@ public: void swapActivePart(MergeTreeData::DataPartPtr part_copy); /// Returns all parts in specified partition - DataPartsVector getDataPartsVectorInPartition(DataPartState state, const String & partition_id) const; - DataPartsVector getDataPartsVectorInPartitions(DataPartState state, const std::unordered_set & partition_ids) const; + DataPartsVector getVisibleDataPartsVectorInPartition(ContextPtr local_context, const String & partition_id) const; + DataPartsVector getVisibleDataPartsVectorInPartitions(ContextPtr local_context, const std::unordered_set & partition_ids) const; /// Returns the part with the given name and state or nullptr if no such part. 
DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); @@ -511,6 +511,18 @@ public: /// Makes sense only for ordinary MergeTree engines because for them block numbering doesn't depend on partition. std::optional getMinPartDataVersion() const; + + /// Returns all detached parts + DetachedPartsInfo getDetachedParts() const; + + static void validateDetachedPartName(const String & name); + + void dropDetached(const ASTPtr & partition, bool part, ContextPtr context); + + MutableDataPartsVector tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part, + ContextPtr context, PartsTemporaryRename & renamed_parts); + + /// If the table contains too many active parts, sleep for a while to give them time to merge. /// If until is non-null, wake up from the sleep earlier if the event happened. void delayInsertOrThrowIfNeeded(Poco::Event * until = nullptr) const; @@ -524,6 +536,7 @@ public: /// Returns true if part was added. Returns false if part is covered by bigger part. bool renameTempPartAndAdd( MutableDataPartPtr & part, + MergeTreeTransaction * txn, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr, @@ -533,11 +546,14 @@ public: /// Returns all parts covered by the added part (in ascending order). /// If out_transaction == nullptr, marks covered parts as Outdated. DataPartsVector renameTempPartAndReplace( - MutableDataPartPtr & part, SimpleIncrement * increment = nullptr, Transaction * out_transaction = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr); + MutableDataPartPtr & part, MergeTreeTransaction * txn, SimpleIncrement * increment = nullptr, + Transaction * out_transaction = nullptr, MergeTreeDeduplicationLog * deduplication_log = nullptr); /// Low-level version of previous one, doesn't lock mutex + /// FIXME Transactions: remove add_to_txn flag, maybe merge MergeTreeTransaction and Transaction bool renameTempPartAndReplace( MutableDataPartPtr & part, + MergeTreeTransaction * txn, SimpleIncrement * increment, Transaction * out_transaction, DataPartsLock & lock, @@ -554,15 +570,18 @@ public: /// Parts in add must already be in data_parts with PreActive, Active, or Outdated states. /// If clear_without_timeout is true, the parts will be deleted at once, or during the next call to /// clearOldParts (ignoring old_parts_lifetime). - void removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock = nullptr); - void removePartsFromWorkingSet(const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock); + void removePartsFromWorkingSet(MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock * acquired_lock = nullptr); + void removePartsFromWorkingSet(MergeTreeTransaction * txn, const DataPartsVector & remove, bool clear_without_timeout, DataPartsLock & acquired_lock); /// Removes all parts from the working set parts /// for which (partition_id = drop_range.partition_id && min_block >= drop_range.min_block && max_block <= drop_range.max_block). 
/// Used in REPLACE PARTITION command; - DataPartsVector removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout, + DataPartsVector removePartsInRangeFromWorkingSet(MergeTreeTransaction * txn, const MergeTreePartInfo & drop_range, bool clear_without_timeout, DataPartsLock & lock); + /// Restores Outdated part and adds it to working set + void restoreAndActivatePart(const DataPartPtr & part, DataPartsLock * acquired_lock = nullptr); + /// Renames the part to detached/_ and removes it from data_parts, //// so it will not be deleted in clearOldParts. /// If restore_covered is true, adds to the working set inactive parts, which were merged into the deleted part. @@ -697,7 +716,10 @@ public: /// Moves partition to specified Volume void movePartitionToVolume(const ASTPtr & partition, const String & name, bool moving_part, ContextPtr context); - void checkPartitionCanBeDropped(const ASTPtr & partition) override; + /// Checks that Partition could be dropped right now + /// Otherwise - throws an exception with detailed information. + /// We do not use mutex because it is not very important that the size could change during the operation. + void checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context); void checkPartCanBeDropped(const String & part_name); @@ -743,7 +765,8 @@ public: MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; MergeTreeData::MutableDataPartPtr cloneAndLoadDataPartOnSameDisk( - const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, const StorageMetadataPtr & metadata_snapshot); + const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info, + const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn); virtual std::vector getMutationsStatus() const = 0; @@ -978,6 +1001,9 @@ protected: mutable std::shared_mutex pinned_part_uuids_mutex; PinnedPartUUIDsPtr pinned_part_uuids; + /// True if at least one part was created/removed with transaction. + mutable std::atomic_bool transactions_enabled = false; + /// Work with data parts struct TagByInfo{}; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 22a868f218e..31d52cfa8ff 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +54,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int ABORTED; } /// Do not start to merge parts, if free space is less than sum size of parts times specified coefficient. @@ -124,9 +127,70 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge_callback, bool merge_with_ttl_allowed, + const MergeTreeTransactionPtr & txn, String * out_disable_reason) { - MergeTreeData::DataPartsVector data_parts = data.getDataPartsVector(); + MergeTreeData::DataPartsVector data_parts; + if (txn) + { + /// Merge predicate (for simple MergeTree) allows to merge two parts only if both parts are visible for merge transaction. 
+ /// So at the first glance we could just get all active parts. + /// Active parts include uncommitted parts, but it's ok and merge predicate handles it. + /// However, it's possible that some transaction is trying to remove a part in the middle, for example, all_2_2_0. + /// If parts all_1_1_0 and all_3_3_0 are active and visible for merge transaction, then we would try to merge them. + /// But it's wrong, because all_2_2_0 may become active again if transaction will roll back. + /// That's why we must include some outdated parts into `data_part`, more precisely, such parts that removal is not committed. + MergeTreeData::DataPartsVector active_parts; + MergeTreeData::DataPartsVector outdated_parts; + + { + auto lock = data.lockParts(); + active_parts = data.getDataPartsVectorForInternalUsage({MergeTreeData::DataPartState::Active}, lock); + outdated_parts = data.getDataPartsVectorForInternalUsage({MergeTreeData::DataPartState::Outdated}, lock); + } + + ActiveDataPartSet active_parts_set{data.format_version}; + for (const auto & part : active_parts) + active_parts_set.add(part->name); + + for (const auto & part : outdated_parts) + { + /// We don't need rolled back parts. + /// NOTE When rolling back a transaction we set creation_csn to RolledBackCSN at first + /// and then remove part from working set, so there's no race condition + if (part->version.creation_csn == Tx::RolledBackCSN) + continue; + + /// We don't need parts that are finally removed. + /// NOTE There's a minor race condition: we may get UnknownCSN if a transaction has been just committed concurrently. + /// But it's not a problem if we will add such part to `data_parts`. + if (part->version.removal_csn != Tx::UnknownCSN) + continue; + + active_parts_set.add(part->name); + } + + /// Restore "active" parts set from selected active and outdated parts + auto remove_pred = [&](const MergeTreeData::DataPartPtr & part) -> bool + { + return active_parts_set.getContainingPart(part->info) != part->name; + }; + + auto new_end_it = std::remove_if(active_parts.begin(), active_parts.end(), remove_pred); + active_parts.erase(new_end_it, active_parts.end()); + + new_end_it = std::remove_if(outdated_parts.begin(), outdated_parts.end(), remove_pred); + outdated_parts.erase(new_end_it, outdated_parts.end()); + + std::merge(active_parts.begin(), active_parts.end(), + outdated_parts.begin(), outdated_parts.end(), + std::back_inserter(data_parts), MergeTreeData::LessDataPart()); + } + else + { + /// Simply get all active parts + data_parts = data.getDataPartsVectorForInternalUsage(); + } const auto data_settings = data.getSettings(); auto metadata_snapshot = data.getInMemoryMetadataPtr(); @@ -172,7 +236,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( * So we have to check if this part is currently being inserted with quorum and so on and so forth. * Obviously we have to check it manually only for the first part * of each partition because it will be automatically checked for a pair of parts. 
*/ - if (!can_merge_callback(nullptr, part, nullptr)) + if (!can_merge_callback(nullptr, part, txn.get(), nullptr)) continue; /// This part can be merged only with next parts (no prev part exists), so start @@ -184,7 +248,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( { /// If we cannot merge with previous part we had to start new parts /// interval (in the same partition) - if (!can_merge_callback(*prev_part, part, nullptr)) + if (!can_merge_callback(*prev_part, part, txn.get(), nullptr)) { /// Now we have no previous part prev_part = nullptr; @@ -196,7 +260,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /// for example, merge is already assigned for such parts, or they participate in quorum inserts /// and so on. /// Also we don't start new interval here (maybe all next parts cannot be merged and we don't want to have empty interval) - if (!can_merge_callback(nullptr, part, nullptr)) + if (!can_merge_callback(nullptr, part, txn.get(), nullptr)) continue; /// Starting new interval in the same partition @@ -307,6 +371,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti const String & partition_id, bool final, const StorageMetadataPtr & metadata_snapshot, + const MergeTreeTransactionPtr & txn, String * out_disable_reason, bool optimize_skip_merged_partitions) { @@ -343,7 +408,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti while (it != parts.end()) { /// For the case of one part, we check that it can be merged "with itself". - if ((it != parts.begin() || parts.size() == 1) && !can_merge(*prev_it, *it, out_disable_reason)) + if ((it != parts.begin() || parts.size() == 1) && !can_merge(*prev_it, *it, txn.get(), out_disable_reason)) { return SelectPartsDecision::CANNOT_SELECT; } @@ -390,7 +455,7 @@ MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::selectAllPartsFromPar { MergeTreeData::DataPartsVector parts_from_partition; - MergeTreeData::DataParts data_parts = data.getDataParts(); + MergeTreeData::DataParts data_parts = data.getDataPartsForInternalUsage(); for (const auto & current_part : data_parts) { @@ -416,6 +481,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( bool deduplicate, const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, + const MergeTreeTransactionPtr & txn, const IMergeTreeDataPart * parent_part, const String & suffix) { @@ -432,6 +498,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( merging_params, parent_part, suffix, + txn, &data, this, &merges_blocker, @@ -446,6 +513,7 @@ MutateTaskPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( MergeListEntry * merge_entry, time_t time_of_mutation, ContextPtr context, + const MergeTreeTransactionPtr & txn, ReservationSharedPtr space_reservation, TableLockHolder & holder) { @@ -458,6 +526,7 @@ MutateTaskPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( context, space_reservation, holder, + txn, data, *this, merges_blocker @@ -508,10 +577,16 @@ MergeAlgorithm MergeTreeDataMergerMutator::chooseMergeAlgorithm( MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart( MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, + const MergeTreeTransactionPtr & txn, MergeTreeData::Transaction * out_transaction) { + /// Some of source parts was possibly created in transaction, so non-transactional merge may break isolation. 
+ if (data.transactions_enabled.load(std::memory_order_relaxed) && !txn) + throw Exception(ErrorCodes::ABORTED, "Cancelling merge, because it was done without starting transaction," + "but transactions were enabled for this table"); + /// Rename new part, add to the set and remove original parts. - auto replaced_parts = data.renameTempPartAndReplace(new_data_part, nullptr, out_transaction); + auto replaced_parts = data.renameTempPartAndReplace(new_data_part, txn.get(), nullptr, out_transaction); /// Let's check that all original parts have been deleted and only them. if (replaced_parts.size() != parts.size()) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index e64c13ca6c3..9a60e4c6078 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -40,7 +40,10 @@ enum class ExecuteTTLType class MergeTreeDataMergerMutator { public: - using AllowedMergingPredicate = std::function; + using AllowedMergingPredicate = std::function; MergeTreeDataMergerMutator(MergeTreeData & data_, size_t max_tasks_count_); @@ -72,6 +75,7 @@ public: size_t max_total_size_to_merge, const AllowedMergingPredicate & can_merge, bool merge_with_ttl_allowed, + const MergeTreeTransactionPtr & txn, String * out_disable_reason = nullptr); /** Select all the parts in the specified partition for merge, if possible. @@ -85,6 +89,7 @@ public: const String & partition_id, bool final, const StorageMetadataPtr & metadata_snapshot, + const MergeTreeTransactionPtr & txn, String * out_disable_reason = nullptr, bool optimize_skip_merged_partitions = false); @@ -107,6 +112,7 @@ public: bool deduplicate, const Names & deduplicate_by_columns, const MergeTreeData::MergingParams & merging_params, + const MergeTreeTransactionPtr & txn, const IMergeTreeDataPart * parent_part = nullptr, const String & suffix = ""); @@ -118,12 +124,14 @@ public: MergeListEntry * merge_entry, time_t time_of_mutation, ContextPtr context, + const MergeTreeTransactionPtr & txn, ReservationSharedPtr space_reservation, TableLockHolder & table_lock_holder); MergeTreeData::DataPartPtr renameMergedTemporaryPart( MergeTreeData::MutableDataPartPtr & new_data_part, const MergeTreeData::DataPartsVector & parts, + const MergeTreeTransactionPtr & txn, MergeTreeData::Transaction * out_transaction = nullptr); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index e4a174a7d29..b63d46ee463 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -91,7 +91,7 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri auto compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); - MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec); + MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec, NO_TRANSACTION_PTR); out.write(block); const auto & projections = metadata_snapshot->getProjections(); for (const auto & [projection_name, projection] : projection_parts) @@ -122,7 +122,7 @@ void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const Stri auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices()); 
MergedBlockOutputStream projection_out( projection_data_part, desc.metadata, projection_part->columns, projection_indices, - projection_compression_codec); + projection_compression_codec, NO_TRANSACTION_PTR); projection_out.write(projection_part->block); projection_out.finalizePart(projection_data_part, false); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index d7b8f2c4165..6caff7c683f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -24,12 +24,14 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( , plain_file(data_part->volume->getDisk()->writeFile( part_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, settings.max_compress_block_size, - WriteMode::Rewrite)) + WriteMode::Rewrite, + settings_.query_write_settings)) , plain_hashing(*plain_file) , marks_file(data_part->volume->getDisk()->writeFile( part_path + MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, 4096, - WriteMode::Rewrite)) + WriteMode::Rewrite, + settings_.query_write_settings)) , marks(*marks_file) { const auto & storage_columns = metadata_snapshot->getColumns(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 8dca93f574f..6cba4db19e3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -47,15 +47,16 @@ MergeTreeDataPartWriterOnDisk::Stream::Stream( const std::string & marks_path_, const std::string & marks_file_extension_, const CompressionCodecPtr & compression_codec_, - size_t max_compress_block_size_) : + size_t max_compress_block_size_, + const WriteSettings & query_write_settings) : escaped_column_name(escaped_column_name_), data_file_extension{data_file_extension_}, marks_file_extension{marks_file_extension_}, - plain_file(disk_->writeFile(data_path_ + data_file_extension, max_compress_block_size_, WriteMode::Rewrite)), + plain_file(disk_->writeFile(data_path_ + data_file_extension, max_compress_block_size_, WriteMode::Rewrite, query_write_settings)), plain_hashing(*plain_file), compressed_buf(plain_hashing, compression_codec_, max_compress_block_size_), compressed(compressed_buf), - marks_file(disk_->writeFile(marks_path_ + marks_file_extension, 4096, WriteMode::Rewrite)), marks(*marks_file) + marks_file(disk_->writeFile(marks_path_ + marks_file_extension, 4096, WriteMode::Rewrite, query_write_settings)), marks(*marks_file) { } @@ -156,7 +157,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() { if (metadata_snapshot->hasPrimaryKey()) { - index_file_stream = data_part->volume->getDisk()->writeFile(part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); + index_file_stream = data_part->volume->getDisk()->writeFile(part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings.query_write_settings); index_stream = std::make_unique(*index_file_stream); } } @@ -172,7 +173,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() data_part->volume->getDisk(), part_path + stream_name, index_helper->getSerializedFileExtension(), part_path + stream_name, marks_file_extension, - default_codec, settings.max_compress_block_size)); + default_codec, settings.max_compress_block_size, settings.query_write_settings)); skip_indices_aggregators.push_back(index_helper->createIndexAggregator()); 
skip_index_accumulated_marks.push_back(0); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 5af8cbc1650..67b51df7d56 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -55,7 +55,8 @@ public: const std::string & marks_path_, const std::string & marks_file_extension_, const CompressionCodecPtr & compression_codec_, - size_t max_compress_block_size_); + size_t max_compress_block_size_, + const WriteSettings & query_write_settings); String escaped_column_name; std::string data_file_extension; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a3eec3e54bc..933814d27ba 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -115,7 +115,8 @@ void MergeTreeDataPartWriterWide::addStreams( part_path + stream_name, DATA_FILE_EXTENSION, part_path + stream_name, marks_file_extension, compression_codec, - settings.max_compress_block_size); + settings.max_compress_block_size, + settings.query_write_settings); }; ISerialization::SubstreamPath path; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index f8f69b19458..60eb11a4fc0 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -130,9 +130,11 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( return std::make_unique(); const auto & settings = context->getSettingsRef(); + const auto & metadata_for_reading = storage_snapshot->getMetadataForQuery(); const auto & snapshot_data = assert_cast(*storage_snapshot->data); + const auto & parts = snapshot_data.parts; if (!query_info.projection) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index da43f5c2cb0..e39ae7a4037 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -433,7 +434,9 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( const auto & index_factory = MergeTreeIndexFactory::instance(); auto out = std::make_unique(new_data_part, metadata_snapshot, columns, - index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); + index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, + context->getCurrentTransaction(), false, false, context->getWriteSettings()); + out->writeWithPermutation(block, perm_ptr); @@ -448,7 +451,11 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPart( temp_part.streams.emplace_back(std::move(stream)); } } - auto finalizer = out->finalizePartAsync(new_data_part, data_settings->fsync_after_insert); + auto finalizer = out->finalizePartAsync( + new_data_part, + data_settings->fsync_after_insert, + nullptr, nullptr, + context->getWriteSettings()); temp_part.part = new_data_part; temp_part.streams.emplace_back(TemporaryPart::Stream{.stream = std::move(out), .finalizer = std::move(finalizer)}); @@ -561,7 +568,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( metadata_snapshot, columns, MergeTreeIndices{}, - compression_codec); + compression_codec, + NO_TRANSACTION_PTR); 
out->writeWithPermutation(block, perm_ptr); auto finalizer = out->finalizePartAsync(new_data_part, false); diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 4fe424c98ff..85cf3e9eda6 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -30,6 +31,7 @@ struct MergeTreeWriterSettings MergeTreeWriterSettings( const Settings & global_settings, + const WriteSettings & query_write_settings_, const MergeTreeSettingsPtr & storage_settings, bool can_use_adaptive_granularity_, bool rewrite_primary_key_, @@ -42,6 +44,7 @@ struct MergeTreeWriterSettings , can_use_adaptive_granularity(can_use_adaptive_granularity_) , rewrite_primary_key(rewrite_primary_key_) , blocks_are_granules_size(blocks_are_granules_size_) + , query_write_settings(query_write_settings_) { } @@ -50,6 +53,7 @@ struct MergeTreeWriterSettings bool can_use_adaptive_granularity; bool rewrite_primary_key; bool blocks_are_granules_size; + WriteSettings query_write_settings; }; } diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 0f71742fb09..bfa9129bd53 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -43,22 +44,34 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse mutation version from file name, expected 'mutation_.txt', got '{}'", file_name_); } -MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number) +MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, + const TransactionID & tid_, const WriteSettings & settings) : create_time(time(nullptr)) , commands(std::move(commands_)) , disk(std::move(disk_)) , path_prefix(path_prefix_) , file_name("tmp_mutation_" + toString(tmp_number) + ".txt") , is_temp(true) + , tid(tid_) { try { - auto out = disk->writeFile(path_prefix + file_name); + auto out = disk->writeFile(std::filesystem::path(path_prefix) / file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings); *out << "format version: 1\n" << "create time: " << LocalDateTime(create_time) << "\n"; *out << "commands: "; commands.writeText(*out); *out << "\n"; + if (tid.isPrehistoric()) + { + csn = Tx::PrehistoricCSN; + } + else + { + *out << "tid: "; + TransactionID::write(tid, *out); + *out << "\n"; + } out->sync(); } catch (...) 
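A minimal sketch of the mutation entry layout implied by the constructor above and by writeCSN()/the reading constructor below: the format header, create time and commands are followed by an optional "tid: ..." line for transactional mutations, and "csn: ..." is appended once the owning transaction commits; entries without a "tid:" line are read back as Tx::PrehistoricTID / Tx::PrehistoricCSN. The helper uses a plain std::ostream and a stand-in FakeTID type (both hypothetical, not ClickHouse APIs); the real code serializes into a WriteBuffer via TransactionID::write.

#include <iostream>
#include <string>

/// Stand-in for TransactionID: holds a pre-rendered textual form of the TID.
/// (The exact text produced by TransactionID::write is not reproduced here.)
struct FakeTID
{
    std::string text;
    bool is_prehistoric = true;
};

/// Hypothetical helper mirroring the field order written by the constructor above.
/// "csn: <N>" is intentionally not written here; writeCSN() appends it on commit.
void writeMutationEntry(std::ostream & out, const std::string & create_time,
                        const std::string & commands_text, const FakeTID & tid)
{
    out << "format version: 1\n";
    out << "create time: " << create_time << "\n";
    out << "commands: " << commands_text << "\n";
    if (!tid.is_prehistoric)
        out << "tid: " << tid.text << "\n";
}

int main()
{
    /// Sample values are made up for illustration.
    writeMutationEntry(std::cout, "2022-01-01 00:00:00", "UPDATE x = 1 WHERE 1", {"(1, 1, 'host')", false});
}

Keeping both the "tid:" and "csn:" lines optional is what lets mutation entries written before this change remain readable after an upgrade.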
@@ -90,6 +103,14 @@ void MergeTreeMutationEntry::removeFile() } } +void MergeTreeMutationEntry::writeCSN(CSN csn_) +{ + csn = csn_; + auto out = disk->writeFile(path_prefix + file_name, 256, WriteMode::Append); + *out << "csn: " << csn << "\n"; + out->finalize(); +} + MergeTreeMutationEntry::MergeTreeMutationEntry(DiskPtr disk_, const String & path_prefix_, const String & file_name_) : disk(std::move(disk_)) , path_prefix(path_prefix_) @@ -111,6 +132,23 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(DiskPtr disk_, const String & pat commands.readText(*buf); *buf >> "\n"; + if (buf->eof()) + { + tid = Tx::PrehistoricTID; + csn = Tx::PrehistoricCSN; + } + else + { + *buf >> "tid: "; + tid = TransactionID::read(*buf); + *buf >> "\n"; + + if (!buf->eof()) + { + *buf >> "csn: " >> csn >> "\n"; + } + } + assertEOF(*buf); } diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index 7554a03836e..3d4c4d0c4a1 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -28,8 +29,15 @@ struct MergeTreeMutationEntry time_t latest_fail_time = 0; String latest_fail_reason; + /// ID of transaction which has created mutation. + TransactionID tid = Tx::PrehistoricTID; + /// CSN of transaction which has created mutation + /// or UnknownCSN if it's not committed (yet) or RolledBackCSN if it's rolled back or PrehistoricCSN if there is no transaction. + CSN csn = Tx::UnknownCSN; + /// Create a new entry and write it to a temporary file. - MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number); + MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, + const TransactionID & tid_, const WriteSettings & settings); MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; @@ -38,6 +46,8 @@ struct MergeTreeMutationEntry void removeFile(); + void writeCSN(CSN csn_); + static String versionToFileName(UInt64 block_number_); static UInt64 tryParseFileName(const String & file_name_); static UInt64 parseFileName(const String & file_name_); diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 4955a63387c..ddbb536d7da 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -384,16 +384,18 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM std::unique_ptr MergeTreePartition::store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block; - return store(partition_key_sample, disk, part_path, checksums); + const auto & context = storage.getContext(); + const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, context).sample_block; + return store(partition_key_sample, disk, part_path, checksums, context->getWriteSettings()); } -std::unique_ptr MergeTreePartition::store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const +std::unique_ptr MergeTreePartition::store( + const Block & 
partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const { if (!partition_key_sample) return nullptr; - auto out = disk->writeFile(part_path + "partition.dat"); + auto out = disk->writeFile(std::filesystem::path(part_path) / "partition.dat", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings); HashingWriteBuffer out_hashing(*out); for (size_t i = 0; i < value.size(); ++i) { diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index e7328799e4c..1afb833498c 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -43,7 +43,7 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. [[nodiscard]] std::unique_ptr store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const; - [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const; + [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; void assign(const MergeTreePartition & other) { value = other.value; } diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 9cc3ffe6e9e..83b58960ad1 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -94,7 +94,7 @@ bool MergeTreePartsMover::selectPartsForMove( unsigned parts_to_move_by_ttl_rules = 0; double parts_to_move_total_size_bytes = 0.0; - MergeTreeData::DataPartsVector data_parts = data->getDataPartsVector(); + MergeTreeData::DataPartsVector data_parts = data->getDataPartsVectorForInternalUsage(); if (data_parts.empty()) return false; @@ -231,6 +231,7 @@ MergeTreeData::DataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEnt LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part->getFullPath()); cloned_part->loadColumnsChecksumsIndexes(true, true); + cloned_part->loadVersionMetadata(); cloned_part->modification_time = disk->getLastModified(cloned_part->getFullRelativePath()).epochTime(); return cloned_part; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index a3d2d607873..0d984dc4dee 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -61,6 +61,7 @@ struct Settings; M(UInt64, merge_selecting_sleep_ms, 5000, "Sleep time for merge selecting when no part selected, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \ M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \ + M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ \ /** Inserts settings. 
*/ \ M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ @@ -78,7 +79,7 @@ struct Settings; M(Seconds, prefer_fetch_merged_part_time_threshold, 3600, "If time passed after replication log entry creation exceeds this threshold and sum size of parts is greater than \"prefer_fetch_merged_part_size_threshold\", prefer fetching merged part from replica instead of doing merge locally. To speed up very long merges.", 0) \ M(UInt64, prefer_fetch_merged_part_size_threshold, 10ULL * 1024 * 1024 * 1024, "If sum size of parts exceeds this threshold and time passed after replication log entry creation is greater than \"prefer_fetch_merged_part_time_threshold\", prefer fetching merged part from replica instead of doing merge locally. To speed up very long merges.", 0) \ M(Seconds, execute_merges_on_single_replica_time_threshold, 0, "When greater than zero only a single replica starts the merge immediately, others wait up to that amount of time to download the result instead of doing merges locally. If the chosen replica doesn't finish the merge during that amount of time, fallback to standard behavior happens.", 0) \ - M(Seconds, remote_fs_execute_merges_on_single_replica_time_threshold, 3 * 60 * 60, "When greater than zero only a single replica starts the merge immediatelys when merged part on shared storage and 'allow_remote_fs_zero_copy_replication' is enabled.", 0) \ + M(Seconds, remote_fs_execute_merges_on_single_replica_time_threshold, 3 * 60 * 60, "When greater than zero only a single replica starts the merge immediately if merged part on shared storage and 'allow_remote_fs_zero_copy_replication' is enabled.", 0) \ M(Seconds, try_fetch_recompressed_part_timeout, 7200, "Recompression works slow in most cases, so we don't start merge with recompression until this timeout and trying to fetch recompressed part from replica which assigned this merge with recompression.", 0) \ M(Bool, always_fetch_merged_part, false, "If true, replica never merge parts and always download merged parts from other replicas.", 0) \ M(UInt64, max_suspicious_broken_parts, 10, "Max broken parts, if more - deny automatic deletion.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 7a4ecae24b3..7e8ee3dcbef 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -50,7 +50,7 @@ struct MergeTreeSink::DelayedChunk void MergeTreeSink::consume(Chunk chunk) { auto block = getHeader().cloneWithColumns(chunk.detachColumns()); - auto storage_snapshot = storage.getStorageSnapshot(metadata_snapshot); + auto storage_snapshot = storage.getStorageSnapshot(metadata_snapshot, context); storage.writer.deduceTypesOfObjectColumns(storage_snapshot, block); auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context); @@ -134,7 +134,7 @@ void MergeTreeSink::finishDelayedChunk() auto & part = partition.temp_part.part; /// Part can be deduplicated, so increment counters and add to part log only if it's really added - if (storage.renameTempPartAndAdd(part, &storage.increment, nullptr, storage.getDeduplicationLog(), partition.block_dedup_token)) + if (storage.renameTempPartAndAdd(part, context->getCurrentTransaction().get(), &storage.increment, nullptr, storage.getDeduplicationLog(), partition.block_dedup_token)) { PartLog::addNewPart(storage.getContext(), part, partition.elapsed_ns); diff 
--git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index d7cddfe9c14..bab0947a8ff 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -197,7 +197,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(const Stor metadata_snapshot, block.getNamesAndTypesList(), {}, - CompressionCodecFactory::instance().get("NONE", {})); + CompressionCodecFactory::instance().get("NONE", {}), + NO_TRANSACTION_PTR); part->minmax_idx->update(block, storage.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); part->partition.create(metadata_snapshot, block, 0, context); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index f94c89e20bd..6acbfacd4c1 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -18,14 +19,17 @@ MergedBlockOutputStream::MergedBlockOutputStream( const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec_, + const MergeTreeTransactionPtr & txn, bool reset_columns_, - bool blocks_are_granules_size) + bool blocks_are_granules_size, + const WriteSettings & write_settings) : IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_) , columns_list(columns_list_) , default_codec(default_codec_) { MergeTreeWriterSettings writer_settings( storage.getContext()->getSettings(), + write_settings, storage.getSettings(), data_part->index_granularity_info.is_adaptive, /* rewrite_primary_key = */ true, @@ -34,6 +38,13 @@ MergedBlockOutputStream::MergedBlockOutputStream( if (!part_path.empty()) volume->getDisk()->createDirectories(part_path); + /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. + TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; + /// NOTE do not pass context for writing to system.transactions_info_log, + /// because part may have temporary name (with temporary block numbers). Will write it later. + data_part->version.setCreationTID(tid, nullptr); + data_part->storeVersionMetadata(); + writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, default_codec, writer_settings); } @@ -122,7 +133,8 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( MergeTreeData::MutableDataPartPtr & new_part, bool sync, const NamesAndTypesList * total_columns_list, - MergeTreeData::DataPart::Checksums * additional_column_checksums) + MergeTreeData::DataPart::Checksums * additional_column_checksums, + const WriteSettings & write_settings) { /// Finish write and get checksums. 
MergeTreeData::DataPart::Checksums checksums; @@ -156,7 +168,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( auto finalizer = std::make_unique(*writer, new_part, files_to_remove_after_sync, sync); if (new_part->isStoredOnDisk()) - finalizer->written_files = finalizePartOnDisk(new_part, checksums); + finalizer->written_files = finalizePartOnDisk(new_part, checksums, write_settings); new_part->rows_count = rows_count; new_part->modification_time = time(nullptr); @@ -174,14 +186,15 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDisk( const MergeTreeData::DataPartPtr & new_part, - MergeTreeData::DataPart::Checksums & checksums) + MergeTreeData::DataPart::Checksums & checksums, + const WriteSettings & settings) { WrittenFiles written_files; if (new_part->isProjectionPart()) { if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { - auto count_out = volume->getDisk()->writeFile(part_path + "count.txt", 4096); + auto count_out = volume->getDisk()->writeFile(part_path + "count.txt", 4096, WriteMode::Rewrite, settings); HashingWriteBuffer count_out_hashing(*count_out); writeIntText(rows_count, count_out_hashing); count_out_hashing.next(); @@ -195,7 +208,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { if (new_part->uuid != UUIDHelpers::Nil) { - auto out = volume->getDisk()->writeFile(fs::path(part_path) / IMergeTreeDataPart::UUID_FILE_NAME, 4096); + auto out = volume->getDisk()->writeFile(fs::path(part_path) / IMergeTreeDataPart::UUID_FILE_NAME, 4096, WriteMode::Rewrite, settings); HashingWriteBuffer out_hashing(*out); writeUUIDText(new_part->uuid, out_hashing); checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); @@ -221,7 +234,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis } { - auto count_out = volume->getDisk()->writeFile(fs::path(part_path) / "count.txt", 4096); + auto count_out = volume->getDisk()->writeFile(fs::path(part_path) / "count.txt", 4096, WriteMode::Rewrite, settings); HashingWriteBuffer count_out_hashing(*count_out); writeIntText(rows_count, count_out_hashing); count_out_hashing.next(); @@ -235,7 +248,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (!new_part->ttl_infos.empty()) { /// Write a file with ttl infos in json format. 
- auto out = volume->getDisk()->writeFile(fs::path(part_path) / "ttl.txt", 4096); + auto out = volume->getDisk()->writeFile(fs::path(part_path) / "ttl.txt", 4096, WriteMode::Rewrite, settings); HashingWriteBuffer out_hashing(*out); new_part->ttl_infos.write(out_hashing); checksums.files["ttl.txt"].file_size = out_hashing.count(); @@ -246,7 +259,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (!new_part->getSerializationInfos().empty()) { - auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096); + auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, WriteMode::Rewrite, settings); HashingWriteBuffer out_hashing(*out); new_part->getSerializationInfos().writeJSON(out_hashing); checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); @@ -257,7 +270,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { /// Write a file with a description of columns. - auto out = volume->getDisk()->writeFile(fs::path(part_path) / "columns.txt", 4096); + auto out = volume->getDisk()->writeFile(fs::path(part_path) / "columns.txt", 4096, WriteMode::Rewrite, settings); new_part->getColumns().writeText(*out); out->preFinalize(); written_files.emplace_back(std::move(out)); @@ -265,7 +278,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (default_codec != nullptr) { - auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096); + auto out = volume->getDisk()->writeFile(part_path + IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, WriteMode::Rewrite, settings); DB::writeText(queryToString(default_codec->getFullCodecDesc()), *out); out->preFinalize(); written_files.emplace_back(std::move(out)); @@ -278,7 +291,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis { /// Write file with checksums. 
- auto out = volume->getDisk()->writeFile(fs::path(part_path) / "checksums.txt", 4096); + auto out = volume->getDisk()->writeFile(fs::path(part_path) / "checksums.txt", 4096, WriteMode::Rewrite, settings); checksums.write(*out); out->preFinalize(); written_files.emplace_back(std::move(out)); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index c17cfd22cd8..67dec1923e8 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -19,8 +20,10 @@ public: const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec_, + const MergeTreeTransactionPtr & txn, bool reset_columns_ = false, - bool blocks_are_granules_size = false); + bool blocks_are_granules_size = false, + const WriteSettings & write_settings = {}); Block getHeader() const { return metadata_snapshot->getSampleBlock(); } @@ -54,7 +57,8 @@ public: MergeTreeData::MutableDataPartPtr & new_part, bool sync, const NamesAndTypesList * total_columns_list = nullptr, - MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); + MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr, + const WriteSettings & settings = {}); void finalizePart( MergeTreeData::MutableDataPartPtr & new_part, @@ -71,7 +75,8 @@ private: using WrittenFiles = std::vector>; WrittenFiles finalizePartOnDisk( const MergeTreeData::DataPartPtr & new_part, - MergeTreeData::DataPart::Checksums & checksums); + MergeTreeData::DataPart::Checksums & checksums, + const WriteSettings & write_settings); NamesAndTypesList columns_list; IMergeTreeDataPart::MinMaxIndex minmax_idx; diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 5a706165000..4fb993bfcc7 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -26,6 +27,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( MergeTreeWriterSettings writer_settings( global_settings, + data_part->storage.getContext()->getWriteSettings(), storage_settings, index_granularity_info ? 
index_granularity_info->is_adaptive : data_part->storage.canUseAdaptiveGranularity(), /* rewrite_primary_key = */false); diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 309432e4675..de31fbe3c56 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -98,7 +98,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); StorageMetadataPtr metadata_snapshot = storage.getInMemoryMetadataPtr(); - transaction_ptr = std::make_unique(storage); + transaction_ptr = std::make_unique(storage, NO_TRANSACTION_RAW); future_mutated_part = std::make_shared(); future_mutated_part->name = entry.new_part_name; @@ -152,7 +152,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() mutate_task = storage.merger_mutator.mutatePartToTemporaryPart( future_mutated_part, metadata_snapshot, commands, merge_mutate_entry.get(), - entry.create_time, fake_query_context, reserved_space, table_lock_holder); + entry.create_time, fake_query_context, NO_TRANSACTION_PTR, reserved_space, table_lock_holder); /// Adjust priority for (auto & item : future_mutated_part->parts) @@ -171,7 +171,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit { new_part = mutate_task->getFuture().get(); - storage.renameTempPartAndReplace(new_part, nullptr, transaction_ptr.get()); + storage.renameTempPartAndReplace(new_part, NO_TRANSACTION_RAW, nullptr, transaction_ptr.get()); try { diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index e3fa07dd0c0..80a33bfe0e3 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -1,7 +1,7 @@ #include #include - +#include namespace DB { @@ -55,7 +55,7 @@ void MutatePlainMergeTreeTask::prepare() mutate_task = storage.merger_mutator.mutatePartToTemporaryPart( future_part, metadata_snapshot, merge_mutate_entry->commands, merge_list_entry.get(), - time(nullptr), fake_query_context, merge_mutate_entry->tagger->reserved_space, table_lock_holder); + time(nullptr), fake_query_context, merge_mutate_entry->txn, merge_mutate_entry->tagger->reserved_space, table_lock_holder); } bool MutatePlainMergeTreeTask::executeStep() @@ -83,7 +83,8 @@ bool MutatePlainMergeTreeTask::executeStep() new_part = mutate_task->getFuture().get(); - storage.renameTempPartAndReplace(new_part); + /// FIXME Transactions: it's too optimistic, better to lock parts before starting transaction + storage.renameTempPartAndReplace(new_part, merge_mutate_entry->txn.get()); storage.updateMutationEntriesErrors(future_part, true, ""); write_part_log({}); @@ -92,7 +93,11 @@ bool MutatePlainMergeTreeTask::executeStep() } catch (...) 
{ - storage.updateMutationEntriesErrors(future_part, false, getCurrentExceptionMessage(false)); + if (merge_mutate_entry->txn) + merge_mutate_entry->txn->onException(); + String exception_message = getCurrentExceptionMessage(false); + LOG_ERROR(&Poco::Logger::get("MutatePlainMergeTreeTask"), "{}", exception_message); + storage.updateMutationEntriesErrors(future_part, false, exception_message); write_part_log(ExecutionStatus::fromCurrentException()); tryLogCurrentException(__PRETTY_FUNCTION__); return false; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 1fe701c54ae..b9bebc665b2 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -419,14 +420,15 @@ void finalizeMutatedPart( const MergeTreeDataPartPtr & source_part, MergeTreeData::MutableDataPartPtr new_data_part, ExecuteTTLType execute_ttl_type, - const CompressionCodecPtr & codec) + const CompressionCodecPtr & codec, + ContextPtr context) { auto disk = new_data_part->volume->getDisk(); auto part_path = fs::path(new_data_part->getFullRelativePath()); if (new_data_part->uuid != UUIDHelpers::Nil) { - auto out = disk->writeFile(part_path / IMergeTreeDataPart::UUID_FILE_NAME, 4096); + auto out = disk->writeFile(part_path / IMergeTreeDataPart::UUID_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out); writeUUIDText(new_data_part->uuid, out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); @@ -436,7 +438,7 @@ void finalizeMutatedPart( if (execute_ttl_type != ExecuteTTLType::NONE) { /// Write a file with ttl infos in json format. - auto out_ttl = disk->writeFile(part_path / "ttl.txt", 4096); + auto out_ttl = disk->writeFile(part_path / "ttl.txt", 4096, WriteMode::Rewrite, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out_ttl); new_data_part->ttl_infos.write(out_hashing); new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count(); @@ -445,7 +447,7 @@ void finalizeMutatedPart( if (!new_data_part->getSerializationInfos().empty()) { - auto out = disk->writeFile(part_path / IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096); + auto out = disk->writeFile(part_path / IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings()); HashingWriteBuffer out_hashing(*out); new_data_part->getSerializationInfos().writeJSON(out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); @@ -454,18 +456,18 @@ void finalizeMutatedPart( { /// Write file with checksums. - auto out_checksums = disk->writeFile(part_path / "checksums.txt", 4096); + auto out_checksums = disk->writeFile(part_path / "checksums.txt", 4096, WriteMode::Rewrite, context->getWriteSettings()); new_data_part->checksums.write(*out_checksums); } /// close fd { - auto out = disk->writeFile(part_path / IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096); + auto out = disk->writeFile(part_path / IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, WriteMode::Rewrite, context->getWriteSettings()); DB::writeText(queryToString(codec->getFullCodecDesc()), *out); } { /// Write a file with a description of columns. 
- auto out_columns = disk->writeFile(part_path / "columns.txt", 4096); + auto out_columns = disk->writeFile(part_path / "columns.txt", 4096, WriteMode::Rewrite, context->getWriteSettings()); new_data_part->getColumns().writeText(*out_columns); } /// close fd @@ -545,6 +547,8 @@ struct MutationContext bool need_sync; ExecuteTTLType execute_ttl_type{ExecuteTTLType::NONE}; + + MergeTreeTransactionPtr txn; }; using MutationContextPtr = std::shared_ptr; @@ -650,6 +654,7 @@ public: false, // TODO Do we need deduplicate for projections {}, projection_merging_params, + NO_TRANSACTION_PTR, ctx->new_data_part.get(), ".tmp_proj"); @@ -971,7 +976,8 @@ private: ctx->metadata_snapshot, ctx->new_data_part->getColumns(), skip_part_indices, - ctx->compression_codec); + ctx->compression_codec, + ctx->txn); ctx->mutating_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); ctx->mutating_executor = std::make_unique(ctx->mutating_pipeline); @@ -1058,6 +1064,13 @@ private: ctx->disk->createDirectories(ctx->new_part_tmp_path); + /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. + TransactionID tid = ctx->txn ? ctx->txn->tid : Tx::PrehistoricTID; + /// NOTE do not pass context for writing to system.transactions_info_log, + /// because part may have temporary name (with temporary block numbers). Will write it later. + ctx->new_data_part->version.setCreationTID(tid, nullptr); + ctx->new_data_part->storeVersionMetadata(); + /// Create hardlinks for unchanged files for (auto it = ctx->disk->iterateDirectory(ctx->source_part->getFullRelativePath()); it->isValid(); it->next()) { @@ -1162,7 +1175,7 @@ private: } } - MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec); + MutationHelpers::finalizeMutatedPart(ctx->source_part, ctx->new_data_part, ctx->execute_ttl_type, ctx->compression_codec, ctx->context); } @@ -1192,6 +1205,7 @@ MutateTask::MutateTask( ContextPtr context_, ReservationSharedPtr space_reservation_, TableLockHolder & table_lock_holder_, + const MergeTreeTransactionPtr & txn, MergeTreeData & data_, MergeTreeDataMergerMutator & mutator_, ActionBlocker & merges_blocker_) @@ -1209,6 +1223,7 @@ MutateTask::MutateTask( ctx->metadata_snapshot = metadata_snapshot_; ctx->space_reservation = space_reservation_; ctx->storage_columns = metadata_snapshot_->getColumns().getAllPhysical(); + ctx->txn = txn; } @@ -1268,7 +1283,7 @@ bool MutateTask::prepare() storage_from_source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) { LOG_TRACE(ctx->log, "Part {} doesn't change up to mutation version {}", ctx->source_part->name, ctx->future_part->part_info.mutation); - promise.set_value(ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_clone_", ctx->future_part->part_info, ctx->metadata_snapshot)); + promise.set_value(ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_clone_", ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn)); return false; } else @@ -1293,6 +1308,8 @@ bool MutateTask::prepare() } ctx->single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); + /// FIXME new_data_part is not used in the case when we clone part with cloneAndLoadDataPartOnSameDisk and return false + /// Is it possible to handle this case earlier? 
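// The mutation path above now stamps every freshly created part with the TID of the transaction
// that created it (falling back to a prehistoric TID when there is none) and persists that version
// metadata right away. A minimal standalone sketch of that idea; TxnId, PartVersionStub and the
// txn_version.txt file name are simplified stand-ins, not the actual ClickHouse types or file layout.
#include <cstdint>
#include <fstream>
#include <optional>
#include <string>

using TxnId = std::uint64_t;
constexpr TxnId kPrehistoricTid = 1;   // stand-in for "created outside any transaction"

struct PartVersionStub
{
    TxnId creation_tid = 0;

    void setCreationTID(TxnId tid) { creation_tid = tid; }

    // Persist next to the part so the TID survives a restart; temporary parts get it too,
    // so they can be told apart from parts created before transactions existed.
    void storeVersionMetadata(const std::string & part_path) const
    {
        std::ofstream out(part_path + "/txn_version.txt", std::ios::trunc);
        out << "creation_tid: " << creation_tid << '\n';
    }
};

void stampNewPart(PartVersionStub & version, const std::optional<TxnId> & txn, const std::string & part_path)
{
    version.setCreationTID(txn ? *txn : kPrehistoricTid);
    version.storeVersionMetadata(part_path);
}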
ctx->new_data_part = ctx->data->createPart( ctx->future_part->name, ctx->future_part->type, ctx->future_part->part_info, ctx->single_disk_volume, "tmp_mut_" + ctx->future_part->name); @@ -1357,7 +1374,7 @@ bool MutateTask::prepare() && ctx->files_to_rename.empty()) { LOG_TRACE(ctx->log, "Part {} doesn't change up to mutation version {} (optimized)", ctx->source_part->name, ctx->future_part->part_info.mutation); - promise.set_value(ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_clone_", ctx->future_part->part_info, ctx->metadata_snapshot)); + promise.set_value(ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, "tmp_mut_", ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn)); return false; } diff --git a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index 79c3bff213a..aa38ee34b4a 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -32,6 +32,7 @@ public: ContextPtr context_, ReservationSharedPtr space_reservation_, TableLockHolder & table_lock_holder_, + const MergeTreeTransactionPtr & txn, MergeTreeData & data_, MergeTreeDataMergerMutator & mutator_, ActionBlocker & merges_blocker_); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 5f805c39ae2..c5798aaefe5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1981,6 +1981,7 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( bool ReplicatedMergeTreeMergePredicate::operator()( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, + const MergeTreeTransaction *, String * out_reason) const { if (left) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 1d10c504b3c..ae0ca806344 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -483,6 +483,7 @@ public: /// Depending on the existence of left part checks a merge predicate for two parts or for single part. bool operator()(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, + const MergeTreeTransaction * txn, String * out_reason = nullptr) const; /// Can we assign a merge with these two parts? diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 63fa2071056..187e4eb96c5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -150,7 +150,7 @@ void ReplicatedMergeTreeSink::consume(Chunk chunk) if (quorum) checkQuorumPrecondition(zookeeper); - auto storage_snapshot = storage.getStorageSnapshot(metadata_snapshot); + auto storage_snapshot = storage.getStorageSnapshot(metadata_snapshot, context); storage.writer.deduceTypesOfObjectColumns(storage_snapshot, block); auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context); @@ -287,6 +287,7 @@ void ReplicatedMergeTreeSink::writeExistingPart(MergeTreeData::MutableDataPartPt try { + part->version.setCreationTID(Tx::PrehistoricTID, nullptr); commitPart(zookeeper, part, ""); PartLog::addNewPart(storage.getContext(), part, watch.elapsed()); } @@ -471,12 +472,12 @@ void ReplicatedMergeTreeSink::commitPart( /// Information about the part. 
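// The merge predicate in ReplicatedMergeTreeQueue now receives the transaction requesting the merge
// as an extra argument, so non-transactional callers simply pass nullptr. A small sketch of that
// callable shape with stand-in types (PartStub, TxnStub); the real predicate also consults the
// replication queue and part states.
#include <cstdint>
#include <functional>
#include <memory>
#include <string>

struct PartStub { std::string name; };
struct TxnStub  { std::uint64_t snapshot = 0; };

using CanMergePredicate = std::function<bool(
    const std::shared_ptr<const PartStub> & left,
    const std::shared_ptr<const PartStub> & right,
    const TxnStub * txn,              // nullptr == caller runs outside any transaction
    std::string * out_reason)>;

bool tryExplainRejection(const CanMergePredicate & can_merge,
                         const std::shared_ptr<const PartStub> & left,
                         const std::shared_ptr<const PartStub> & right,
                         const TxnStub * txn)
{
    std::string reason;
    if (can_merge(left, right, txn, &reason))
        return true;
    // reason now describes why the pair was rejected (visibility, removal locks, ...)
    return false;
}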
storage.getCommitPartOps(ops, part, block_id_path); - MergeTreeData::Transaction transaction(storage); /// If you can not add a part to ZK, we'll remove it back from the working set. + MergeTreeData::Transaction transaction(storage, NO_TRANSACTION_RAW); /// If you can not add a part to ZK, we'll remove it back from the working set. bool renamed = false; try { - renamed = storage.renameTempPartAndAdd(part, nullptr, &transaction); + renamed = storage.renameTempPartAndAdd(part, NO_TRANSACTION_RAW, nullptr, &transaction); } catch (const Exception & e) { diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/localBackup.cpp index 1a04aa4b678..8bb3e4cf78a 100644 --- a/src/Storages/MergeTree/localBackup.cpp +++ b/src/Storages/MergeTree/localBackup.cpp @@ -14,7 +14,7 @@ namespace ErrorCodes } -static void localBackupImpl(const DiskPtr & disk, const String & source_path, const String & destination_path, size_t level, +static void localBackupImpl(const DiskPtr & disk, const String & source_path, const String & destination_path, bool make_source_readonly, size_t level, std::optional max_level) { if (max_level && level > *max_level) @@ -32,12 +32,13 @@ static void localBackupImpl(const DiskPtr & disk, const String & source_path, co if (!disk->isDirectory(source)) { - disk->setReadOnly(source); + if (make_source_readonly) + disk->setReadOnly(source); disk->createHardLink(source, destination); } else { - localBackupImpl(disk, source, destination, level + 1, max_level); + localBackupImpl(disk, source, destination, make_source_readonly, level + 1, max_level); } } } @@ -80,7 +81,7 @@ private: }; } -void localBackup(const DiskPtr & disk, const String & source_path, const String & destination_path, std::optional max_level) +void localBackup(const DiskPtr & disk, const String & source_path, const String & destination_path, bool make_source_readonly, std::optional max_level) { if (disk->exists(destination_path) && !disk->isDirectoryEmpty(destination_path)) { @@ -100,7 +101,7 @@ void localBackup(const DiskPtr & disk, const String & source_path, const String { try { - localBackupImpl(disk, source_path, destination_path, 0, max_level); + localBackupImpl(disk, source_path, destination_path, make_source_readonly, 0, max_level); } catch (const DB::ErrnoException & e) { diff --git a/src/Storages/MergeTree/localBackup.h b/src/Storages/MergeTree/localBackup.h index 066ba8f7489..c6a46620447 100644 --- a/src/Storages/MergeTree/localBackup.h +++ b/src/Storages/MergeTree/localBackup.h @@ -20,6 +20,6 @@ namespace DB * If max_level is specified, than only files which depth relative source_path less or equal max_level will be copied. * So, if max_level=0 than only direct file child are copied. 
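// localBackup()/localBackupImpl() gain a make_source_readonly flag so callers can create hard-link
// backups without freezing the source files. A simplified sketch of the recursion using
// std::filesystem instead of the IDisk interface; the body is a stand-in, not the real implementation.
#include <cstddef>
#include <filesystem>
#include <optional>

namespace fs = std::filesystem;

static void localBackupSketch(const fs::path & source, const fs::path & destination,
                              bool make_source_readonly, std::size_t level,
                              std::optional<std::size_t> max_level)
{
    if (max_level && level > *max_level)
        return;

    fs::create_directories(destination);
    for (const auto & entry : fs::directory_iterator(source))
    {
        const fs::path dst = destination / entry.path().filename();
        if (entry.is_directory())
        {
            localBackupSketch(entry.path(), dst, make_source_readonly, level + 1, max_level);
        }
        else
        {
            if (make_source_readonly)   // skipped for backups that must stay mutable
                fs::permissions(entry.path(),
                                fs::perms::owner_write | fs::perms::group_write | fs::perms::others_write,
                                fs::perm_options::remove);
            fs::create_hard_link(entry.path(), dst);
        }
    }
}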
*/ -void localBackup(const DiskPtr & disk, const String & source_path, const String & destination_path, std::optional max_level = {}); +void localBackup(const DiskPtr & disk, const String & source_path, const String & destination_path, bool make_source_readonly = true, std::optional max_level = {}); } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index d12e91f62e4..a126478857b 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -246,8 +246,9 @@ void StorageMaterializedPostgreSQL::dropInnerTableIfAny(bool no_delay, ContextPt return; replication_handler->shutdownFinal(); + replication_handler.reset(); - auto nested_table = getNested(); + auto nested_table = tryGetNested() != nullptr; if (nested_table) InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, getNestedStorageID(), no_delay); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index b5e120d9405..54a71f18ce1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1026,7 +1026,7 @@ bool StorageRabbitMQ::streamToViews() InterpreterInsertQuery interpreter(insert, rabbitmq_context, false, true, true); auto block_io = interpreter.execute(); - auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr()); + auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); auto column_names = block_io.pipeline.getHeader().getNames(); auto sample_block = storage_snapshot->getSampleBlockForColumns(column_names); diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index cf1c5c35629..a03ccb5cf43 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -54,7 +54,7 @@ Pipe readFinalFromNestedStorage( filter_column_name = expressions->children.back()->getColumnName(); } - auto nested_snapshot = nested_storage->getStorageSnapshot(nested_metadata); + auto nested_snapshot = nested_storage->getStorageSnapshot(nested_metadata, context); Pipe pipe = nested_storage->read(require_columns_name, nested_snapshot, query_info, context, processed_stage, max_block_size, num_streams); pipe.addTableLock(lock); pipe.addStorageHolder(nested_storage); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 801e1b80a20..a503e79dc2c 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -203,7 +203,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage( /// TODO: Find a way to support projections for StorageBuffer query_info.ignore_projections = true; const auto & destination_metadata = destination->getInMemoryMetadataPtr(); - return destination->getQueryProcessingStage(local_context, to_stage, destination->getStorageSnapshot(destination_metadata), query_info); + return destination->getQueryProcessingStage(local_context, to_stage, destination->getStorageSnapshot(destination_metadata, local_context), query_info); } return QueryProcessingStage::FetchColumns; @@ -248,7 +248,7 @@ void StorageBuffer::read( auto destination_lock = destination->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto destination_metadata_snapshot = destination->getInMemoryMetadataPtr(); - auto 
destination_snapshot = destination->getStorageSnapshot(destination_metadata_snapshot); + auto destination_snapshot = destination->getStorageSnapshot(destination_metadata_snapshot, local_context); const bool dst_has_same_structure = std::all_of(column_names.begin(), column_names.end(), [metadata_snapshot, destination_metadata_snapshot](const String& column_name) { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 1a390f784a2..62ec2524a32 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -617,13 +617,13 @@ static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr return false; } -StorageSnapshotPtr StorageDistributed::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const +StorageSnapshotPtr StorageDistributed::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const { - return getStorageSnapshotForQuery(metadata_snapshot, nullptr); + return getStorageSnapshotForQuery(metadata_snapshot, nullptr, query_context); } StorageSnapshotPtr StorageDistributed::getStorageSnapshotForQuery( - const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query) const + const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query, ContextPtr /*query_context*/) const { /// If query doesn't use columns of type Object, don't deduce /// concrete types for them, because it required extra round trip. diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 317463783ee..a890cabd8b1 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -69,9 +69,9 @@ public: ColumnsDescriptionByShardNum objects_by_shard; }; - StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; StorageSnapshotPtr getStorageSnapshotForQuery( - const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query) const override; + const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query, ContextPtr query_context) const override; QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; diff --git a/src/Storages/StorageGenerateRandom.h b/src/Storages/StorageGenerateRandom.h index 2894b17d409..ca12d9c2841 100644 --- a/src/Storages/StorageGenerateRandom.h +++ b/src/Storages/StorageGenerateRandom.h @@ -24,6 +24,7 @@ public: size_t max_block_size, unsigned num_streams) override; + bool supportsTransactions() const override { return true; } private: UInt64 max_array_length = 10; UInt64 max_string_length = 10; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 0c79c31eb7a..610d16c8ea8 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -143,7 +143,7 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( /// converting and use it just like a normal view. 
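// getStorageSnapshot() now takes the query context across all storages, which lets snapshot
// construction see per-query state such as the current transaction. A stripped-down sketch of the
// changed interface with stand-in types; the real StorageSnapshot carries far more state.
#include <memory>

struct ContextStub   { /* settings, current transaction, ... */ };
struct MetadataStub  { };
struct SnapshotStub  { };

using ContextStubPtr  = std::shared_ptr<const ContextStub>;
using MetadataStubPtr = std::shared_ptr<const MetadataStub>;
using SnapshotStubPtr = std::shared_ptr<const SnapshotStub>;

class IStorageStub
{
public:
    virtual ~IStorageStub() = default;

    // Before: getStorageSnapshot(metadata). After: the query context rides along.
    virtual SnapshotStubPtr getStorageSnapshot(const MetadataStubPtr & metadata,
                                               ContextStubPtr query_context) const = 0;
};

SnapshotStubPtr readSideSnapshot(const IStorageStub & storage,
                                 const MetadataStubPtr & metadata,
                                 const ContextStubPtr & local_context)
{
    // Callers such as StorageBuffer/StorageMerge simply forward their local context.
    return storage.getStorageSnapshot(metadata, local_context);
}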
query_info.ignore_projections = true; const auto & target_metadata = getTargetTable()->getInMemoryMetadataPtr(); - return getTargetTable()->getQueryProcessingStage(local_context, to_stage, getTargetTable()->getStorageSnapshot(target_metadata), query_info); + return getTargetTable()->getQueryProcessingStage(local_context, to_stage, getTargetTable()->getStorageSnapshot(target_metadata, local_context), query_info); } Pipe StorageMaterializedView::read( @@ -175,7 +175,7 @@ void StorageMaterializedView::read( auto storage = getTargetTable(); auto lock = storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); auto target_metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto target_storage_snapshot = storage->getStorageSnapshot(target_metadata_snapshot); + auto target_storage_snapshot = storage->getStorageSnapshot(target_metadata_snapshot, local_context); if (query_info.order_optimizer) query_info.input_order_info = query_info.order_optimizer->getInputOrder(target_metadata_snapshot, local_context); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 41c97fbc4d8..35fe38058de 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -26,6 +26,7 @@ public: bool supportsIndexForIn() const override { return getTargetTable()->supportsIndexForIn(); } bool supportsParallelInsert() const override { return getTargetTable()->supportsParallelInsert(); } bool supportsSubcolumns() const override { return getTargetTable()->supportsSubcolumns(); } + bool supportsTransactions() const override { return getTargetTable()->supportsTransactions(); } bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /* metadata_snapshot */) const override { auto target_table = getTargetTable(); diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index a371ac1ccf8..3e2fe996fe8 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -122,10 +122,11 @@ class MemorySink : public SinkToStorage public: MemorySink( StorageMemory & storage_, - const StorageMetadataPtr & metadata_snapshot_) + const StorageMetadataPtr & metadata_snapshot_, + ContextPtr context) : SinkToStorage(metadata_snapshot_->getSampleBlock()) , storage(storage_) - , storage_snapshot(storage_.getStorageSnapshot(metadata_snapshot_)) + , storage_snapshot(storage_.getStorageSnapshot(metadata_snapshot_, context)) { } @@ -201,7 +202,7 @@ StorageMemory::StorageMemory( setInMemoryMetadata(storage_metadata); } -StorageSnapshotPtr StorageMemory::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const +StorageSnapshotPtr StorageMemory::getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr /*query_context*/) const { auto snapshot_data = std::make_unique(); snapshot_data->blocks = data.get(); @@ -271,9 +272,9 @@ Pipe StorageMemory::read( } -SinkToStoragePtr StorageMemory::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) +SinkToStoragePtr StorageMemory::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) { - return std::make_shared(*this, metadata_snapshot); + return std::make_shared(*this, metadata_snapshot, context); } diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 20f47828846..d4e82ccb4fc 100644 --- a/src/Storages/StorageMemory.h +++ 
b/src/Storages/StorageMemory.h @@ -37,7 +37,7 @@ public: std::shared_ptr blocks; }; - StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot) const override; + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; Pipe read( const Names & column_names, diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 96e6070e09e..8b71cfdb102 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -201,7 +201,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( stage_in_source_tables = std::max( stage_in_source_tables, table->getQueryProcessingStage(local_context, to_stage, - table->getStorageSnapshot(table->getInMemoryMetadataPtr()), query_info)); + table->getStorageSnapshot(table->getInMemoryMetadataPtr(), local_context), query_info)); } iterator->next(); @@ -338,7 +338,7 @@ Pipe StorageMerge::read( Aliases aliases; auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); auto storage_columns = storage_metadata_snapshot->getColumns(); - auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot); + auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, local_context); auto modified_query_info = getModifiedQueryInfo(query_info, modified_context, storage->getStorageID(), storage->as()); auto syntax_result = TreeRewriter(local_context).analyzeSelect( @@ -377,7 +377,7 @@ Pipe StorageMerge::read( } syntax_result = TreeRewriter(local_context).analyze( - required_columns_expr_list, storage_columns.getAllPhysical(), storage, storage->getStorageSnapshot(storage_metadata_snapshot)); + required_columns_expr_list, storage_columns.getAllPhysical(), storage, storage->getStorageSnapshot(storage_metadata_snapshot, local_context)); auto alias_actions = ExpressionAnalyzer(required_columns_expr_list, syntax_result, local_context).getActionsDAG(true); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e7e4528dc83..b25b47ac772 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,22 @@ namespace ActionLocks extern const StorageActionBlockType PartsMove; } +static MergeTreeTransactionPtr tryGetTransactionForMutation(const MergeTreeMutationEntry & mutation, Poco::Logger * log = nullptr) +{ + assert(!mutation.tid.isEmpty()); + if (mutation.tid.isPrehistoric()) + return {}; + + auto txn = TransactionLog::instance().tryGetRunningTransaction(mutation.tid.getHash()); + if (txn) + return txn; + + if (log) + LOG_WARNING(log, "Cannot find transaction {} which had started mutation {}, probably it finished", mutation.tid, mutation.file_name); + + return {}; +} + StorageMergeTree::StorageMergeTree( const StorageID & table_id_, @@ -89,7 +106,7 @@ StorageMergeTree::StorageMergeTree( { loadDataParts(has_force_restore_data_flag); - if (!attach && !getDataParts().empty()) + if (!attach && !getDataPartsForInternalUsage().empty()) throw Exception("Data directory for table already containing data parts - probably it was unclean DROP table or manual intervention. 
You must either clear directory by hand or use ATTACH TABLE instead of CREATE TABLE if you need to use that parts.", ErrorCodes::INCORRECT_DATA); increment.set(getMaxBlockNumber()); @@ -234,7 +251,7 @@ std::optional StorageMergeTree::totalRows(const Settings &) const std::optional StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr local_context) const { - auto parts = getDataPartsVector({DataPartState::Active}); + auto parts = getVisibleDataPartsVector(local_context); return totalRowsByPartitionPredicateImpl(query_info, local_context, parts); } @@ -266,15 +283,15 @@ void StorageMergeTree::drop() dropAllData(); } -void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) +void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) { { /// Asks to complete merges and does not allow them to start. /// This protects against "revival" of data for a removed partition after completion of merge. auto merge_blocker = stopMergesAndWait(); - auto parts_to_remove = getDataPartsVector(); - removePartsFromWorkingSet(parts_to_remove, true); + auto parts_to_remove = getVisibleDataPartsVector(local_context); + removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), parts_to_remove, true); LOG_INFO(log, "Removed {} parts.", parts_to_remove.size()); } @@ -289,13 +306,15 @@ void StorageMergeTree::alter( ContextPtr local_context, AlterLockHolder & table_lock_holder) { + if (local_context->getCurrentTransaction() && local_context->getSettingsRef().throw_on_unsupported_query_inside_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ALTER METADATA is not supported inside transactions"); + auto table_id = getStorageID(); auto old_storage_settings = getSettings(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, local_context->getSettingsRef().materialize_ttl_after_modify, local_context); - String mutation_file_name; Int64 mutation_version = -1; commands.apply(new_metadata, local_context); @@ -317,13 +336,13 @@ void StorageMergeTree::alter( DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata); if (!maybe_mutation_commands.empty()) - mutation_version = startMutation(maybe_mutation_commands, mutation_file_name); + mutation_version = startMutation(maybe_mutation_commands, local_context); } /// Always execute required mutations synchronously, because alters /// should be executed in sequential order. if (!maybe_mutation_commands.empty()) - waitForMutation(mutation_version, mutation_file_name); + waitForMutation(mutation_version); } { @@ -414,24 +433,35 @@ CurrentlyMergingPartsTagger::~CurrentlyMergingPartsTagger() storage.currently_processing_in_background_condition.notify_all(); } -Int64 StorageMergeTree::startMutation(const MutationCommands & commands, String & mutation_file_name) +Int64 StorageMergeTree::startMutation(const MutationCommands & commands, ContextPtr query_context) { /// Choose any disk, because when we load mutations we search them at each disk /// where storage can be placed. See loadMutations(). 
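// StorageMergeTree now keeps a helper that maps a mutation entry back to the transaction that
// started it: prehistoric entries have no transaction, otherwise a running-transaction registry is
// consulted by TID hash and a miss just means the transaction already finished. A standalone sketch
// of that lookup; RunningTxnRegistry and MutationEntrySketch are simplified stand-ins, not
// TransactionLog or MergeTreeMutationEntry.
#include <cstdint>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>

using TIDHash = std::uint64_t;

struct TxnSketch { TIDHash tid_hash = 0; };
using TxnSketchPtr = std::shared_ptr<TxnSketch>;

class RunningTxnRegistry
{
public:
    void add(const TxnSketchPtr & txn)
    {
        std::lock_guard<std::mutex> lock(mutex);
        running.emplace(txn->tid_hash, txn);
    }

    TxnSketchPtr tryGet(TIDHash hash) const
    {
        std::lock_guard<std::mutex> lock(mutex);
        auto it = running.find(hash);
        return it == running.end() ? nullptr : it->second;
    }

private:
    mutable std::mutex mutex;
    std::unordered_map<TIDHash, TxnSketchPtr> running;
};

struct MutationEntrySketch
{
    TIDHash tid_hash = 0;            // 0 == "prehistoric", created outside transactions
    std::string file_name;
};

TxnSketchPtr tryGetTransactionForMutation(const RunningTxnRegistry & registry, const MutationEntrySketch & entry)
{
    if (entry.tid_hash == 0)
        return nullptr;              // pre-transaction mutation, nothing to look up
    if (auto txn = registry.tryGet(entry.tid_hash))
        return txn;
    std::cerr << "Transaction for mutation " << entry.file_name << " not found, probably it finished\n";
    return nullptr;
}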
auto disk = getStoragePolicy()->getAnyDisk(); + TransactionID current_tid = Tx::PrehistoricTID; + String additional_info; + auto txn = query_context->getCurrentTransaction(); + if (txn) + { + current_tid = txn->tid; + additional_info = fmt::format(" (TID: {}; TIDH: {})", current_tid, current_tid.getHash()); + } + Int64 version; { std::lock_guard lock(currently_processing_in_background_mutex); - MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get()); + MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); version = increment.get(); entry.commit(version); - mutation_file_name = entry.file_name; + String mutation_id = entry.file_name; + if (txn) + txn->addMutation(shared_from_this(), mutation_id); bool inserted = current_mutations_by_version.try_emplace(version, std::move(entry)).second; if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", version); - LOG_INFO(log, "Added mutation: {}", mutation_file_name); + LOG_INFO(log, "Added mutation: {}{}", mutation_id, additional_info); } background_operations_assignee.trigger(); return version; @@ -477,9 +507,15 @@ void StorageMergeTree::updateMutationEntriesErrors(FutureMergedMutatedPartPtr re mutation_wait_event.notify_all(); } -void StorageMergeTree::waitForMutation(Int64 version, const String & file_name) +void StorageMergeTree::waitForMutation(Int64 version) { - LOG_INFO(log, "Waiting mutation: {}", file_name); + waitForMutation(MergeTreeMutationEntry::versionToFileName(version)); +} + +void StorageMergeTree::waitForMutation(const String & mutation_id) +{ + UInt64 version = MergeTreeMutationEntry::parseFileName(mutation_id); + LOG_INFO(log, "Waiting mutation: {}", mutation_id); { auto check = [version, this]() { @@ -495,20 +531,24 @@ void StorageMergeTree::waitForMutation(Int64 version, const String & file_name) /// At least we have our current mutation std::set mutation_ids; - mutation_ids.insert(file_name); + mutation_ids.insert(mutation_id); auto mutation_status = getIncompleteMutationsStatus(version, &mutation_ids); - try - { - checkMutationStatus(mutation_status, mutation_ids); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - throw; - } + checkMutationStatus(mutation_status, mutation_ids); - LOG_INFO(log, "Mutation {} done", file_name); + LOG_INFO(log, "Mutation {} done", mutation_id); +} + +void StorageMergeTree::setMutationCSN(const String & mutation_id, CSN csn) +{ + LOG_INFO(log, "Writing CSN {} for mutation {}", csn, mutation_id); + UInt64 version = MergeTreeMutationEntry::parseFileName(mutation_id); + + std::lock_guard lock(currently_processing_in_background_mutex); + auto it = current_mutations_by_version.find(version); + if (it == current_mutations_by_version.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find mutation {}", mutation_id); + it->second.writeCSN(csn); } void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context) @@ -516,11 +556,10 @@ void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr quer /// Validate partition IDs (if any) before starting mutation getPartitionIdsAffectedByCommands(commands, query_context); - String mutation_file_name; - Int64 version = startMutation(commands, mutation_file_name); + Int64 version = startMutation(commands, query_context); - if (query_context->getSettingsRef().mutations_sync > 0) - waitForMutation(version, mutation_file_name); + if (query_context->getSettingsRef().mutations_sync > 0 || query_context->getCurrentTransaction()) + waitForMutation(version); } std::optional StorageMergeTree::getIncompleteMutationsStatus(Int64 mutation_version, std::set * mutation_ids) const @@ -536,7 +575,9 @@ std::optional StorageMergeTree::getIncompleteMutationsS const auto & mutation_entry = current_mutation_it->second; - auto data_parts = getDataPartsVector(); + auto txn = tryGetTransactionForMutation(mutation_entry, log); + assert(txn || mutation_entry.tid.isPrehistoric()); + auto data_parts = getVisibleDataPartsVector(txn); for (const auto & data_part : data_parts) { Int64 data_version = getUpdatedDataVersion(data_part, lock); @@ -560,6 +601,17 @@ std::optional StorageMergeTree::getIncompleteMutationsS mutation_ids->insert(it->second.file_name); } } + else if (txn) + { + /// Part is locked by concurrent transaction, most likely it will never be mutated + TIDHash part_locked = data_part->version.removal_tid_lock.load(); + if (part_locked && part_locked != mutation_entry.tid.getHash()) + { + result.latest_failed_part = data_part->name; + result.latest_fail_reason = fmt::format("Serialization error: part {} is locked by transaction {}", data_part->name, part_locked); + result.latest_fail_time = time(nullptr); + } + } return result; } @@ -635,6 +687,12 @@ CancellationCode StorageMergeTree::killMutation(const String & mutation_id) if (!to_kill) return CancellationCode::NotFound; + if (auto txn = tryGetTransactionForMutation(*to_kill, log)) + { + LOG_TRACE(log, "Cancelling transaction {} which had started mutation {}", to_kill->tid, mutation_id); + TransactionLog::instance().rollbackTransaction(txn); + } + getContext()->getMergeList().cancelPartMutations(getStorageID(), {}, to_kill->block_number); to_kill->removeFile(); LOG_TRACE(log, "Cancelled part mutations and removed mutation file {}", mutation_id); @@ -671,6 +729,24 @@ void StorageMergeTree::loadMutations() MergeTreeMutationEntry entry(disk, relative_data_path, it->name()); UInt64 block_number = entry.block_number; LOG_DEBUG(log, "Loading mutation: {} entry, commands size: {}", it->name(), entry.commands.size()); + + if (!entry.tid.isPrehistoric() && !entry.csn) + { + if (auto csn = 
TransactionLog::getCSN(entry.tid)) + { + /// Transaction is committed => mutation is finished, but let's load it anyway (so it will be shown in system.mutations) + entry.writeCSN(csn); + } + else + { + TransactionLog::assertTIDIsNotOutdated(entry.tid); + LOG_DEBUG(log, "Mutation entry {} was created by transaction {}, but it was not committed. Removing mutation entry", + it->name(), entry.tid); + disk->removeFile(it->path()); + continue; + } + } + auto inserted = current_mutations_by_version.try_emplace(block_number, std::move(entry)).second; if (!inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutation {} already exists, it's a bug", block_number); @@ -694,6 +770,7 @@ std::shared_ptr StorageMergeTree::selectPartsToMerge( String * out_disable_reason, TableLockHolder & /* table_lock_holder */, std::unique_lock & lock, + const MergeTreeTransactionPtr & txn, bool optimize_skip_merged_partitions, SelectPartsDecision * select_decision_out) { @@ -708,8 +785,24 @@ std::shared_ptr StorageMergeTree::selectPartsToMerge( CurrentlyMergingPartsTaggerPtr merging_tagger; MergeList::EntryPtr merge_entry; - auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, String *) -> bool + auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, String *) -> bool { + if (tx) + { + /// Cannot merge parts if some of them are not visible in current snapshot + /// TODO Transactions: We can use simplified visibility rules (without CSN lookup) here + if (left && !left->version.isVisible(tx->getSnapshot(), Tx::EmptyTID)) + return false; + if (right && !right->version.isVisible(tx->getSnapshot(), Tx::EmptyTID)) + return false; + + /// Do not try to merge parts that are locked for removal (merge will probably fail) + if (left && left->version.isRemovalTIDLocked()) + return false; + if (right && right->version.isRemovalTIDLocked()) + return false; + } + /// This predicate is checked for the first part of each range. /// (left = nullptr, right = "first part of partition") if (!left) @@ -736,6 +829,7 @@ std::shared_ptr StorageMergeTree::selectPartsToMerge( max_source_parts_size, can_merge, merge_with_ttl_allowed, + txn, out_disable_reason); } else if (out_disable_reason) @@ -746,7 +840,7 @@ std::shared_ptr StorageMergeTree::selectPartsToMerge( while (true) { select_decision = merger_mutator.selectAllPartsToMergeWithinPartition( - future_part, can_merge, partition_id, final, metadata_snapshot, out_disable_reason, optimize_skip_merged_partitions); + future_part, can_merge, partition_id, final, metadata_snapshot, txn, out_disable_reason, optimize_skip_merged_partitions); auto timeout_ms = getSettings()->lock_acquire_timeout_for_background_operations.totalMilliseconds(); auto timeout = std::chrono::milliseconds(timeout_ms); @@ -804,6 +898,7 @@ bool StorageMergeTree::merge( bool final, bool deduplicate, const Names & deduplicate_by_columns, + const MergeTreeTransactionPtr & txn, String * out_disable_reason, bool optimize_skip_merged_partitions) { @@ -827,6 +922,7 @@ bool StorageMergeTree::merge( out_disable_reason, table_lock_holder, lock, + txn, optimize_skip_merged_partitions, &select_decision); } @@ -838,11 +934,12 @@ bool StorageMergeTree::merge( if (!merge_mutate_entry) return false; - /// Copying a vector of columns `deduplicate bu columns. 
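// The can_merge predicate above refuses parts that are not visible in the transaction's snapshot or
// that are already locked for removal. A self-contained sketch of that visibility rule under
// simplified assumptions (plain integer CSNs, 0 meaning "not set"); the real MVCC check also has to
// deal with uncommitted creation TIDs and CSN lookups.
#include <atomic>
#include <cstdint>

using CSN     = std::uint64_t;
using TIDHash = std::uint64_t;

struct PartVersionSketch
{
    CSN creation_csn = 0;                       // 0 == creation not committed yet
    CSN removal_csn  = 0;                       // 0 == not removed
    std::atomic<TIDHash> removal_tid_lock{0};   // non-zero == some transaction intends to drop the part

    bool isVisible(CSN snapshot) const
    {
        const bool created = creation_csn != 0 && creation_csn <= snapshot;
        const bool removed = removal_csn  != 0 && removal_csn  <= snapshot;
        return created && !removed;
    }

    bool isRemovalLocked() const { return removal_tid_lock.load() != 0; }
};

// A pair of parts qualifies for a transactional merge only if both are visible in the transaction's
// snapshot and neither is already locked for removal by someone else.
bool canMergeUnderSnapshot(const PartVersionSketch & left, const PartVersionSketch & right, CSN snapshot)
{
    return left.isVisible(snapshot) && right.isVisible(snapshot)
        && !left.isRemovalLocked() && !right.isRemovalLocked();
}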
auto task = std::make_shared( *this, metadata_snapshot, deduplicate, deduplicate_by_columns, merge_mutate_entry, table_lock_holder, [](bool){}); + task->setCurrentTransaction(MergeTreeTransactionHolder{}, MergeTreeTransactionPtr{txn}); + executeHere(task); return true; @@ -882,7 +979,7 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( } auto mutations_end_it = current_mutations_by_version.end(); - for (const auto & part : getDataPartsVector()) + for (const auto & part : getDataPartsVectorForInternalUsage()) { if (currently_merging_mutating_parts.count(part)) continue; @@ -902,12 +999,28 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( continue; } - auto commands = MutationCommands::create(); + TransactionID first_mutation_tid = mutations_begin_it->second.tid; + MergeTreeTransactionPtr txn = tryGetTransactionForMutation(mutations_begin_it->second, log); + assert(txn || first_mutation_tid.isPrehistoric()); + if (txn) + { + /// Mutate visible parts only + /// NOTE Do not mutate visible parts in Outdated state, because it does not make sense: + /// mutation will fail anyway due to serialization error. + if (!part->version.isVisible(*txn)) + continue; + } + + auto commands = MutationCommands::create(); size_t current_ast_elements = 0; auto last_mutation_to_apply = mutations_end_it; for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { + /// Do not squash mutations from different transactions to be able to commit/rollback them independently. + if (first_mutation_tid != it->second.tid) + break; + size_t commands_size = 0; MutationCommands commands_for_size_validation; for (const auto & command : it->second.commands) @@ -994,13 +1107,14 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( future_part->type = part->getType(); tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true); - return std::make_shared(future_part, std::move(tagger), commands); + return std::make_shared(future_part, std::move(tagger), commands, txn); } } return {}; } + bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assignee) //-V657 { if (shutdown_called) @@ -1014,13 +1128,22 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign auto share_lock = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + MergeTreeTransactionHolder transaction_for_merge; + MergeTreeTransactionPtr txn; + if (transactions_enabled.load(std::memory_order_relaxed)) + { + /// TODO Transactions: avoid beginning transaction if there is nothing to merge. 
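// Background merges now run under their own transaction, carried around in a holder created with
// autocommit enabled. A sketch of one plausible holder shape under that assumption: commit on
// destruction when autocommit is set, roll back otherwise. TxnSketch/TxnHolderSketch are stand-ins,
// not the real MergeTreeTransactionHolder.
#include <functional>
#include <memory>
#include <utility>

struct TxnSketch
{
    std::function<void()> commit;
    std::function<void()> rollback;
};

class TxnHolderSketch
{
public:
    TxnHolderSketch() = default;
    TxnHolderSketch(std::shared_ptr<TxnSketch> txn_, bool autocommit_)
        : txn(std::move(txn_)), autocommit(autocommit_) {}

    TxnHolderSketch(const TxnHolderSketch &) = delete;
    TxnHolderSketch & operator=(const TxnHolderSketch &) = delete;

    TxnHolderSketch(TxnHolderSketch && other) noexcept
        : txn(std::move(other.txn)), autocommit(other.autocommit) {}

    TxnHolderSketch & operator=(TxnHolderSketch && other) noexcept
    {
        finish();                      // settle whatever we currently hold
        txn = std::move(other.txn);
        autocommit = other.autocommit;
        return *this;
    }

    ~TxnHolderSketch() { finish(); }

private:
    void finish()
    {
        if (!txn)
            return;
        if (autocommit)
            txn->commit();             // background merge finished normally
        else
            txn->rollback();           // owner never committed explicitly
        txn.reset();
    }

    std::shared_ptr<TxnSketch> txn;
    bool autocommit = false;
};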
+ txn = TransactionLog::instance().beginTransaction(); + transaction_for_merge = MergeTreeTransactionHolder{txn, /* autocommit = */ true}; + } + bool has_mutations = false; { std::unique_lock lock(currently_processing_in_background_mutex); if (merger_mutator.merges_blocker.isCancelled()) return false; - merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, nullptr, share_lock, lock); + merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, nullptr, share_lock, lock, txn); if (!merge_entry) mutate_entry = selectPartsToMutate(metadata_snapshot, nullptr, share_lock, lock, were_some_mutations_skipped); @@ -1038,6 +1161,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merge_entry) { auto task = std::make_shared(*this, metadata_snapshot, false, Names{}, merge_entry, share_lock, common_assignee_trigger); + task->setCurrentTransaction(std::move(transaction_for_merge), std::move(txn)); assignee.scheduleMergeMutateTask(task); return true; } @@ -1112,54 +1236,52 @@ UInt64 StorageMergeTree::getCurrentMutationVersion( size_t StorageMergeTree::clearOldMutations(bool truncate) { - const auto settings = getSettings(); - if (!truncate && !settings->finished_mutations_to_keep) - return 0; + size_t finished_mutations_to_keep = truncate ? 0 : getSettings()->finished_mutations_to_keep; std::vector mutations_to_delete; { std::unique_lock lock(currently_processing_in_background_mutex); - if (!truncate && current_mutations_by_version.size() <= settings->finished_mutations_to_keep) + if (current_mutations_by_version.size() <= finished_mutations_to_keep) return 0; auto end_it = current_mutations_by_version.end(); auto begin_it = current_mutations_by_version.begin(); - size_t to_delete_count = std::distance(begin_it, end_it); - if (!truncate) + if (std::optional min_version = getMinPartDataVersion()) + end_it = current_mutations_by_version.upper_bound(*min_version); + + size_t done_count = std::distance(begin_it, end_it); + if (done_count <= finished_mutations_to_keep) + return 0; + + auto part_versions_with_names = getSortedPartVersionsWithNames(lock); + + for (auto it = begin_it; it != end_it; ++it) { - if (std::optional min_version = getMinPartDataVersion()) - end_it = current_mutations_by_version.upper_bound(*min_version); + const PartVersionWithName needle{static_cast(it->first), ""}; + auto versions_it = std::lower_bound( + part_versions_with_names.begin(), part_versions_with_names.end(), needle); - size_t done_count = std::distance(begin_it, end_it); - if (done_count <= settings->finished_mutations_to_keep) - return 0; - - auto part_versions_with_names = getSortedPartVersionsWithNames(lock); - - for (auto it = begin_it; it != end_it; ++it) + if (versions_it != part_versions_with_names.begin() || !it->second.tid.isPrehistoric()) { - const PartVersionWithName needle{static_cast(it->first), ""}; - auto versions_it = std::lower_bound( - part_versions_with_names.begin(), part_versions_with_names.end(), needle); - - if (versions_it != part_versions_with_names.begin()) - { - done_count = std::distance(begin_it, it); - break; - } + done_count = std::distance(begin_it, it); + break; } - - if (done_count <= settings->finished_mutations_to_keep) - return 0; - - to_delete_count = done_count - settings->finished_mutations_to_keep; } + if (done_count <= finished_mutations_to_keep) + return 0; + + size_t to_delete_count = done_count - finished_mutations_to_keep; + auto it = begin_it; for (size_t i = 0; i < to_delete_count; ++i) { + const auto & tid = 
it->second.tid; + if (!tid.isPrehistoric() && !TransactionLog::getCSN(tid)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove mutation {}, because transaction {} is not committed. It's a bug", + it->first, tid); mutations_to_delete.push_back(std::move(it->second)); it = current_mutations_by_version.erase(it); } @@ -1178,7 +1300,7 @@ std::vector StorageMergeTree::getSortedPa std::unique_lock & currently_processing_in_background_mutex_lock) const { std::vector part_versions_with_names; - auto data_parts = getDataPartsVector(); + auto data_parts = getDataPartsVectorForInternalUsage(); part_versions_with_names.reserve(data_parts.size()); for (const auto & part : data_parts) part_versions_with_names.emplace_back(PartVersionWithName{ @@ -1206,10 +1328,12 @@ bool StorageMergeTree::optimize( LOG_DEBUG(log, "DEDUPLICATE BY ('{}')", fmt::join(deduplicate_by_columns, "', '")); } + auto txn = local_context->getCurrentTransaction(); + String disable_reason; if (!partition && final) { - DataPartsVector data_parts = getDataPartsVector(); + DataPartsVector data_parts = getVisibleDataPartsVector(local_context); std::unordered_set partition_ids; for (const DataPartPtr & part : data_parts) @@ -1223,6 +1347,7 @@ bool StorageMergeTree::optimize( true, deduplicate, deduplicate_by_columns, + txn, &disable_reason, local_context->getSettingsRef().optimize_skip_merged_partitions)) { @@ -1249,6 +1374,7 @@ bool StorageMergeTree::optimize( final, deduplicate, deduplicate_by_columns, + txn, &disable_reason, local_context->getSettingsRef().optimize_skip_merged_partitions)) { @@ -1291,7 +1417,7 @@ ActionLock StorageMergeTree::stopMergesAndWait() } -MergeTreeDataPartPtr StorageMergeTree::outdatePart(const String & part_name, bool force) +MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, const String & part_name, bool force) { if (force) @@ -1301,7 +1427,7 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(const String & part_name, boo auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}); if (!part) throw Exception("Part " + part_name + " not found, won't try to drop it.", ErrorCodes::NO_SUCH_DATA_PART); - removePartsFromWorkingSet({part}, true); + removePartsFromWorkingSet(txn, {part}, true); return part; } else @@ -1320,22 +1446,22 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(const String & part_name, boo if (currently_merging_mutating_parts.count(part)) return nullptr; - removePartsFromWorkingSet({part}, true); + removePartsFromWorkingSet(txn, {part}, true); return part; } } void StorageMergeTree::dropPartNoWaitNoThrow(const String & part_name) { - if (auto part = outdatePart(part_name, /*force=*/ false)) + if (auto part = outdatePart(NO_TRANSACTION_RAW, part_name, /*force=*/ false)) dropPartsImpl({part}, /*detach=*/ false); /// Else nothing to do, part was removed in some different way } -void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPtr /*query_context*/) +void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPtr query_context) { - if (auto part = outdatePart(part_name, /*force=*/ true)) + if (auto part = outdatePart(query_context->getCurrentTransaction().get(), part_name, /*force=*/ true)) dropPartsImpl({part}, detach); } @@ -1349,14 +1475,14 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont auto merge_blocker = stopMergesAndWait(); const auto * partition_ast = partition->as(); if (partition_ast && partition_ast->all) - parts_to_remove = getDataPartsVector(); 
+ parts_to_remove = getVisibleDataPartsVector(local_context); else { String partition_id = getPartitionIDFromQuery(partition, local_context); - parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + parts_to_remove = getVisibleDataPartsVectorInPartition(local_context, partition_id); } /// TODO should we throw an exception if parts_to_remove is empty? - removePartsFromWorkingSet(parts_to_remove, true); + removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), parts_to_remove, true); } dropPartsImpl(std::move(parts_to_remove), detach); @@ -1405,8 +1531,14 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( for (size_t i = 0; i < loaded_parts.size(); ++i) { LOG_INFO(log, "Attaching part {} from {}", loaded_parts[i]->name, renamed_parts.old_and_new_names[i].new_name); + /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. + auto txn = local_context->getCurrentTransaction(); + TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; + loaded_parts[i]->version.setCreationTID(tid, nullptr); + loaded_parts[i]->storeVersionMetadata(); + String old_name = renamed_parts.old_and_new_names[i].old_name; - renameTempPartAndAdd(loaded_parts[i], &increment); + renameTempPartAndAdd(loaded_parts[i], local_context->getCurrentTransaction().get(), &increment); renamed_parts.old_and_new_names[i].old_name.clear(); results.push_back(PartitionCommandResultInfo{ @@ -1435,7 +1567,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot); String partition_id = getPartitionIDFromQuery(partition, local_context); - DataPartsVector src_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); MutableDataPartsVector dst_parts; static const String TMP_PREFIX = "tmp_replace_from_"; @@ -1451,7 +1583,8 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con Int64 temp_index = insert_increment.get(); MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); - dst_parts.emplace_back(cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, my_metadata_snapshot)); + auto dst_part = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, my_metadata_snapshot, local_context->getCurrentTransaction()); + dst_parts.emplace_back(std::move(dst_part)); } /// ATTACH empty part set @@ -1473,19 +1606,19 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con { /// Here we use the transaction just like RAII since rare errors in renameTempPartAndReplace() are possible /// and we should be able to rollback already added (Precomitted) parts - Transaction transaction(*this); + Transaction transaction(*this, local_context->getCurrentTransaction().get()); auto data_parts_lock = lockParts(); /// Populate transaction for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, &increment, &transaction, data_parts_lock); + renameTempPartAndReplace(part, local_context->getCurrentTransaction().get(), &increment, &transaction, data_parts_lock); transaction.commit(&data_parts_lock); /// If it is REPLACE (not ATTACH), remove all parts which max_block_number less then min_block_number of the first new 
block if (replace) - removePartsInRangeFromWorkingSet(drop_range, true, data_parts_lock); + removePartsInRangeFromWorkingSet(local_context->getCurrentTransaction().get(), drop_range, true, data_parts_lock); } PartLog::addNewParts(getContext(), dst_parts, watch.elapsed()); @@ -1520,7 +1653,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const MergeTreeData & src_data = dest_table_storage->checkStructureAndGetMergeTreeData(*this, metadata_snapshot, dest_metadata_snapshot); String partition_id = getPartitionIDFromQuery(partition, local_context); - DataPartsVector src_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + DataPartsVector src_parts = src_data.getVisibleDataPartsVectorInPartition(local_context, partition_id); MutableDataPartsVector dst_parts; static const String TMP_PREFIX = "tmp_move_from_"; @@ -1536,7 +1669,8 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const Int64 temp_index = insert_increment.get(); MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); - dst_parts.emplace_back(dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot)); + auto dst_part = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, local_context->getCurrentTransaction()); + dst_parts.emplace_back(std::move(dst_part)); } /// empty part set @@ -1547,7 +1681,7 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const try { { - Transaction transaction(*dest_table_storage); + Transaction transaction(*dest_table_storage, local_context->getCurrentTransaction().get()); auto src_data_parts_lock = lockParts(); auto dest_data_parts_lock = dest_table_storage->lockParts(); @@ -1556,9 +1690,9 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const DataPartsLock lock(mutex); for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, &dest_table_storage->increment, &transaction, lock); + dest_table_storage->renameTempPartAndReplace(part, local_context->getCurrentTransaction().get(), &dest_table_storage->increment, &transaction, lock); - removePartsFromWorkingSet(src_parts, true, lock); + removePartsFromWorkingSet(local_context->getCurrentTransaction().get(), src_parts, true, lock); transaction.commit(&lock); } @@ -1600,10 +1734,10 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ if (const auto & check_query = query->as(); check_query.partition) { String partition_id = getPartitionIDFromQuery(check_query.partition, local_context); - data_parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + data_parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); } else - data_parts = getDataPartsVector(); + data_parts = getVisibleDataPartsVector(local_context); for (auto & part : data_parts) { @@ -1618,7 +1752,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ { auto calculated_checksums = checkDataPart(part, false); calculated_checksums.checkEqual(part->checksums, true); - auto out = disk->writeFile(tmp_checksums_path, 4096); + auto out = disk->writeFile(tmp_checksums_path, 4096, WriteMode::Rewrite, local_context->getWriteSettings()); part->checksums.write(*out); disk->moveFile(tmp_checksums_path, checksums_path); diff --git a/src/Storages/StorageMergeTree.h 
b/src/Storages/StorageMergeTree.h index a1fc310d912..74fb954bb6d 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -41,6 +41,8 @@ public: bool supportsIndexForIn() const override { return true; } + bool supportsTransactions() const override { return true; } + Pipe read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -152,19 +154,29 @@ private: * If aggressive - when selects parts don't takes into account their ratio size and novelty (used for OPTIMIZE query). * Returns true if merge is finished successfully. */ - bool merge(bool aggressive, const String & partition_id, bool final, bool deduplicate, const Names & deduplicate_by_columns, String * out_disable_reason = nullptr, bool optimize_skip_merged_partitions = false); + bool merge( + bool aggressive, + const String & partition_id, + bool final, bool deduplicate, + const Names & deduplicate_by_columns, + const MergeTreeTransactionPtr & txn, + String * out_disable_reason = nullptr, + bool optimize_skip_merged_partitions = false); /// Make part state outdated and queue it to remove without timeout /// If force, then stop merges and block them until part state became outdated. Throw exception if part doesn't exists /// If not force, then take merges selector and check that part is not participating in background operations. - MergeTreeDataPartPtr outdatePart(const String & part_name, bool force); + MergeTreeDataPartPtr outdatePart(MergeTreeTransaction * txn, const String & part_name, bool force); ActionLock stopMergesAndWait(); /// Allocate block number for new mutation, write mutation to disk /// and into in-memory structures. Wake up merge-mutation task. - Int64 startMutation(const MutationCommands & commands, String & mutation_file_name); + Int64 startMutation(const MutationCommands & commands, ContextPtr query_context); /// Wait until mutation with version will finish mutation for all parts - void waitForMutation(Int64 version, const String & file_name); + void waitForMutation(Int64 version); + void waitForMutation(const String & mutation_id) override; + void setMutationCSN(const String & mutation_id, CSN csn) override; + friend struct CurrentlyMergingPartsTagger; @@ -187,6 +199,7 @@ private: String * disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & lock, + const MergeTreeTransactionPtr & txn, bool optimize_skip_merged_partitions = false, SelectPartsDecision * select_decision_out = nullptr); @@ -236,7 +249,6 @@ private: std::unique_ptr getDefaultSettings() const override; - friend class MergeTreeProjectionBlockOutputStream; friend class MergeTreeSink; friend class MergeTreeData; friend class MergePlainMergeTreeTask; diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index d5af81ced3d..b1eb190bd1d 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -41,7 +41,7 @@ public: /// TODO: Find a way to support projections for StorageProxy info.ignore_projections = true; const auto & nested_metadata = getNested()->getInMemoryMetadataPtr(); - return getNested()->getQueryProcessingStage(context, to_stage, getNested()->getStorageSnapshot(nested_metadata), info); + return getNested()->getQueryProcessingStage(context, to_stage, getNested()->getStorageSnapshot(nested_metadata, context), info); } Pipe watch( @@ -149,7 +149,6 @@ public: CheckResults checkData(const ASTPtr & query , ContextPtr context) override { return getNested()->checkData(query, context); } void checkTableCanBeDropped() const override { 
getNested()->checkTableCanBeDropped(); } - void checkPartitionCanBeDropped(const ASTPtr & partition) override { getNested()->checkPartitionCanBeDropped(partition); } bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); } Strings getDataPaths() const override { return getNested()->getDataPaths(); } StoragePolicyPtr getStoragePolicy() const override { return getNested()->getStoragePolicy(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 39840f91325..66a5baf555b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -375,7 +375,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( if (!attach) { - if (!getDataParts().empty()) + if (!getDataPartsForInternalUsage().empty()) throw Exception("Data directory for table already contains data parts" " - probably it was unclean DROP table or manual intervention." " You must either clear directory by hand or use ATTACH TABLE" @@ -1589,9 +1589,10 @@ bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry) { LOG_TRACE(log, "Found valid local part for {}, preparing the transaction", part->name); - Transaction transaction(*this); + Transaction transaction(*this, NO_TRANSACTION_RAW); - renameTempPartAndReplace(part, nullptr, &transaction); + part->version.setCreationTID(Tx::PrehistoricTID, nullptr); + renameTempPartAndReplace(part, NO_TRANSACTION_RAW, nullptr, &transaction); checkPartChecksumsAndCommit(transaction, part); writePartLog(PartLogElement::Type::NEW_PART, {}, 0 /** log entry is fake so we don't measure the time */, @@ -1885,7 +1886,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) DataPartsVector parts_to_remove; { auto data_parts_lock = lockParts(); - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range_info, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range_info, true, data_parts_lock); if (parts_to_remove.empty()) { if (!drop_range_info.isFakeDropRangePart()) @@ -2018,7 +2019,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (parts_to_add.empty() && replace) { - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, true, data_parts_lock); String parts_to_remove_str; for (const auto & part : parts_to_remove) { @@ -2198,7 +2199,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) throw Exception("Checksums of " + part_desc->src_table_part->name + " is suddenly changed", ErrorCodes::UNFINISHED); part_desc->res_part = cloneAndLoadDataPartOnSameDisk( - part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot); + part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, NO_TRANSACTION_PTR); } else if (!part_desc->replica.empty()) { @@ -2238,12 +2239,12 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) { /// Commit parts auto zookeeper = getZooKeeper(); - Transaction transaction(*this); + Transaction transaction(*this, NO_TRANSACTION_RAW); Coordination::Requests ops; for (PartDescriptionPtr & part_desc : final_parts) { - renameTempPartAndReplace(part_desc->res_part, nullptr, &transaction); + renameTempPartAndReplace(part_desc->res_part, NO_TRANSACTION_RAW, nullptr, &transaction); getCommitPartOps(ops, 
part_desc->res_part); } @@ -2256,7 +2257,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) transaction.commit(&data_parts_lock); if (replace) { - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, true, data_parts_lock); String parts_to_remove_str; for (const auto & part : parts_to_remove) { @@ -2510,7 +2511,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo removePartsFromZooKeeperWithRetries(parts_to_remove_from_zk); - auto local_active_parts = getDataParts(); + auto local_active_parts = getDataPartsForInternalUsage(); DataPartsVector parts_to_remove_from_working_set; @@ -2534,7 +2535,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo } } - removePartsFromWorkingSet(parts_to_remove_from_working_set, true); + removePartsFromWorkingSet(NO_TRANSACTION_RAW, parts_to_remove_from_working_set, true); std::unordered_set created_get_parts; @@ -3119,7 +3120,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() future_merged_part->uuid = UUIDHelpers::generateV4(); if (max_source_parts_size_for_merge > 0 && - merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, merge_with_ttl_allowed, nullptr) == SelectPartsDecision::SELECTED) + merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, merge_with_ttl_allowed, NO_TRANSACTION_PTR, nullptr) == SelectPartsDecision::SELECTED) { create_result = createLogEntryToMergeParts( zookeeper, @@ -3138,7 +3139,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() && merges_and_mutations_queued.mutations < storage_settings_ptr->max_replicated_mutations_in_queue) { /// Choose a part to mutate. - DataPartsVector data_parts = getDataPartsVector(); + DataPartsVector data_parts = getDataPartsVectorForInternalUsage(); for (const auto & part : data_parts) { if (part->getBytesOnDisk() > max_source_part_size_for_mutation) @@ -3397,7 +3398,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n /// It's quite dangerous, so clone covered parts to detached. 
auto broken_part_info = MergeTreePartInfo::fromPartName(part_name, format_version); - auto partition_range = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, broken_part_info.partition_id); + auto partition_range = getVisibleDataPartsVectorInPartition(getContext(), broken_part_info.partition_id); for (const auto & part : partition_range) { if (!broken_part_info.contains(part->info)) @@ -3930,7 +3931,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora { get_part = [&, part_to_clone]() { - return cloneAndLoadDataPartOnSameDisk(part_to_clone, "tmp_clone_", part_info, metadata_snapshot); + return cloneAndLoadDataPartOnSameDisk(part_to_clone, "tmp_clone_", part_info, metadata_snapshot, NO_TRANSACTION_PTR); }; } else @@ -3973,8 +3974,8 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Stora if (!to_detached) { - Transaction transaction(*this); - renameTempPartAndReplace(part, nullptr, &transaction); + Transaction transaction(*this, NO_TRANSACTION_RAW); + renameTempPartAndReplace(part, NO_TRANSACTION_RAW, nullptr, &transaction); replaced_parts = checkPartChecksumsAndCommit(transaction, part); @@ -4254,7 +4255,7 @@ ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock StorageReplicatedMerg { ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock max_added_blocks; - for (const auto & data_part : getDataParts()) + for (const auto & data_part : getDataPartsForInternalUsage()) { max_added_blocks[data_part->info.partition_id] = std::max(max_added_blocks[data_part->info.partition_id], data_part->info.max_block); @@ -4368,6 +4369,7 @@ void StorageReplicatedMergeTree::foreachActiveParts(Func && func, bool select_se max_added_blocks = getMaxAddedBlocks(); auto lock = lockParts(); + /// TODO Transactions: should we count visible parts only? for (const auto & part : getDataPartsStateRange(DataPartState::Active)) { if (part->isEmpty()) @@ -4485,12 +4487,12 @@ bool StorageReplicatedMergeTree::optimize( { select_decision = merger_mutator.selectPartsToMerge( future_merged_part, /* aggressive */ true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, - can_merge, /* merge_with_ttl_allowed */ false, &disable_reason); + can_merge, /* merge_with_ttl_allowed */ false, NO_TRANSACTION_PTR, &disable_reason); } else { select_decision = merger_mutator.selectAllPartsToMergeWithinPartition( - future_merged_part, can_merge, partition_id, final, metadata_snapshot, + future_merged_part, can_merge, partition_id, final, metadata_snapshot, NO_TRANSACTION_PTR, &disable_reason, query_context->getSettingsRef().optimize_skip_merged_partitions); } @@ -4539,7 +4541,7 @@ bool StorageReplicatedMergeTree::optimize( bool assigned = false; if (!partition && final) { - DataPartsVector data_parts = getDataPartsVector(); + DataPartsVector data_parts = getVisibleDataPartsVector(query_context); std::unordered_set partition_ids; for (const DataPartPtr & part : data_parts) @@ -6330,7 +6332,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( String partition_id = getPartitionIDFromQuery(partition, query_context); /// NOTE: Some covered parts may be missing in src_all_parts if corresponding log entries are not executed yet. 
- DataPartsVector src_all_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + DataPartsVector src_all_parts = src_data.getVisibleDataPartsVectorInPartition(query_context, partition_id); LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); @@ -6402,7 +6404,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( UInt64 index = lock->getNumber(); MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - auto dst_part = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, metadata_snapshot); + auto dst_part = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, metadata_snapshot, NO_TRANSACTION_PTR); src_parts.emplace_back(src_part); dst_parts.emplace_back(dst_part); @@ -6458,12 +6460,12 @@ void StorageReplicatedMergeTree::replacePartitionFrom( ops.emplace_back(zkutil::makeSetRequest(fs::path(zookeeper_path) / "log", "", -1)); ops.emplace_back(zkutil::makeCreateRequest(fs::path(zookeeper_path) / "log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); - Transaction transaction(*this); + Transaction transaction(*this, NO_TRANSACTION_RAW); { auto data_parts_lock = lockParts(); for (MutableDataPartPtr & part : dst_parts) - renameTempPartAndReplace(part, nullptr, &transaction, data_parts_lock); + renameTempPartAndReplace(part, query_context->getCurrentTransaction().get(), nullptr, &transaction, data_parts_lock); } Coordination::Error code = zookeeper->tryMulti(ops, op_results); @@ -6483,10 +6485,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( { auto data_parts_lock = lockParts(); - transaction.commit(&data_parts_lock); if (replace) - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, true, data_parts_lock); } PartLog::addNewParts(getContext(), dst_parts, watch.elapsed()); @@ -6614,7 +6615,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta UInt64 index = lock->getNumber(); MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - auto dst_part = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot); + auto dst_part = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, NO_TRANSACTION_PTR); src_parts.emplace_back(src_part); dst_parts.emplace_back(dst_part); @@ -6676,7 +6677,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta entry.toString(), zkutil::CreateMode::PersistentSequential)); { - Transaction transaction(*dest_table_storage); + Transaction transaction(*dest_table_storage, NO_TRANSACTION_RAW); auto src_data_parts_lock = lockParts(); auto dest_data_parts_lock = dest_table_storage->lockParts(); @@ -6685,7 +6686,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta DataPartsLock lock(mutex); for (MutableDataPartPtr & part : dst_parts) - dest_table_storage->renameTempPartAndReplace(part, nullptr, &transaction, lock); + dest_table_storage->renameTempPartAndReplace(part, query_context->getCurrentTransaction().get(), nullptr, &transaction, lock); Coordination::Error code = zookeeper->tryMulti(ops, op_results); if (code == Coordination::Error::ZBADVERSION) @@ -6693,7 +6694,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta else zkutil::KeeperMultiException::check(code, 
ops, op_results); - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, lock); + parts_to_remove = removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, true, lock); transaction.commit(&lock); } @@ -7153,10 +7154,10 @@ CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, Context if (const auto & check_query = query->as(); check_query.partition) { String partition_id = getPartitionIDFromQuery(check_query.partition, local_context); - data_parts = getDataPartsVectorInPartition(MergeTreeDataPartState::Active, partition_id); + data_parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); } else - data_parts = getDataPartsVector(); + data_parts = getVisibleDataPartsVector(local_context); for (auto & part : data_parts) { @@ -7192,7 +7193,7 @@ void StorageReplicatedMergeTree::checkBrokenDisks() LOG_INFO(log, "Scanning parts to recover on broken disk {} with path {}", disk_ptr->getName(), disk_ptr->getPath()); if (!parts) - parts = std::make_unique(getDataPartsVector()); + parts = std::make_unique(getDataPartsVectorForInternalUsage()); for (auto & part : *parts) { @@ -7730,7 +7731,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP const auto & index_factory = MergeTreeIndexFactory::instance(); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, - index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); + index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, NO_TRANSACTION_PTR); bool sync_on_insert = settings->fsync_after_insert; @@ -7742,8 +7743,8 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP try { - MergeTreeData::Transaction transaction(*this); - auto replaced_parts = renameTempPartAndReplace(new_data_part, nullptr, &transaction); + MergeTreeData::Transaction transaction(*this, NO_TRANSACTION_RAW); + auto replaced_parts = renameTempPartAndReplace(new_data_part, NO_TRANSACTION_RAW, nullptr, &transaction); if (!replaced_parts.empty()) { diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 4616421b24a..8bc1b160e77 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -104,7 +104,7 @@ public: for (const auto & c : column_names) cnames += c + " "; auto storage = getNested(); - auto nested_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr()); + auto nested_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); auto pipe = storage->read(column_names, nested_snapshot, query_info, context, processed_stage, max_block_size, num_streams); if (!pipe.empty() && add_conversion) diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index 21156ec27cc..a4e18657f9f 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -31,6 +31,10 @@ public: { return virtuals; } + + /// FIXME probably it should return false, but StorageValues is used in ExecutingInnerQueryFromViewTransform (whatever it is) + bool supportsTransactions() const override { return true; } + private: Block res_block; NamesAndTypesList virtuals; diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index 1df8b43515e..111ea343398 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -71,13 +71,18 @@ static String 
clusterNameFromDDLQuery(ContextPtr context, const DDLTask & task) { const char * begin = task.entry.query.data(); const char * end = begin + task.entry.query.size(); - String cluster_name; - ParserQuery parser_query(end); + const auto & settings = context->getSettingsRef(); + String description = fmt::format("from {}", task.entry_path); + ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); ASTPtr query = parseQuery(parser_query, begin, end, description, - context->getSettingsRef().max_query_size, context->getSettingsRef().max_parser_depth); + settings.max_query_size, + settings.max_parser_depth); + + String cluster_name; if (const auto * query_on_cluster = dynamic_cast(query.get())) cluster_name = query_on_cluster->cluster; + return cluster_name; } diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 3841abc2f2d..fb6a055c6e5 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -22,6 +23,7 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) {"total_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, {"type", std::make_shared()}, + {"cache_path", std::make_shared()}, })); setInMemoryMetadata(storage_metadata); } @@ -43,6 +45,7 @@ Pipe StorageSystemDisks::read( MutableColumnPtr col_total = ColumnUInt64::create(); MutableColumnPtr col_keep = ColumnUInt64::create(); MutableColumnPtr col_type = ColumnString::create(); + MutableColumnPtr col_cache_path = ColumnString::create(); for (const auto & [disk_name, disk_ptr] : context->getDisksMap()) { @@ -52,6 +55,12 @@ Pipe StorageSystemDisks::read( col_total->insert(disk_ptr->getTotalSpace()); col_keep->insert(disk_ptr->getKeepingFreeSpace()); col_type->insert(toString(disk_ptr->getType())); + + String cache_path; + if (disk_ptr->isRemote()) + cache_path = disk_ptr->getCacheBasePath(); + + col_cache_path->insert(cache_path); } Columns res_columns; @@ -61,6 +70,7 @@ Pipe StorageSystemDisks::read( res_columns.emplace_back(std::move(col_total)); res_columns.emplace_back(std::move(col_keep)); res_columns.emplace_back(std::move(col_type)); + res_columns.emplace_back(std::move(col_cache_path)); UInt64 num_rows = res_columns.at(0)->size(); Chunk chunk(std::move(res_columns), num_rows); diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp new file mode 100644 index 00000000000..f3ead8a95f0 --- /dev/null +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -0,0 +1,60 @@ +#include "StorageSystemFilesystemCache.h" +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemFilesystemCache::getNamesAndTypes() +{ + return { + {"cache_base_path", std::make_shared()}, + {"cache_path", std::make_shared()}, + {"file_segment_range_begin", std::make_shared()}, + {"file_segment_range_end", std::make_shared()}, + {"size", std::make_shared()}, + {"state", std::make_shared()}, + {"cache_hits", std::make_shared()}, + {"references", std::make_shared()}, + {"downloaded_size", std::make_shared()}, + }; +} + +StorageSystemFilesystemCache::StorageSystemFilesystemCache(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_) +{ +} + +void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +{ + auto caches = 
FileCacheFactory::instance().getAll(); + + for (const auto & [cache_base_path, cache_data] : caches) + { + const auto & cache = cache_data.cache; + auto file_segments = cache->getSnapshot(); + + for (const auto & file_segment : file_segments) + { + res_columns[0]->insert(cache_base_path); + res_columns[1]->insert(cache->getPathInLocalCache(file_segment->key(), file_segment->offset())); + + const auto & range = file_segment->range(); + res_columns[2]->insert(range.left); + res_columns[3]->insert(range.right); + res_columns[4]->insert(range.size()); + res_columns[5]->insert(FileSegment::stateToString(file_segment->state())); + res_columns[6]->insert(file_segment->getHitsCount()); + res_columns[7]->insert(file_segment->getRefCount()); + res_columns[8]->insert(file_segment->getDownloadedSize()); + } + } +} + +} diff --git a/src/Storages/System/StorageSystemFilesystemCache.h b/src/Storages/System/StorageSystemFilesystemCache.h new file mode 100644 index 00000000000..1d9d28d7b50 --- /dev/null +++ b/src/Storages/System/StorageSystemFilesystemCache.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include + +namespace DB +{ + +/** + * Usgae example. How to get mapping from local paths to remote paths: + * SELECT + * cache_path, + * cache_hits, + * remote_path, + * local_path, + * file_segment_range_begin, + * file_segment_range_end, + * size, + * state + * FROM + * ( + * SELECT + * arrayJoin(cache_paths) AS cache_path, + * local_path, + * remote_path + * FROM system.remote_data_paths + * ) AS data_paths + * INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path + * FORMAT Vertical + */ + +class StorageSystemFilesystemCache final : public shared_ptr_helper, + public IStorageSystemOneBlock +{ + friend struct shared_ptr_helper; +public: + std::string getName() const override { return "SystemFilesystemCache"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + explicit StorageSystemFilesystemCache(const StorageID & table_id_); + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index 5f3a12c530d..043a0e7c0c2 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -40,6 +40,7 @@ public: bool hasEvenlyDistributedRead() const override { return true; } bool isSystemStorage() const override { return true; } + bool supportsTransactions() const override { return true; } private: bool multithreaded; diff --git a/src/Storages/System/StorageSystemOne.h b/src/Storages/System/StorageSystemOne.h index b0ca389b76f..1c3d5c9ab80 100644 --- a/src/Storages/System/StorageSystemOne.h +++ b/src/Storages/System/StorageSystemOne.h @@ -32,6 +32,8 @@ public: bool isSystemStorage() const override { return true; } + bool supportsTransactions() const override { return true; } + protected: explicit StorageSystemOne(const StorageID & table_id_); }; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 2efb337b302..6674de06c07 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB { @@ -81,13 +82,19 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"rows_where_ttl_info.max", std::make_shared(std::make_shared())}, {"projections", std::make_shared(std::make_shared())}, + + 
{"visible", std::make_shared()}, + {"creation_tid", getTransactionIDDataType()}, + {"removal_tid", getTransactionIDDataType()}, + {"creation_csn", std::make_shared()}, + {"removal_csn", std::make_shared()}, } ) { } void StorageSystemParts::processNextStorage( - MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) + ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) { using State = IMergeTreeDataPart::State; MergeTreeData::DataPartStateVector all_parts_state; @@ -272,6 +279,29 @@ void StorageSystemParts::processNextStorage( if (columns_mask[src_index++]) columns[res_index++]->insert(projections); + if (columns_mask[src_index++]) + { + auto txn = context->getCurrentTransaction(); + if (txn) + columns[res_index++]->insert(part->version.isVisible(*txn)); + else + columns[res_index++]->insert(part_state == State::Active); + } + + auto get_tid_as_field = [](const TransactionID & tid) -> Field + { + return Tuple{tid.start_csn, tid.local_tid, tid.host_id}; + }; + + if (columns_mask[src_index++]) + columns[res_index++]->insert(get_tid_as_field(part->version.creation_tid)); + if (columns_mask[src_index++]) + columns[res_index++]->insert(get_tid_as_field(part->version.getRemovalTID())); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->version.creation_csn.load(std::memory_order_relaxed)); + if (columns_mask[src_index++]) + columns[res_index++]->insert(part->version.removal_csn.load(std::memory_order_relaxed)); + /// _state column should be the latest. /// Do not use part->getState*, it can be changed from different thread if (has_state_column) diff --git a/src/Storages/System/StorageSystemParts.h b/src/Storages/System/StorageSystemParts.h index 69e957c5a1e..f7b069c9516 100644 --- a/src/Storages/System/StorageSystemParts.h +++ b/src/Storages/System/StorageSystemParts.h @@ -21,7 +21,7 @@ public: protected: explicit StorageSystemParts(const StorageID & table_id_); void processNextStorage( - MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; + ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; }; } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 26b1b151073..1462cc58a42 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -57,12 +57,12 @@ StoragesInfo::getParts(MergeTreeData::DataPartStateVector & state, bool has_stat { /// If has_state_column is requested, return all states. 
if (!has_state_column) - return data->getDataPartsVector({State::Active, State::Outdated}, &state, require_projection_parts); + return data->getDataPartsVectorForInternalUsage({State::Active, State::Outdated}, &state, require_projection_parts); return data->getAllDataPartsVector(&state, require_projection_parts); } - return data->getDataPartsVector({State::Active}, &state, require_projection_parts); + return data->getDataPartsVectorForInternalUsage({State::Active}, &state, require_projection_parts); } StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context) @@ -268,7 +268,7 @@ Pipe StorageSystemPartsBase::read( while (StoragesInfo info = stream.next()) { - processNextStorage(res_columns, columns_mask, info, has_state_column); + processNextStorage(context, res_columns, columns_mask, info, has_state_column); } if (has_state_column) diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 0daa01a6b99..3eb8c7c8711 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -78,7 +78,7 @@ protected: StorageSystemPartsBase(const StorageID & table_id_, NamesAndTypesList && columns_); virtual void - processNextStorage(MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) = 0; + processNextStorage(ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) = 0; }; } diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index a9341abb9cd..7f648054da2 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -76,7 +76,7 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_ } void StorageSystemPartsColumns::processNextStorage( - MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) + ContextPtr, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) { /// Prepare information about columns in storage. 
struct ColumnInfo diff --git a/src/Storages/System/StorageSystemPartsColumns.h b/src/Storages/System/StorageSystemPartsColumns.h index b8c52ca16ef..9cdd2befb40 100644 --- a/src/Storages/System/StorageSystemPartsColumns.h +++ b/src/Storages/System/StorageSystemPartsColumns.h @@ -23,7 +23,7 @@ public: protected: explicit StorageSystemPartsColumns(const StorageID & table_id_); void processNextStorage( - MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; + ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; }; } diff --git a/src/Storages/System/StorageSystemProjectionParts.cpp b/src/Storages/System/StorageSystemProjectionParts.cpp index d15acc97cb1..591277c1a66 100644 --- a/src/Storages/System/StorageSystemProjectionParts.cpp +++ b/src/Storages/System/StorageSystemProjectionParts.cpp @@ -90,7 +90,7 @@ StorageSystemProjectionParts::StorageSystemProjectionParts(const StorageID & tab } void StorageSystemProjectionParts::processNextStorage( - MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) + ContextPtr, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) { using State = IMergeTreeDataPart::State; MergeTreeData::DataPartStateVector all_parts_state; diff --git a/src/Storages/System/StorageSystemProjectionParts.h b/src/Storages/System/StorageSystemProjectionParts.h index a8db87fbba4..be31d08b24e 100644 --- a/src/Storages/System/StorageSystemProjectionParts.h +++ b/src/Storages/System/StorageSystemProjectionParts.h @@ -21,6 +21,6 @@ public: protected: explicit StorageSystemProjectionParts(const StorageID & table_id_); void processNextStorage( - MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; + ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; }; } diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index 29c877733d8..8f6db9fcbe8 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -73,7 +73,7 @@ StorageSystemProjectionPartsColumns::StorageSystemProjectionPartsColumns(const S } void StorageSystemProjectionPartsColumns::processNextStorage( - MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) + ContextPtr, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) { /// Prepare information about columns in storage. 
struct ColumnInfo diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.h b/src/Storages/System/StorageSystemProjectionPartsColumns.h index 5679f5e9093..ade07b70a23 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.h +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.h @@ -23,6 +23,6 @@ public: protected: explicit StorageSystemProjectionPartsColumns(const StorageID & table_id_); void processNextStorage( - MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; + ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; }; } diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp new file mode 100644 index 00000000000..410d1ae6dd4 --- /dev/null +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -0,0 +1,100 @@ +#include "StorageSystemRemoteDataPaths.h" +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & table_id_) + : IStorage(table_id_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(ColumnsDescription( + { + {"disk_name", std::make_shared()}, + {"path", std::make_shared()}, + {"cache_base_path", std::make_shared()}, + {"local_path", std::make_shared()}, + {"remote_path", std::make_shared()}, + {"cache_paths", std::make_shared(std::make_shared())}, + })); + setInMemoryMetadata(storage_metadata); +} + +Pipe StorageSystemRemoteDataPaths::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) +{ + storage_snapshot->check(column_names); + + MutableColumnPtr col_disk_name = ColumnString::create(); + MutableColumnPtr col_base_path = ColumnString::create(); + MutableColumnPtr col_cache_base_path = ColumnString::create(); + MutableColumnPtr col_local_path = ColumnString::create(); + MutableColumnPtr col_remote_path = ColumnString::create(); + MutableColumnPtr col_cache_paths = ColumnArray::create(ColumnString::create()); + + auto disks = context->getDisksMap(); + for (const auto & [disk_name, disk] : disks) + { + if (disk->isRemote()) + { + std::vector remote_paths_by_local_path; + disk->getRemotePathsRecursive("store", remote_paths_by_local_path); + + FileCachePtr cache; + auto cache_base_path = disk->getCacheBasePath(); + if (!cache_base_path.empty()) + cache = FileCacheFactory::instance().get(cache_base_path); + + for (const auto & [local_path, remote_paths] : remote_paths_by_local_path) + { + for (const auto & remote_path : remote_paths) + { + col_disk_name->insert(disk_name); + col_base_path->insert(disk->getPath()); + col_cache_base_path->insert(cache_base_path); + col_local_path->insert(local_path); + col_remote_path->insert(remote_path); + + if (cache) + { + auto cache_paths = cache->tryGetCachePaths(cache->hash(remote_path)); + col_cache_paths->insert(Array(cache_paths.begin(), cache_paths.end())); + } + else + { + col_cache_paths->insertDefault(); + } + } + } + } + } + + Columns res_columns; + res_columns.emplace_back(std::move(col_disk_name)); + res_columns.emplace_back(std::move(col_base_path)); + res_columns.emplace_back(std::move(col_cache_base_path)); + 
res_columns.emplace_back(std::move(col_local_path)); + res_columns.emplace_back(std::move(col_remote_path)); + res_columns.emplace_back(std::move(col_cache_paths)); + + UInt64 num_rows = res_columns.at(0)->size(); + Chunk chunk(std::move(res_columns), num_rows); + + return Pipe(std::make_shared(storage_snapshot->metadata->getSampleBlock(), std::move(chunk))); +} + +} diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.h b/src/Storages/System/StorageSystemRemoteDataPaths.h new file mode 100644 index 00000000000..0057dcf12f2 --- /dev/null +++ b/src/Storages/System/StorageSystemRemoteDataPaths.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StorageSystemRemoteDataPaths : public shared_ptr_helper, public IStorage +{ + friend struct shared_ptr_helper; +public: + std::string getName() const override { return "SystemRemoteDataPaths"; } + + bool isSystemStorage() const override { return true; } + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + +protected: + explicit StorageSystemRemoteDataPaths(const StorageID & table_id_); +}; + +} diff --git a/src/Storages/System/StorageSystemTransactions.cpp b/src/Storages/System/StorageSystemTransactions.cpp new file mode 100644 index 00000000000..396fc875f74 --- /dev/null +++ b/src/Storages/System/StorageSystemTransactions.cpp @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +static DataTypePtr getStateEnumType() +{ + return std::make_shared( + DataTypeEnum8::Values + { + {"RUNNING", static_cast(MergeTreeTransaction::State::RUNNING)}, + {"COMMITTED", static_cast(MergeTreeTransaction::State::COMMITTED)}, + {"ROLLED_BACK", static_cast(MergeTreeTransaction::State::ROLLED_BACK)}, + }); +} + +NamesAndTypesList StorageSystemTransactions::getNamesAndTypes() +{ + return { + {"tid", getTransactionIDDataType()}, + {"tid_hash", std::make_shared()}, + {"elapsed", std::make_shared()}, + {"is_readonly", std::make_shared()}, + {"state", getStateEnumType()}, + }; +} + +void StorageSystemTransactions::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const +{ + auto list = TransactionLog::instance().getTransactionsList(); + for (const auto & elem : list) + { + auto txn = elem.second; + size_t i = 0; + res_columns[i++]->insert(Tuple{txn->tid.start_csn, txn->tid.local_tid, txn->tid.host_id}); + res_columns[i++]->insert(txn->tid.getHash()); + res_columns[i++]->insert(txn->elapsedSeconds()); + res_columns[i++]->insert(txn->isReadOnly()); + res_columns[i++]->insert(txn->getState()); + } +} + +} diff --git a/src/Storages/System/StorageSystemTransactions.h b/src/Storages/System/StorageSystemTransactions.h new file mode 100644 index 00000000000..38244815549 --- /dev/null +++ b/src/Storages/System/StorageSystemTransactions.h @@ -0,0 +1,27 @@ +#pragma once +#include +#include + + +namespace DB +{ + +class Context; + +class StorageSystemTransactions final : public shared_ptr_helper, public IStorageSystemOneBlock +{ + friend struct shared_ptr_helper; +public: + String getName() const override { return "SystemTransactions"; } + + static NamesAndTypesList getNamesAndTypes(); + + static NamesAndAliases getNamesAndAliases() { return {}; } + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr 
context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemZeros.h b/src/Storages/System/StorageSystemZeros.h index bf72352b7be..067e6c7217d 100644 --- a/src/Storages/System/StorageSystemZeros.h +++ b/src/Storages/System/StorageSystemZeros.h @@ -31,6 +31,7 @@ public: bool hasEvenlyDistributedRead() const override { return true; } bool isSystemStorage() const override { return true; } + bool supportsTransactions() const override { return true; } private: bool multithreaded; diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 96ee000c0bf..6558890b8c4 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -68,6 +68,9 @@ #include #include #include +#include +#include +#include #ifdef OS_LINUX #include @@ -161,9 +164,14 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "replicated_fetches"); attach(context, system_database, "part_moves_between_shards"); attach(context, system_database, "asynchronous_inserts"); + attach(context, system_database, "filesystem_cache"); + attach(context, system_database, "remote_data_paths"); if (has_zookeeper) attach(context, system_database, "zookeeper"); + + if (context->getConfigRef().getInt("allow_experimental_transactions", 0)) + attach(context, system_database, "transactions"); } void attachSystemTablesAsync(ContextPtr context, IDatabase & system_database, AsynchronousMetrics & async_metrics) diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 4cda9d6c9f5..66922afdd9c 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -117,7 +117,7 @@ std::string readData(DB::StoragePtr & table, const DB::ContextPtr context) { using namespace DB; auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto storage_snapshot = table->getStorageSnapshot(metadata_snapshot); + auto storage_snapshot = table->getStorageSnapshot(metadata_snapshot, context); Names column_names; column_names.push_back("a"); diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index e9d660156e1..74b2d3f89c3 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -6,6 +6,7 @@ import json import os import sys import time +from shutil import rmtree from typing import List, Optional, Tuple from env_helper import REPO_COPY, TEMP_PATH, CACHES_PATH, IMAGES_PATH @@ -64,22 +65,22 @@ def get_packager_cmd( ) if build_config["build_type"]: - cmd += " --build-type={}".format(build_config["build_type"]) + cmd += f" --build-type={build_config['build_type']}" if build_config["sanitizer"]: - cmd += " --sanitizer={}".format(build_config["sanitizer"]) + cmd += f" --sanitizer={build_config['sanitizer']}" if build_config["splitted"] == "splitted": cmd += " --split-binary" if build_config["tidy"] == "enable": cmd += " --clang-tidy" cmd += " --cache=ccache" - cmd += " --ccache_dir={}".format(ccache_path) + cmd += f" --ccache_dir={ccache_path}" if "additional_pkgs" in build_config and build_config["additional_pkgs"]: cmd += " --additional-pkgs" - cmd += " --docker-image-version={}".format(image_version) - cmd += " --version={}".format(build_version) + cmd += f" --docker-image-version={image_version}" + cmd += f" --version={build_version}" if _can_export_binaries(build_config): cmd += " --with-binaries=tests" @@ -136,7 +137,7 @@ def create_json_artifact( success: bool, 
): subprocess.check_call( - f"echo 'BUILD_NAME=build_urls_{build_name}' >> $GITHUB_ENV", shell=True + f"echo 'BUILD_URLS=build_urls_{build_name}' >> $GITHUB_ENV", shell=True ) result = { @@ -149,16 +150,9 @@ def create_json_artifact( json_name = "build_urls_" + build_name + ".json" - print( - "Dump json report", - result, - "to", - json_name, - "with env", - "build_urls_{build_name}", - ) + print(f"Dump json report {result} to {json_name} with env build_urls_{build_name}") - with open(os.path.join(temp_path, json_name), "w") as build_links: + with open(os.path.join(temp_path, json_name), "w", encoding="utf-8") as build_links: json.dump(result, build_links) @@ -277,7 +271,12 @@ def main(): ccache_path = os.path.join(CACHES_PATH, build_name + "_ccache") logging.info("Will try to fetch cache for our build") - get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, TEMP_PATH) + try: + get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, TEMP_PATH) + except Exception as e: + # In case there are issues with ccache, remove the path and do not fail a build + logging.info("Failed to get ccache, building without it. Error: %s", e) + rmtree(ccache_path, ignore_errors=True) if not os.path.exists(ccache_path): logging.info("cache was not fetched, will create empty dir") @@ -337,7 +336,7 @@ def main(): print("::notice ::Build URLs: {}".format("\n".join(build_urls))) - print("::notice ::Log URL: {}".format(log_url)) + print(f"::notice ::Log URL: {log_url}") create_json_artifact( TEMP_PATH, build_name, log_url, build_urls, build_config, elapsed, success diff --git a/tests/ci/cancel_and_rerun_workflow_lambda/app.py b/tests/ci/cancel_and_rerun_workflow_lambda/app.py index ab5702569d4..8deed464012 100644 --- a/tests/ci/cancel_and_rerun_workflow_lambda/app.py +++ b/tests/ci/cancel_and_rerun_workflow_lambda/app.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from collections import namedtuple +from typing import Any, Dict import json import time @@ -21,6 +22,8 @@ API_URL = "https://api.github.com/repos/ClickHouse/ClickHouse" MAX_RETRY = 5 +DEBUG_INFO = {} # type: Dict[str, Any] + def get_installation_id(jwt_token): headers = { @@ -110,6 +113,10 @@ def get_workflows_description_for_pull_request(pull_request_event): workflow_descriptions = [] for workflow in workflows_data: + DEBUG_INFO["workflow"] = workflow + # Some time workflow["head_repository"]["full_name"] is None + if workflow["head_repository"] is None: + continue # unfortunately we cannot filter workflows from forks in request to API # so doing it manually if ( @@ -162,7 +169,8 @@ def exec_workflow_url(urls_to_cancel, token): def main(event): token = get_token_from_aws() - event_data = json.loads(event["body"]) + DEBUG_INFO["event_body"] = event["body"] + event_data = event["body"] print("Got event for PR", event_data["number"]) action = event_data["action"] @@ -210,4 +218,9 @@ def main(event): def handler(event, _): - main(event) + try: + main(event) + except Exception: + for name, value in DEBUG_INFO.items(): + print(f"Value of {name}: ", value) + raise diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 789abc30b0c..0ddafc4b582 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -24,7 +24,7 @@ from version_helper import ( ClickHouseVersion, get_tagged_versions, get_version_from_repo, - get_version_from_string, + version_arg, ) TEMP_PATH = p.join(RUNNER_TEMP, "docker_images_check") @@ -49,7 +49,8 @@ def parse_args() -> argparse.Namespace: "--version", type=version_arg, 
default=get_version_from_repo().string, - help="a version to build", + help="a version to build, automaticaly got from version_helper, accepts either " + "tag ('refs/tags/' is removed automatically) or a normal 22.2.2.2 format", ) parser.add_argument( "--release-type", @@ -111,13 +112,6 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -def version_arg(version: str) -> ClickHouseVersion: - try: - return get_version_from_string(version) - except ValueError as e: - raise argparse.ArgumentTypeError(e) - - def auto_release_type(version: ClickHouseVersion, release_type: str) -> str: if release_type != "auto": return release_type @@ -125,7 +119,7 @@ def auto_release_type(version: ClickHouseVersion, release_type: str) -> str: git_versions = get_tagged_versions() reference_version = git_versions[0] for i in reversed(range(len(git_versions))): - if git_versions[i] < version: + if git_versions[i] <= version: if i == len(git_versions) - 1: return "latest" reference_version = git_versions[i + 1] @@ -209,7 +203,7 @@ def build_and_push_image( result = [] if os != "ubuntu": tag += f"-{os}" - init_args = ["docker", "buildx", "build"] + init_args = ["docker", "buildx", "build", "--build-arg BUILDKIT_INLINE_CACHE=1"] if push: init_args.append("--push") init_args.append("--output=type=image,push-by-digest=true") diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 8a0c4a0dfce..cabf12082e8 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -9,7 +9,7 @@ from pr_info import PRInfo import docker_images_check as di with patch("git_helper.Git"): - from version_helper import get_version_from_string, get_tagged_versions + from version_helper import get_version_from_string import docker_server as ds # di.logging.basicConfig(level=di.logging.INFO) @@ -254,7 +254,8 @@ class TestDockerServer(unittest.TestCase): get_version_from_string("2.2.1.1"), get_version_from_string("2.2.2.1"), ] - cases = ( + + cases_less = ( (get_version_from_string("1.0.1.1"), "minor"), (get_version_from_string("1.1.2.1"), "minor"), (get_version_from_string("1.3.1.1"), "major"), @@ -263,8 +264,18 @@ class TestDockerServer(unittest.TestCase): (get_version_from_string("2.2.3.1"), "latest"), (get_version_from_string("2.3.1.1"), "latest"), ) - _ = get_tagged_versions() - for case in cases: + for case in cases_less: + release = ds.auto_release_type(case[0], "auto") + self.assertEqual(case[1], release) + + cases_equal = ( + (get_version_from_string("1.1.1.1"), "minor"), + (get_version_from_string("1.2.1.1"), "major"), + (get_version_from_string("2.1.1.1"), "minor"), + (get_version_from_string("2.2.1.1"), "patch"), + (get_version_from_string("2.2.2.1"), "latest"), + ) + for case in cases_equal: release = ds.auto_release_type(case[0], "auto") self.assertEqual(case[1], release) diff --git a/tests/ci/git_helper.py b/tests/ci/git_helper.py index 5c02ea53cf8..50414ffb470 100644 --- a/tests/ci/git_helper.py +++ b/tests/ci/git_helper.py @@ -93,7 +93,7 @@ class Git: if value == "": return if not self._tag_pattern.match(value): - raise Exception(f"last tag {value} doesn't match the pattern") + raise ValueError(f"last tag {value} doesn't match the pattern") @property def latest_tag(self) -> str: diff --git a/tests/ci/push_to_artifactory.py b/tests/ci/push_to_artifactory.py index ccbf1918602..be977bdd907 100755 --- a/tests/ci/push_to_artifactory.py +++ b/tests/ci/push_to_artifactory.py @@ -40,13 +40,12 @@ class Packages: "_".join((name, version, arch + ".deb")) for name, arch in self.packages ) - rev = "2" 
self.rpm = tuple( - "-".join((name, version, rev + "." + self.rpm_arch[arch] + ".rpm")) + "-".join((name, version + "." + self.rpm_arch[arch] + ".rpm")) for name, arch in self.packages ) - self.tgz = tuple(f"{name}-{version}.tgz" for name, _ in self.packages) + self.tgz = tuple(f"{name}-{version}-amd64.tgz" for name, _ in self.packages) def arch(self, deb_pkg: str) -> str: if deb_pkg not in self.deb: diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 6f00232be77..d76e28f96f9 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -79,6 +79,7 @@ TRUSTED_CONTRIBUTORS = { "ilejn", # Arenadata, responsible for Kerberized Kafka "thomoco", # ClickHouse "BoloniniD", # Seasoned contributor, HSE + "tonickkozlov", # Cloudflare ] } @@ -255,6 +256,9 @@ if __name__ == "__main__": elif SUBMODULE_CHANGED_LABEL in pr_info.labels: pr_labels_to_remove.append(SUBMODULE_CHANGED_LABEL) + print( + "change labels: add {}, remove {}".format(pr_labels_to_add, pr_labels_to_remove) + ) if pr_labels_to_add: post_labels(gh, pr_info, pr_labels_to_add) diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 7db96cfde7c..9c67191e4c3 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import logging import os.path as p -from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, ArgumentTypeError from typing import Dict, List, Tuple, Union from git_helper import Git, removeprefix @@ -150,6 +150,9 @@ class ClickHouseVersion: return False + def __le__(self, other: "ClickHouseVersion") -> bool: + return self == other or self < other + class VersionType: LTS = "lts" @@ -217,6 +220,20 @@ def get_version_from_tag(tag: str) -> ClickHouseVersion: return get_version_from_string(tag) +def version_arg(version: str) -> ClickHouseVersion: + version = removeprefix(version, "refs/tags/") + try: + return get_version_from_string(version) + except ValueError: + pass + try: + return get_version_from_tag(version) + except ValueError: + pass + + raise ArgumentTypeError(f"version {version} does not match tag of plain version") + + def get_tagged_versions() -> List[ClickHouseVersion]: versions = [] for tag in git.get_tags(): diff --git a/tests/ci/version_test.py b/tests/ci/version_test.py new file mode 100644 index 00000000000..86a2d58c3c8 --- /dev/null +++ b/tests/ci/version_test.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python + +import unittest +from argparse import ArgumentTypeError + +import version_helper as vh + + +class TestFunctions(unittest.TestCase): + def test_version_arg(self): + cases = ( + ("0.0.0.0", vh.get_version_from_string("0.0.0.0")), + ("1.1.1.2", vh.get_version_from_string("1.1.1.2")), + ("v1.1.1.2-lts", vh.get_version_from_string("1.1.1.2")), + ("v1.1.1.2-prestable", vh.get_version_from_string("1.1.1.2")), + ("v1.1.1.2-stable", vh.get_version_from_string("1.1.1.2")), + ("v1.1.1.2-testing", vh.get_version_from_string("1.1.1.2")), + ("refs/tags/v1.1.1.2-testing", vh.get_version_from_string("1.1.1.2")), + ) + for case in cases: + version = vh.version_arg(case[0]) + self.assertEqual(case[1], version) + error_cases = ( + "0.0.0", + "1.1.1.a", + "1.1.1.1.1", + "1.1.1.2-testing", + "v1.1.1.2-testin", + "refs/tags/v1.1.1.2-testin", + ) + for case in error_cases: + with self.assertRaises(ArgumentTypeError): + version = vh.version_arg(case[0]) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 3efb37cc27d..b93416beda6 100755 --- 
a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -204,6 +204,17 @@ def get_processlist(args): else: return clickhouse_execute_json(args, 'SHOW PROCESSLIST') +def get_transactions_list(args): + try: + if args.replicated_database: + return clickhouse_execute_json(args, """ + SELECT materialize((hostName(), tcpPort())) as host, * + FROM clusterAllReplicas('test_cluster_database_replicated', system.transactions) + """) + else: + return clickhouse_execute_json(args, 'select * from system.transactions') + except Exception as e: + return f"Cannot get list of transactions: {e}" def get_processlist_after_test(args): log_comment = args.testcase_basename @@ -1395,6 +1406,7 @@ def main(args): if processlist: print(colored("\nFound hung queries in processlist:", args, "red", attrs=["bold"])) print(json.dumps(processlist, indent=4)) + print(get_transactions_list(args)) print_stacktraces() exit_code.value = 1 diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 2e43f735605..3dd4811b1bf 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -7,7 +7,9 @@ clickhouse clickhouse 1 + 0 22548578304 + 1 diff --git a/tests/config/config.d/transactions.xml b/tests/config/config.d/transactions.xml new file mode 100644 index 00000000000..19810986ea1 --- /dev/null +++ b/tests/config/config.d/transactions.xml @@ -0,0 +1,13 @@ + + 42 + + + + + + system + transactions_info_log
+ <flush_interval_milliseconds>7500</flush_interval_milliseconds> + </transactions_info_log> + + </clickhouse>
diff --git a/tests/config/install.sh b/tests/config/install.sh index c499ffa88f7..8f28fb386ff 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -34,6 +34,7 @@ ln -sf $SRC_PATH/config.d/merge_tree.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_path.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/transactions.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/encryption.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/CORS.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/zookeeper_log.xml $DEST_SERVER_PATH/config.d/ @@ -85,6 +86,7 @@ fi if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ + ln -sf $SRC_PATH/users.d/s3_cache.xml $DEST_SERVER_PATH/users.d/ fi if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/tests/config/users.d/s3_cache.xml b/tests/config/users.d/s3_cache.xml new file mode 100644 index 00000000000..69b24ecbbc4 --- /dev/null +++ b/tests/config/users.d/s3_cache.xml @@ -0,0 +1,8 @@ + + + + 1 + 1 + + + diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 50c9e0d894a..78274e0232e 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2919,7 +2919,8 @@ class ClickHouseInstance: else: params = params.copy() - params["query"] = sql + if sql is not None: + params["query"] = sql auth = None if user and password: diff --git a/tests/integration/test_MemoryTracking/configs/no_system_log.xml b/tests/integration/test_MemoryTracking/configs/no_system_log.xml index bd1b9f9a49e..3218dae4dc7 100644 --- a/tests/integration/test_MemoryTracking/configs/no_system_log.xml +++ b/tests/integration/test_MemoryTracking/configs/no_system_log.xml @@ -14,4 +14,5 @@ + diff --git a/tests/integration/test_format_schema_on_server/test.py b/tests/integration/test_format_schema_on_server/test.py index 7001d53ccf2..0b7d8837ad3 100644 --- a/tests/integration/test_format_schema_on_server/test.py +++ b/tests/integration/test_format_schema_on_server/test.py @@ -29,7 +29,7 @@ def create_simple_table(): def test_protobuf_format_input(started_cluster): create_simple_table() instance.http_query( - "INSERT INTO test.simple FORMAT Protobuf SETTINGS format_schema='simple:KeyValuePair'", + "INSERT INTO test.simple SETTINGS format_schema='simple:KeyValuePair' FORMAT Protobuf", "\x07\x08\x01\x12\x03abc\x07\x08\x02\x12\x03def", ) assert instance.query("SELECT * from test.simple") == "1\tabc\n2\tdef\n" diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py index e51a9335a65..aca33816d75 100644 --- a/tests/integration/test_storage_postgresql_replica/test.py +++ b/tests/integration/test_storage_postgresql_replica/test.py @@ -24,25 +24,25 @@ postgres_table_template = """ """ queries = [ - "INSERT INTO postgresql_replica select i, i from generate_series(0, 10000) as t(i);", - "DELETE FROM postgresql_replica WHERE (value*value) % 3 = 0;", - "UPDATE postgresql_replica SET value = value + 125 WHERE key % 2 = 0;", - "UPDATE postgresql_replica SET key=key+20000 WHERE key%2=0", - "INSERT INTO postgresql_replica select i, i from generate_series(40000, 50000) as t(i);", - "DELETE FROM postgresql_replica WHERE key % 10 = 0;", - "UPDATE 
postgresql_replica SET value = value + 101 WHERE key % 2 = 1;", - "UPDATE postgresql_replica SET key=key+80000 WHERE key%2=1", - "DELETE FROM postgresql_replica WHERE value % 2 = 0;", - "UPDATE postgresql_replica SET value = value + 2000 WHERE key % 5 = 0;", - "INSERT INTO postgresql_replica select i, i from generate_series(200000, 250000) as t(i);", - "DELETE FROM postgresql_replica WHERE value % 3 = 0;", - "UPDATE postgresql_replica SET value = value * 2 WHERE key % 3 = 0;", - "UPDATE postgresql_replica SET key=key+500000 WHERE key%2=1", - "INSERT INTO postgresql_replica select i, i from generate_series(1000000, 1050000) as t(i);", - "DELETE FROM postgresql_replica WHERE value % 9 = 2;", - "UPDATE postgresql_replica SET key=key+10000000", - "UPDATE postgresql_replica SET value = value + 2 WHERE key % 3 = 1;", - "DELETE FROM postgresql_replica WHERE value%5 = 0;", + "INSERT INTO {} select i, i from generate_series(0, 10000) as t(i);", + "DELETE FROM {} WHERE (value*value) % 3 = 0;", + "UPDATE {} SET value = value + 125 WHERE key % 2 = 0;", + "UPDATE {} SET key=key+20000 WHERE key%2=0", + "INSERT INTO {} select i, i from generate_series(40000, 50000) as t(i);", + "DELETE FROM {} WHERE key % 10 = 0;", + "UPDATE {} SET value = value + 101 WHERE key % 2 = 1;", + "UPDATE {} SET key=key+80000 WHERE key%2=1", + "DELETE FROM {} WHERE value % 2 = 0;", + "UPDATE {} SET value = value + 2000 WHERE key % 5 = 0;", + "INSERT INTO {} select i, i from generate_series(200000, 250000) as t(i);", + "DELETE FROM {} WHERE value % 3 = 0;", + "UPDATE {} SET value = value * 2 WHERE key % 3 = 0;", + "UPDATE {} SET key=key+500000 WHERE key%2=1", + "INSERT INTO {} select i, i from generate_series(1000000, 1050000) as t(i);", + "DELETE FROM {} WHERE value % 9 = 2;", + "UPDATE {} SET key=key+10000000", + "UPDATE {} SET value = value + 2 WHERE key % 3 = 1;", + "DELETE FROM {} WHERE value%5 = 0;", ] @@ -50,20 +50,17 @@ queries = [ def check_tables_are_synchronized( table_name, order_by="key", postgres_database="postgres_database" ): - expected = instance.query( - "select * from {}.{} order by {};".format( - postgres_database, table_name, order_by + while True: + expected = instance.query( + "select * from {}.{} order by {};".format( + postgres_database, table_name, order_by + ) ) - ) - result = instance.query( - "select * from test.{} order by {};".format(table_name, order_by) - ) - - while result != expected: - time.sleep(0.5) result = instance.query( "select * from test.{} order by {};".format(table_name, order_by) ) + if result == expected: + break assert result == expected @@ -103,15 +100,13 @@ def create_clickhouse_postgres_db(ip, port, name="postgres_database"): ) -def create_materialized_table(ip, port): +def create_materialized_table(ip, port, table_name="postgresql_replica"): instance.query( - """ - CREATE TABLE test.postgresql_replica (key UInt64, value UInt64) + f""" + CREATE TABLE test.{table_name} (key Int64, value Int64) ENGINE = MaterializedPostgreSQL( - '{}:{}', 'postgres_database', 'postgresql_replica', 'postgres', 'mysecretpassword') - PRIMARY KEY key; """.format( - ip, port - ) + '{ip}:{port}', 'postgres_database', '{table_name}', 'postgres', 'mysecretpassword') + PRIMARY KEY key; """ ) @@ -176,6 +171,7 @@ def test_initial_load_from_snapshot(started_cluster): cursor.execute("DROP TABLE postgresql_replica;") postgresql_replica_check_result(result, True) + instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") @pytest.mark.timeout(320) @@ -212,6 +208,7 @@ def 
test_no_connection_at_startup(started_cluster): result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") cursor.execute("DROP TABLE postgresql_replica;") postgresql_replica_check_result(result, True) + instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") @pytest.mark.timeout(320) @@ -250,6 +247,7 @@ def test_detach_attach_is_ok(started_cluster): cursor.execute("DROP TABLE postgresql_replica;") postgresql_replica_check_result(result, True) + instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") @pytest.mark.timeout(320) @@ -303,6 +301,7 @@ def test_replicating_insert_queries(started_cluster): result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") cursor.execute("DROP TABLE postgresql_replica;") postgresql_replica_check_result(result, True) + instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") @pytest.mark.timeout(320) @@ -659,6 +658,7 @@ def test_virtual_columns(started_cluster): ) print(result) cursor.execute("DROP TABLE postgresql_replica;") + instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") def test_abrupt_connection_loss_while_heavy_replication(started_cluster): @@ -669,17 +669,18 @@ def test_abrupt_connection_loss_while_heavy_replication(started_cluster): database=True, ) cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") + table_name = "postgresql_replica" + create_postgres_table(cursor, table_name) - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") + instance.query(f"DROP TABLE IF EXISTS test.{table_name}") create_materialized_table( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port ) for i in range(len(queries)): - query = queries[i] + query = queries[i].format(table_name) cursor.execute(query) - print("query {}".format(query)) + print("query {}".format(query.format(table_name))) started_cluster.pause_container("postgres1") @@ -692,6 +693,7 @@ def test_abrupt_connection_loss_while_heavy_replication(started_cluster): result = instance.query("SELECT count() FROM test.postgresql_replica") print(result) # Just debug + instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") def test_abrupt_server_restart_while_heavy_replication(started_cluster): @@ -701,26 +703,38 @@ def test_abrupt_server_restart_while_heavy_replication(started_cluster): database=True, ) cursor = conn.cursor() - create_postgres_table(cursor, "postgresql_replica") + table_name = "postgresql_replica_697" + create_postgres_table(cursor, table_name) - instance.query("DROP TABLE IF EXISTS test.postgresql_replica") + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT -1, 1") + instance.query(f"DROP TABLE IF EXISTS test.{table_name} NO DELAY") create_materialized_table( - ip=started_cluster.postgres_ip, port=started_cluster.postgres_port + ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + table_name=table_name, ) + n = 1 + while int(instance.query(f"select count() from test.{table_name}")) != 1: + sleep(1) + n += 1 + if n > 10: + break + for query in queries: - cursor.execute(query) - print("query {}".format(query)) + cursor.execute(query.format(table_name)) + print("query {}".format(query.format(table_name))) instance.restart_clickhouse() - result = instance.query("SELECT count() FROM test.postgresql_replica") + result = instance.query(f"SELECT count() FROM test.{table_name}") print(result) # Just debug - check_tables_are_synchronized("postgresql_replica") + check_tables_are_synchronized(table_name) - result = instance.query("SELECT 
count() FROM test.postgresql_replica") + result = instance.query(f"SELECT count() FROM test.{table_name}") print(result) # Just debug + instance.query(f"DROP TABLE test.{table_name} NO DELAY") def test_drop_table_immediately(started_cluster): @@ -744,7 +758,7 @@ def test_drop_table_immediately(started_cluster): ip=started_cluster.postgres_ip, port=started_cluster.postgres_port ) check_tables_are_synchronized("postgresql_replica") - instance.query("DROP TABLE test.postgresql_replica") + instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") if __name__ == "__main__": diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 71371f13d1e..e32ddd2782b 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -162,7 +162,7 @@ def test_put(started_cluster, maybe_auth, positive, compression): values_csv = "1,2,3\n3,2,1\n78,43,45\n" filename = "test.csv" put_query = f"""insert into table function s3('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{filename}', - {maybe_auth}'CSV', '{table_format}', '{compression}') values settings s3_truncate_on_insert=1 {values}""" + {maybe_auth}'CSV', '{table_format}', '{compression}') settings s3_truncate_on_insert=1 values {values}""" try: run_query(instance, put_query) @@ -362,7 +362,7 @@ def test_put_csv(started_cluster, maybe_auth, positive): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" filename = "test.csv" - put_query = "insert into table function s3('http://{}:{}/{}/{}', {}'CSV', '{}') format CSV settings s3_truncate_on_insert=1".format( + put_query = "insert into table function s3('http://{}:{}/{}/{}', {}'CSV', '{}') settings s3_truncate_on_insert=1 format CSV".format( started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, @@ -392,7 +392,7 @@ def test_put_get_with_redirect(started_cluster): values = "(1, 1, 1), (1, 1, 1), (11, 11, 11)" values_csv = "1,1,1\n1,1,1\n11,11,11\n" filename = "test.csv" - query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values settings s3_truncate_on_insert=1 {}".format( + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') settings s3_truncate_on_insert=1 values {}".format( started_cluster.minio_redirect_host, started_cluster.minio_redirect_port, bucket, @@ -431,7 +431,7 @@ def test_put_with_zero_redirect(started_cluster): filename = "test.csv" # Should work without redirect - query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values settings s3_truncate_on_insert=1 {}".format( + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') settings s3_truncate_on_insert=1 values {}".format( started_cluster.minio_ip, MINIO_INTERNAL_PORT, bucket, @@ -442,7 +442,7 @@ def test_put_with_zero_redirect(started_cluster): run_query(instance, query) # Should not work with redirect - query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values settings s3_truncate_on_insert=1 {}".format( + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') settings s3_truncate_on_insert=1 values {}".format( started_cluster.minio_redirect_host, started_cluster.minio_redirect_port, bucket, diff --git a/tests/integration/test_transactions/__init__.py b/tests/integration/test_transactions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/integration/test_transactions/configs/transactions.xml b/tests/integration/test_transactions/configs/transactions.xml new file mode 100644 index 00000000000..a8d3e8fbf6d --- /dev/null +++ b/tests/integration/test_transactions/configs/transactions.xml @@ -0,0 +1,14 @@ + + 42 + + + 100500 + 0 + + + + system + transactions_info_log
+ <flush_interval_milliseconds>7500</flush_interval_milliseconds> + </transactions_info_log> + </clickhouse>
diff --git a/tests/integration/test_transactions/test.py b/tests/integration/test_transactions/test.py new file mode 100644 index 00000000000..8983e70b4cb --- /dev/null +++ b/tests/integration/test_transactions/test.py @@ -0,0 +1,120 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/transactions.xml"], + stay_alive=True, + with_zookeeper=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def tx(session, query): + params = {"session_id": "session_{}".format(session)} + return node.http_query(None, data=query, params=params) + + +def test_rollback_unfinished_on_restart(start_cluster): + node.query( + "create table mt (n int, m int) engine=MergeTree order by n partition by n % 2" + ) + node.query("insert into mt values (1, 10), (2, 20)") + tid0 = "(1,1,'00000000-0000-0000-0000-000000000000')" + + # it will hold a snapshot and avoid parts cleanup + tx(0, "begin transaction") + + tx(4, "begin transaction") + + tx(1, "begin transaction") + tid1 = tx(1, "select transactionID()").strip() + tx(1, "alter table mt drop partition id '1'") + tx(1, "commit") + + tx(1, "begin transaction") + tid2 = tx(1, "select transactionID()").strip() + tx(1, "insert into mt values (3, 30), (4, 40)") + tx(1, "commit") + + node.query("system flush logs") + csn1 = node.query( + "select csn from system.transactions_info_log where type='Commit' and tid={}".format( + tid1 + ) + ).strip() + csn2 = node.query( + "select csn from system.transactions_info_log where type='Commit' and tid={}".format( + tid2 + ) + ).strip() + + # insert a part before starting mutation and check that it will not be mutated + tx(4, "insert into mt values (9, 90)") + + # check that uncommitted mutation will be rolled back on restart + tx(1, "begin transaction") + tid3 = tx(1, "select transactionID()").strip() + tx(1, "insert into mt values (5, 50)") + tx(1, "alter table mt update m = m+n in partition id '1' where 1") + + # check that uncommitted merge will be rolled back on restart + tx(2, "begin transaction") + tid4 = tx(2, "select transactionID()").strip() + tx( + 2, + "optimize table mt partition id '0' final settings optimize_throw_if_noop = 1", + ) + + # check that uncommitted insert will be rolled back on restart + tx(3, "begin transaction") + tid5 = tx(3, "select transactionID()").strip() + tx(3, "insert into mt values (6, 70)") + + tid6 = tx(4, "select transactionID()").strip() + tx(4, "commit") + node.query("system flush logs") + csn6 = node.query( + "select csn from system.transactions_info_log where type='Commit' and tid={}".format( + tid6 + ) + ).strip() + + node.restart_clickhouse(kill=True) + + assert ( + node.query("select *, _part from mt order by n") + == "2\t20\t0_2_2_0\n3\t30\t1_3_3_0\n4\t40\t0_4_4_0\n9\t90\t1_5_5_0\n" + ) + res = node.query( + "select name, active, creation_tid, 'csn' || toString(creation_csn) || '_', removal_tid, 'csn' || toString(removal_csn) || '_' from system.parts where table='mt' order by name" + ) + res = res.replace(tid0, "tid0") + res = res.replace(tid1, "tid1").replace("csn" + csn1 + "_", "csn_1") + res = res.replace(tid2, "tid2").replace("csn" + csn2 + "_", "csn_2") + res = res.replace(tid3, "tid3") + res = res.replace(tid4, "tid4") + res = res.replace(tid5, "tid5") + res = res.replace(tid6, "tid6").replace("csn" + csn6 + "_", "csn_6") + assert ( + res + == 
"0_2_2_0\t1\ttid0\tcsn1_\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "0_2_4_1\t0\ttid4\tcsn18446744073709551615_\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "0_4_4_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "0_8_8_0\t0\ttid5\tcsn18446744073709551615_\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "1_1_1_0\t0\ttid0\tcsn1_\ttid1\tcsn_1\n" + "1_3_3_0\t1\ttid2\tcsn_2\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "1_3_3_0_7\t0\ttid3\tcsn18446744073709551615_\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "1_5_5_0\t1\ttid6\tcsn_6\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "1_6_6_0\t0\ttid3\tcsn18446744073709551615_\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + "1_6_6_0_7\t0\ttid3\tcsn18446744073709551615_\t(0,0,'00000000-0000-0000-0000-000000000000')\tcsn0_\n" + ) diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh index 5c2804bdcae..3cd842a10ba 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh @@ -31,7 +31,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_array_3dim:ABC' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh index bd208195acc..76c5a63c4f2 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh @@ -36,7 +36,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_array_of_arrays:AA' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh index 8d9e2689e26..1258230610d 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh @@ -33,7 +33,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). 
echo -$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_enum_mapping:EnumMessage'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_enum_mapping:EnumMessage' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.sh b/tests/queries/0_stateless/00825_protobuf_format_map.sh index 2a84772bc9f..81d1cf2e305 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_map.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_map.sh @@ -34,7 +34,7 @@ hexdump -C $BINARY_FILE_PATH # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO map_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO map_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_map:Message' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM map_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.sh b/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.sh index f1567128cf4..b0a16c2fbba 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_in_nested.sh @@ -30,7 +30,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO nested_in_nested_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_nested_in_nested:MessageType'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO nested_in_nested_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_nested_in_nested:MessageType' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM nested_in_nested_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh index 1b94ebd79f2..cf9c47f5ea9 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh @@ -37,7 +37,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). 
echo -$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_nested_optional:Message' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.sh b/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.sh index a1bbdc318d5..0f168c38395 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_no_length_delimiter.sh @@ -38,7 +38,7 @@ echo echo echo "Roundtrip:" $CLICKHOUSE_CLIENT --query "CREATE TABLE roundtrip_no_length_delimiter_protobuf_00825 AS no_length_delimiter_protobuf_00825" -$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_no_length_delimiter_protobuf_00825 FORMAT ProtobufSingle SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_no_length_delimiter:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_no_length_delimiter_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_no_length_delimiter:Message' FORMAT ProtobufSingle" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM roundtrip_no_length_delimiter_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_persons.sh b/tests/queries/0_stateless/00825_protobuf_format_persons.sh index 465b27aa683..df8b149d7be 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_persons.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_persons.sh @@ -68,7 +68,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format echo echo "Roundtrip:" $CLICKHOUSE_CLIENT --query "CREATE TABLE roundtrip_persons_00825 AS persons_00825" -$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:Person'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_persons_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:Person' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM roundtrip_persons_00825 ORDER BY name" rm "$BINARY_FILE_PATH" @@ -82,7 +82,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format echo echo "Roundtrip:" $CLICKHOUSE_CLIENT --query "CREATE TABLE alt_persons_00825 AS persons_00825" -$CLICKHOUSE_CLIENT --query "INSERT INTO alt_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:AltPerson'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO alt_persons_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:AltPerson' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM alt_persons_00825 ORDER BY name" rm "$BINARY_FILE_PATH" @@ -96,7 +96,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format echo echo "Roundtrip:" $CLICKHOUSE_CLIENT --query "CREATE TABLE str_persons_00825 AS persons_00825" -$CLICKHOUSE_CLIENT --query "INSERT INTO str_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:StrPerson'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT 
--query "INSERT INTO str_persons_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons:StrPerson' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM str_persons_00825 ORDER BY name" rm "$BINARY_FILE_PATH" @@ -110,7 +110,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format echo echo "Roundtrip:" $CLICKHOUSE_CLIENT --query "CREATE TABLE syntax2_persons_00825 AS persons_00825" -$CLICKHOUSE_CLIENT --query "INSERT INTO syntax2_persons_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO syntax2_persons_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_persons_syntax2:Syntax2Person' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM syntax2_persons_00825 ORDER BY name" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh index ed35df5e98b..1c1dde82b4a 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh @@ -48,7 +48,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO table_skipped_column_in_nested_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO table_skipped_column_in_nested_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM table_skipped_column_in_nested_00825 ORDER BY unused1" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_splitted_nested.sh b/tests/queries/0_stateless/00825_protobuf_format_splitted_nested.sh index 0cf33c91465..f4525ba609c 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_splitted_nested.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_splitted_nested.sh @@ -67,7 +67,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO splitted_nested_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_splitted_nested:Some'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO splitted_nested_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_splitted_nested:Some' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM splitted_nested_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_squares.sh b/tests/queries/0_stateless/00825_protobuf_format_squares.sh index cc54a62e07f..f04cf60d78b 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_squares.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_squares.sh @@ -28,7 +28,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). 
echo -$CLICKHOUSE_CLIENT --query "INSERT INTO squares_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_squares:NumberAndSquare'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO squares_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_squares:NumberAndSquare' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM squares_protobuf_00825" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.sh b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh index 09271e81ab1..5701ee2cb4d 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_table_default.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh @@ -34,7 +34,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # Check the input in the protobuf format (now the table contains the same data twice). echo -$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_table_default:Message' FORMAT Protobuf" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00851_http_insert_json_defaults.sh b/tests/queries/0_stateless/00851_http_insert_json_defaults.sh index 8838200271f..46523173410 100755 --- a/tests/queries/0_stateless/00851_http_insert_json_defaults.sh +++ b/tests/queries/0_stateless/00851_http_insert_json_defaults.sh @@ -8,10 +8,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS defaults" $CLICKHOUSE_CLIENT --query="CREATE TABLE defaults (x UInt32, y UInt32, a DEFAULT x + y, b Float32 DEFAULT round(log(1 + x + y), 5), c UInt32 DEFAULT 42, e MATERIALIZED x + y, f ALIAS x + y) ENGINE = Memory" -echo -ne '{"x":1, "y":1}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT%20INTO%20defaults%20FORMAT%20JSONEachRow%20SETTINGS%20input_format_defaults_for_omitted_fields=1" --data-binary @- -echo -ne '{"x":2, "y":2, "c":2}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- -echo -ne '{"x":3, "y":3, "a":3, "b":3, "c":3}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database=${CLICKHOUSE_DATABASE}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- -echo -ne '{"x":4} {"y":5, "c":5} {"a":6, "b":6, "c":6}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database=${CLICKHOUSE_DATABASE}&query=INSERT+INTO+defaults+FORMAT+JSONEachRow+SETTINGS+input_format_defaults_for_omitted_fields=1" --data-binary @- +echo -ne '{"x":1, "y":1}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT%20INTO%20defaults%20SETTINGS%20input_format_defaults_for_omitted_fields=1%20FORMAT%20JSONEachRow" --data-binary @- +echo -ne '{"x":2, "y":2, "c":2}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+defaults+SETTINGS+input_format_defaults_for_omitted_fields=1+FORMAT+JSONEachRow" --data-binary @- +echo -ne '{"x":3, "y":3, "a":3, "b":3, "c":3}\n' | ${CLICKHOUSE_CURL} -sS 
"${CLICKHOUSE_URL}&database=${CLICKHOUSE_DATABASE}&query=INSERT+INTO+defaults+SETTINGS+input_format_defaults_for_omitted_fields=1+FORMAT+JSONEachRow" --data-binary @- +echo -ne '{"x":4} {"y":5, "c":5} {"a":6, "b":6, "c":6}\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database=${CLICKHOUSE_DATABASE}&query=INSERT+INTO+defaults+SETTINGS+input_format_defaults_for_omitted_fields=1+FORMAT+JSONEachRow" --data-binary @- $CLICKHOUSE_CLIENT --query="SELECT * FROM defaults ORDER BY x, y FORMAT JSONEachRow" $CLICKHOUSE_CLIENT --query="DROP TABLE defaults" diff --git a/tests/queries/0_stateless/00900_orc_arrow_parquet_nested.sh b/tests/queries/0_stateless/00900_orc_arrow_parquet_nested.sh index a5294f06272..e07c8fcff09 100755 --- a/tests/queries/0_stateless/00900_orc_arrow_parquet_nested.sh +++ b/tests/queries/0_stateless/00900_orc_arrow_parquet_nested.sh @@ -20,17 +20,15 @@ for ((i = 0; i < 3; i++)) do echo ${formats[i]} ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE nested_table" - cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_table FORMAT ${formats[i]} SETTINGS input_format_${format_files[i]}_import_nested = 1" + cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_table SETTINGS input_format_${format_files[i]}_import_nested = 1 FORMAT ${formats[i]}" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM nested_table" ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE nested_nested_table" - cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_nested_table FORMAT ${formats[i]} SETTINGS input_format_${format_files[i]}_import_nested = 1" - + cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_nested_table SETTINGS input_format_${format_files[i]}_import_nested = 1 FORMAT ${formats[i]}" + ${CLICKHOUSE_CLIENT} --query="SELECT * FROM nested_nested_table" - - done ${CLICKHOUSE_CLIENT} --query="DROP TABLE nested_table" diff --git a/tests/queries/0_stateless/00938_template_input_format.sh b/tests/queries/0_stateless/00938_template_input_format.sh index 9218f4bebca..e99f59614da 100755 --- a/tests/queries/0_stateless/00938_template_input_format.sh +++ b/tests/queries/0_stateless/00938_template_input_format.sh @@ -22,10 +22,11 @@ cv bn m\", d: 2016-01-01 ; n: 456, s1: as\"df\\'gh , s2: '', s3: \"zx\\ncv\\tbn m\", s4: \"qwe,rty\", d: 2016-01-02 ; n: 9876543210, s1: , s2: 'zx\\ncv\\tbn m', s3: \"qwe,rty\", s4: \"as\"\"df'gh\", d: 2016-01-03 ; n: 789, s1: zx\\ncv\\tbn m , s2: 'qwe,rty', s3: \"as\\\"df'gh\", s4: \"\", d: 2016-01-04"$'\t'" - $ suffix $" | $CLICKHOUSE_CLIENT --query="INSERT INTO template1 FORMAT Template SETTINGS \ + $ suffix $" | $CLICKHOUSE_CLIENT --query="INSERT INTO template1 SETTINGS \ format_template_resultset = '$CURDIR/00938_template_input_format_resultset.tmp', \ format_template_row = '$CURDIR/00938_template_input_format_row.tmp', \ -format_template_rows_between_delimiter = ';\n'"; +format_template_rows_between_delimiter = ';\n' \ +FORMAT Template"; $CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT CSV"; @@ -33,10 +34,11 @@ echo "==== parse json (sophisticated template) ====" echo -ne 
'{${:}"meta"${:}:${:}[${:}{${:}"name"${:}:${:}"s1"${:},${:}"type"${:}:${:}"String"${:}}${:},${:}{${:}"name"${:}:${:}"s2"${:},${:}"type"${:}:${:}"String"${:}}${:},${:}{${:}"name"${:}:${:}"s3"${:},${:}"type"${:}:${:}"String"${:}}${:},${:}{${:}"name"${:}:${:}"s4"${:},${:}"type"${:}:${:}"String"${:}}${:},${:}{${:}"name"${:}:${:}"n"${:},${:}"type"${:}:${:}"UInt64"${:}}${:},${:}{${:}"name"${:}:${:}"d"${:},${:}"type"${:}:${:}"Date"${:}}${:}]${:},${:}"data"${:}:${:}[${data}]${:},${:}"rows"${:}:${:}${:CSV}${:},${:}"statistics"${:}:${:}{${:}"elapsed"${:}:${:}${:CSV}${:},${:}"rows_read"${:}:${:}${:CSV}${:},${:}"bytes_read"${:}:${:}${:CSV}${:}}${:}}' > "$CURDIR"/00938_template_input_format_resultset.tmp echo -ne '{${:}"s1"${:}:${:}${s1:JSON}${:},${:}"s2"${:}:${:}${s2:JSON}${:},${:}"s3"${:}:${:}${s3:JSON}${:},${:}"s4"${:}:${:}${s4:JSON}${:},${:}"n"${:}:${:}${n:JSON}${:},${:}"d"${:}:${:}${d:JSON}${:}${:}}' > "$CURDIR"/00938_template_input_format_row.tmp -$CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT JSON" | $CLICKHOUSE_CLIENT --query="INSERT INTO template2 FORMAT TemplateIgnoreSpaces SETTINGS \ +$CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT JSON" | $CLICKHOUSE_CLIENT --query="INSERT INTO template2 SETTINGS \ format_template_resultset = '$CURDIR/00938_template_input_format_resultset.tmp', \ format_template_row = '$CURDIR/00938_template_input_format_row.tmp', \ -format_template_rows_between_delimiter = ','"; +format_template_rows_between_delimiter = ',' \ +FORMAT TemplateIgnoreSpaces"; $CLICKHOUSE_CLIENT --query="SELECT * FROM template2 ORDER BY n FORMAT CSV"; $CLICKHOUSE_CLIENT --query="TRUNCATE TABLE template2"; @@ -45,10 +47,11 @@ echo "==== parse json ====" echo -ne '{${:}"meta"${:}:${:JSON},${:}"data"${:}:${:}[${data}]${:},${:}"rows"${:}:${:JSON},${:}"statistics"${:}:${:JSON}${:}}' > "$CURDIR"/00938_template_input_format_resultset.tmp echo -ne '{${:}"s1"${:}:${:}${s3:JSON}${:},${:}"s2"${:}:${:}${:JSON}${:},${:}"s3"${:}:${:}${s1:JSON}${:},${:}"s4"${:}:${:}${:JSON}${:},${:}"n"${:}:${:}${n:JSON}${:},${:}"d"${:}:${:}${d:JSON}${:}${:}}' > "$CURDIR"/00938_template_input_format_row.tmp -$CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT JSON" | $CLICKHOUSE_CLIENT --query="INSERT INTO template2 FORMAT TemplateIgnoreSpaces SETTINGS \ +$CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT JSON" | $CLICKHOUSE_CLIENT --query="INSERT INTO template2 SETTINGS \ format_template_resultset = '$CURDIR/00938_template_input_format_resultset.tmp', \ format_template_row = '$CURDIR/00938_template_input_format_row.tmp', \ -format_template_rows_between_delimiter = ','"; +format_template_rows_between_delimiter = ',' \ +FORMAT TemplateIgnoreSpaces"; $CLICKHOUSE_CLIENT --query="SELECT * FROM template2 ORDER BY n FORMAT CSV"; @@ -66,10 +69,11 @@ cv bn m\", d: 2016-01-01 ; n: 456, s1: as\"df\\'gh , s2: '', s3: \"zx\\ncv\\tbn m\", s4: \"qwe,rty\", d: 2016-01-02 ; n: 9876543210, s1: , s2: 'zx\\ncv\\tbn m', s3: \"qwe,rty\", s4: \"as\"\"df'gh\", d: 2016-01-03 ; n: 789, s1: zx\cv\bn m , s2: 'qwe,rty', s3: \"as\\\"df'gh\", s4: \"\", d: 2016-01-04"$'\t'" - $ suffix $" | $CLICKHOUSE_CLIENT --query="INSERT INTO template1 FORMAT Template SETTINGS \ + $ suffix $" | $CLICKHOUSE_CLIENT --query="INSERT INTO template1 SETTINGS \ format_template_resultset = '$CURDIR/00938_template_input_format_resultset.tmp', \ format_template_row = '$CURDIR/00938_template_input_format_row.tmp', \ -format_template_rows_between_delimiter = ';\n'"; 
+format_template_rows_between_delimiter = ';\n' \ +FORMAT Template"; $CLICKHOUSE_CLIENT --query="SELECT * FROM template1 ORDER BY n FORMAT CSV"; diff --git a/tests/queries/0_stateless/01014_format_custom_separated.sh b/tests/queries/0_stateless/01014_format_custom_separated.sh index 42599bcc944..4e88419d125 100755 --- a/tests/queries/0_stateless/01014_format_custom_separated.sh +++ b/tests/queries/0_stateless/01014_format_custom_separated.sh @@ -23,12 +23,13 @@ echo '0, "2019-09-24", "hello" 1, 2019-09-25, "world" 2, "2019-09-26", custom 3, 2019-09-27, separated -end' | $CLICKHOUSE_CLIENT --query="INSERT INTO custom_separated FORMAT CustomSeparated SETTINGS \ +end' | $CLICKHOUSE_CLIENT --query="INSERT INTO custom_separated SETTINGS \ format_custom_escaping_rule = 'CSV', \ format_custom_field_delimiter = ', ', \ format_custom_row_after_delimiter = '\n', \ format_custom_row_between_delimiter = '', \ -format_custom_result_after_delimiter = 'end\n'" +format_custom_result_after_delimiter = 'end\n' +FORMAT CustomSeparated" $CLICKHOUSE_CLIENT --query="SELECT * FROM custom_separated ORDER BY n FORMAT CSV" diff --git a/tests/queries/0_stateless/01085_regexp_input_format.sh b/tests/queries/0_stateless/01085_regexp_input_format.sh index 5736d031c08..217a2fbe8b7 100755 --- a/tests/queries/0_stateless/01085_regexp_input_format.sh +++ b/tests/queries/0_stateless/01085_regexp_input_format.sh @@ -9,19 +9,19 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE regexp (id UInt32, array Array(UInt32), echo 'id: 1 array: [1,2,3] string: str1 date: 2020-01-01 id: 2 array: [1,2,3] string: str2 date: 2020-01-02 -id: 3 array: [1,2,3] string: str3 date: 2020-01-03' | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp FORMAT Regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='Escaped'"; +id: 3 array: [1,2,3] string: str3 date: 2020-01-03' | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='Escaped' FORMAT Regexp "; echo 'id: 4 array: "[1,2,3]" string: "str4" date: "2020-01-04" id: 5 array: "[1,2,3]" string: "str5" date: "2020-01-05" -id: 6 array: "[1,2,3]" string: "str6" date: "2020-01-06"' | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp FORMAT Regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='CSV'"; +id: 6 array: "[1,2,3]" string: "str6" date: "2020-01-06"' | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='CSV' FORMAT Regexp"; echo "id: 7 array: [1,2,3] string: 'str7' date: '2020-01-07' id: 8 array: [1,2,3] string: 'str8' date: '2020-01-08' -id: 9 array: [1,2,3] string: 'str9' date: '2020-01-09'" | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp FORMAT Regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='Quoted'"; +id: 9 array: [1,2,3] string: 'str9' date: '2020-01-09'" | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='Quoted' FORMAT Regexp"; echo 'id: 10 array: [1,2,3] string: "str10" date: "2020-01-10" id: 11 array: [1,2,3] string: "str11" date: "2020-01-11" -id: 12 array: [1,2,3] string: "str12" date: "2020-01-12"' | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp FORMAT Regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) 
date: (.+?)', format_regexp_escaping_rule='JSON'"; +id: 12 array: [1,2,3] string: "str12" date: "2020-01-12"' | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='JSON' FORMAT Regexp"; $CLICKHOUSE_CLIENT --query="SELECT * FROM regexp ORDER BY id"; $CLICKHOUSE_CLIENT --query="DROP TABLE regexp"; diff --git a/tests/queries/0_stateless/01086_regexp_input_format_skip_unmatched.sh b/tests/queries/0_stateless/01086_regexp_input_format_skip_unmatched.sh index c96aed7d3ee..8db27891006 100755 --- a/tests/queries/0_stateless/01086_regexp_input_format_skip_unmatched.sh +++ b/tests/queries/0_stateless/01086_regexp_input_format_skip_unmatched.sh @@ -10,7 +10,7 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE regexp (id UInt32, string String) ENGIN echo 'id: 1 string: str1 id: 2 string: str2 id=3, string=str3 -id: 4 string: str4' | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp FORMAT Regexp SETTINGS format_regexp='id: (.+?) string: (.+?)', format_regexp_escaping_rule='Escaped', format_regexp_skip_unmatched=1"; +id: 4 string: str4' | $CLICKHOUSE_CLIENT --query="INSERT INTO regexp SETTINGS format_regexp='id: (.+?) string: (.+?)', format_regexp_escaping_rule='Escaped', format_regexp_skip_unmatched=1 FORMAT Regexp"; $CLICKHOUSE_CLIENT --query="SELECT * FROM regexp"; $CLICKHOUSE_CLIENT --query="DROP TABLE regexp"; diff --git a/tests/queries/0_stateless/01167_isolation_hermitage.reference b/tests/queries/0_stateless/01167_isolation_hermitage.reference new file mode 100644 index 00000000000..4488809f3ed --- /dev/null +++ b/tests/queries/0_stateless/01167_isolation_hermitage.reference @@ -0,0 +1,59 @@ +Serialization error +INVALID_TRANSACTION +INVALID_TRANSACTION +1 1 11 +1 2 21 +tx4 2 1 10 +tx4 2 2 20 +tx4 3 1 10 +tx4 3 2 20 +4 1 10 +4 2 20 +tx6 5 1 10 +tx6 5 2 20 +tx6 6 1 10 +tx6 6 2 20 +7 1 11 +7 2 20 +Serialization error +tx7 8 1 11 +tx7 8 2 20 +INVALID_TRANSACTION +INVALID_TRANSACTION +10 1 11 +10 2 20 +Serialization error +tx11 11 1 10 +tx11 11 2 20 +INVALID_TRANSACTION +tx11 12 1 10 +tx11 12 2 20 +INVALID_TRANSACTION +13 1 11 +13 2 19 +16 1 10 +16 2 20 +16 3 30 +Serialization error +INVALID_TRANSACTION +INVALID_TRANSACTION +18 1 20 +18 2 30 +tx16 19 1 10 +tx16 19 2 20 +tx17 20 1 10 +tx17 20 2 20 +Serialization error +INVALID_TRANSACTION +21 1 11 +21 2 20 +tx18 22 1 10 +tx19 23 1 10 +tx19 24 2 20 +tx18 25 2 20 +26 1 12 +26 2 18 +29 1 10 +29 2 20 +29 3 30 +29 4 42 diff --git a/tests/queries/0_stateless/01167_isolation_hermitage.sh b/tests/queries/0_stateless/01167_isolation_hermitage.sh new file mode 100755 index 00000000000..7f495801dd0 --- /dev/null +++ b/tests/queries/0_stateless/01167_isolation_hermitage.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-replicated-database +# Looks like server does not listen https port in fasttest +# FIXME Replicated database executes ALTERs in separate context, so transaction info is lost + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./transactions.lib +. 
"$CURDIR"/transactions.lib +set -e + +# https://github.com/ept/hermitage + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test (id int, value int) engine=MergeTree order by id" + +function reset_table() +{ + $CLICKHOUSE_CLIENT -q "truncate table test;" + $CLICKHOUSE_CLIENT -q "insert into test (id, value) values (1, 10);" + $CLICKHOUSE_CLIENT -q "insert into test (id, value) values (2, 20);" +} + +# TODO update test after implementing Read Committed + +# G0 +reset_table +tx 1 "begin transaction" +tx 2 "begin transaction" +tx 1 "alter table test update value=11 where id=1" +tx 2 "alter table test update value=12 where id=1" | grep -Eo "Serialization error" | uniq +tx 1 "alter table test update value=21 where id=2" +tx 1 "commit" +tx 2 "alter table test update value=22 where id=2" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 2 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 2 "rollback" +$CLICKHOUSE_CLIENT -q "select 1, * from test order by id" + +# G1a +reset_table +tx_async 3 "begin transaction" +tx_async 4 "begin transaction" +tx_async 3 "alter table test update value=101 where id=1" +tx_async 4 "select 2, * from test order by id" +tx_async 3 "alter table test update value=11 where id=1" +tx_async 3 "rollback" +tx_async 4 "select 3, * from test order by id" +tx_async 4 "commit" +tx_wait 3 +tx_wait 4 +$CLICKHOUSE_CLIENT -q "select 4, * from test order by id" + +# G1b +reset_table +tx_async 5 "begin transaction" +tx_async 6 "begin transaction" +tx_async 5 "alter table test update value=101 where id=1" +tx_async 6 "select 5, * from test order by id" +tx_async 5 "alter table test update value=11 where id=1" +tx_async 5 "commit" +tx_async 6 "select 6, * from test order by id" +tx_async 6 "commit" +tx_wait 5 +tx_wait 6 +$CLICKHOUSE_CLIENT -q "select 7, * from test order by id" + +# G1c +# NOTE both transactions will succeed if we implement skipping of unaffected partitions/parts +reset_table +tx 7 "begin transaction" +tx 8 "begin transaction" +tx 7 "alter table test update value=11 where id=1" +tx 8 "alter table test update value=22 where id=2" | grep -Eo "Serialization error" | uniq +tx 7 "select 8, * from test order by id" +tx 8 "select 9, * from test order by id" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 7 "commit" +tx 8 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 8 "rollback" +$CLICKHOUSE_CLIENT -q "select 10, * from test order by id" + +# OTV +reset_table +tx 9 "begin transaction" +tx 10 "begin transaction" +tx 11 "begin transaction" +tx 9 "alter table test update value = 11 where id = 1" +tx 9 "alter table test update value = 19 where id = 2" +tx 10 "alter table test update value = 12 where id = 1" | grep -Eo "Serialization error" | uniq +tx 9 "commit" +tx 11 "select 11, * from test order by id" +tx 10 "alter table test update value = 18 where id = 2" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 11 "select 12, * from test order by id" +tx 10 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 10 "rollback" +tx 11 "commit" +$CLICKHOUSE_CLIENT -q "select 13, * from test order by id" + +# PMP +reset_table +tx_async 12 "begin transaction" +tx_async 13 "begin transaction" +tx_async 12 "select 14, * from test where value = 30" +tx_async 13 "insert into test (id, value) values (3, 30)" +tx_async 13 "commit" +tx_async 12 "select 15, * from test where value = 30" +tx_async 12 "commit" +tx_wait 12 +tx_wait 13 +$CLICKHOUSE_CLIENT -q "select 16, * from test order by id" + +# PMP write +reset_table +tx 14 "begin transaction" +tx 15 "begin 
transaction" +tx 14 "alter table test update value = value + 10 where 1" +tx 15 "alter table test delete where value = 20" | grep -Eo "Serialization error" | uniq +tx 14 "commit" +tx 15 "select 17, * from test order by id" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 15 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 15 "rollback" +$CLICKHOUSE_CLIENT -q "select 18, * from test order by id" + +# P4 +reset_table +tx 16 "begin transaction" +tx 17 "begin transaction" +tx 16 "select 19, * from test order by id" +tx 17 "select 20, * from test order by id" +tx 16 "alter table test update value = 11 where id = 1" +tx 17 "alter table test update value = 11 where id = 1" | grep -Eo "Serialization error" | uniq +tx 16 "commit" +tx 17 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 17 "rollback" +$CLICKHOUSE_CLIENT -q "select 21, * from test order by id" + +# G-single +reset_table +tx_async 18 "begin transaction" +tx_async 19 "begin transaction" +tx_sync 18 "select 22, * from test where id = 1" +tx_async 19 "select 23, * from test where id = 1" +tx_async 19 "select 24, * from test where id = 2" +tx_async 19 "alter table test update value = 12 where id = 1" +tx_async 19 "alter table test update value = 18 where id = 2" +tx_async 19 "commit" +tx_async 18 "select 25, * from test where id = 2" +tx_async 18 "commit" +tx_wait 18 +tx_wait 19 +$CLICKHOUSE_CLIENT -q "select 26, * from test order by id" + +# G2 +reset_table +tx_async 20 "begin transaction" +tx_async 21 "begin transaction" +tx_sync 20 "select 27, * from test where value % 3 = 0" +tx_async 21 "select 28, * from test where value % 3 = 0" +tx_async 20 "insert into test (id, value) values (3, 30)" +tx_async 21 "insert into test (id, value) values (4, 42)" +tx_async 20 "commit" +tx_async 21 "commit" +tx_wait 20 +tx_wait 21 +$CLICKHOUSE_CLIENT -q "select 29, * from test order by id" + diff --git a/tests/queries/0_stateless/01168_mutations_isolation.reference b/tests/queries/0_stateless/01168_mutations_isolation.reference new file mode 100644 index 00000000000..1b3e3f145b1 --- /dev/null +++ b/tests/queries/0_stateless/01168_mutations_isolation.reference @@ -0,0 +1,38 @@ +tx2 1 10 all_1_1_0_4 +tx2 1 30 all_3_3_0_4 +tx1 2 1 all_1_1_0 +tx1 2 2 all_2_2_0 +Serialization error +INVALID_TRANSACTION +tx3 3 1 all_1_1_0 +Serialization error +INVALID_TRANSACTION +INVALID_TRANSACTION +tx5 4 2 all_1_1_0_8 +tx5 4 5 all_10_10_0 +tx5 4 6 all_7_7_0_8 +tx5 5 2 all_1_1_0_8 +tx5 5 5 all_10_10_0 +tx5 5 6 all_7_7_0_8 +SERIALIZATION_ERROR +tx6 6 2 all_1_1_0_11 +tx6 6 6 all_7_7_0_11 +tx7 7 20 all_1_1_0_13 +tx7 7 40 all_14_14_0 +tx7 7 60 all_7_7_0_13 +tx7 7 80 all_12_12_0_13 +tx7 8 20 all_1_14_1_13 +tx7 8 40 all_1_14_1_13 +tx7 8 60 all_1_14_1_13 +tx7 8 80 all_1_14_1_13 +Serialization error +INVALID_TRANSACTION +tx11 9 21 all_1_14_1_17 +tx11 9 41 all_1_14_1_17 +tx11 9 61 all_1_14_1_17 +tx11 9 81 all_1_14_1_17 +1 1 RUNNING +tx14 10 22 all_1_14_1_18 +tx14 10 42 all_1_14_1_18 +tx14 10 62 all_1_14_1_18 +tx14 10 82 all_1_14_1_18 diff --git a/tests/queries/0_stateless/01168_mutations_isolation.sh b/tests/queries/0_stateless/01168_mutations_isolation.sh new file mode 100755 index 00000000000..888858edf32 --- /dev/null +++ b/tests/queries/0_stateless/01168_mutations_isolation.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-replicated-database +# Looks like server does not listen https port in fasttest +# FIXME Replicated database executes ALTERs in separate context, so transaction info is lost + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# 
shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./transactions.lib +. "$CURDIR"/transactions.lib + +$CLICKHOUSE_CLIENT -q "drop table if exists mt" +$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by tuple()" + +$CLICKHOUSE_CLIENT -q "insert into mt values (1)" + +tx 1 "begin transaction" +tx 2 "begin transaction" +tx 1 "insert into mt values (2)" +tx 2 "insert into mt values (3)" +tx 2 "alter table mt update n=n*10 where 1" +tx 2 "select 1, n, _part from mt order by n" +tx 1 "select 2, n, _part from mt order by n" +tx 1 "alter table mt update n=n+1 where 1" | grep -Eo "Serialization error" | uniq +tx 1 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 2 "rollback" + + +tx 3 "begin transaction" +tx 3 "select 3, n, _part from mt order by n" +tx 4 "begin transaction" +tx 3 "insert into mt values (2)" +tx 4 "insert into mt values (3)" +tx 4 "alter table mt update n=n*2 where 1" +tx 3 "alter table mt update n=n+42 where 1" | grep -Eo "Serialization error" | uniq +tx 3 "insert into mt values (4)" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 4 "insert into mt values (5)" +tx 3 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 4 "commit" + + +tx 5 "begin transaction" +tx 5 "select 4, n, _part from mt order by n" +tx 6 "begin transaction" +tx 6 "alter table mt delete where n%2=1" +tx 6 "alter table mt drop part 'all_10_10_0_11'" +tx 5 "select 5, n, _part from mt order by n" +tx 5 "alter table mt drop partition id 'all'" | grep -Eo "SERIALIZATION_ERROR" | uniq +tx 6 "select 6, n, _part from mt order by n" +tx 5 "rollback" +tx 6 "insert into mt values (8)" +tx 6 "alter table mt update n=n*10 where 1" +tx 6 "insert into mt values (40)" +tx 6 "commit" + + +tx 7 "begin transaction" +tx 7 "select 7, n, _part from mt order by n" +tx 8 "begin transaction" +tx_async 8 "alter table mt update n = 0 where 1" >/dev/null +$CLICKHOUSE_CLIENT -q "kill mutation where database=currentDatabase() and mutation_id='mutation_15.txt' format Null" 2>&1| grep -Fv "probably it finished" +tx_sync 8 "rollback" +tx 7 "optimize table mt final" +tx 7 "select 8, n, _part from mt order by n" +tx 10 "begin transaction" +tx 10 "alter table mt update n = 0 where 1" | grep -Eo "Serialization error" | uniq +tx 7 "alter table mt update n=n+1 where 1" +tx 10 "commit" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 10 "rollback" +tx 7 "commit" + + +tx_async 11 "begin transaction" +tx_async 11 "select 9, n, _part from mt order by n" +tx_async 12 "begin transaction" +tx_async 11 "alter table mt update n=n+1 where 1" >/dev/null +tx_async 12 "alter table mt update n=n+1 where 1" >/dev/null +tx_async 11 "commit" >/dev/null +tx_async 12 "commit" >/dev/null +tx_wait 11 +tx_wait 12 + +tx 13 "begin transaction" +tid_to_kill=$(tx 13 "select transactionID()" | grep -Po "\(.*") +$CLICKHOUSE_CLIENT -q "select count(), any(is_readonly), any(state) from system.transactions where tid=$tid_to_kill" +tx_async 13 "alter table mt update n = 0 where 1" >/dev/null +$CLICKHOUSE_CLIENT -q "kill transaction where tid=$tid_to_kill format Null" +tx_sync 13 "rollback" + +tx 14 "begin transaction" +tx 14 "select 10, n, _part from mt order by n" + +$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table mt" diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.reference b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.reference new file mode 100644 index 00000000000..12b941eab50 --- /dev/null +++ 
b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.reference @@ -0,0 +1,8 @@ +1 1 +2 1 +3 1 +4 1 +1 +10 100 +1 1 1 +2 1 1 diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh new file mode 100755 index 00000000000..ab348fd31fb --- /dev/null +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -0,0 +1,133 @@ +#!/usr/bin/env bash +# Tags: long, no-replicated-database + +# shellcheck disable=SC2015 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n UInt64, type UInt8) ENGINE=MergeTree ORDER BY type SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (n UInt64, type UInt8) ENGINE=MergeTree ORDER BY type SETTINGS old_parts_lifetime=0"; + +function thread_insert() +{ + set -e + trap "exit 0" INT + val=1 + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($val, 1) */ ($val, 1); + INSERT INTO src VALUES /* ($val, 2) */ ($val, 2); + COMMIT;" + val=$((val+1)) + sleep 0.$RANDOM; + done +} + + +# NOTE +# ALTER PARTITION query stops merges, +# but serialization error is still possible if some merge was assigned (and committed) between BEGIN and ALTER. +function thread_partition_src_to_dst() +{ + set -e + count=0 + sum=0 + for i in {1..20}; do + out=$( + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($i, 3) */ ($i, 3); + INSERT INTO dst SELECT * FROM src; + ALTER TABLE src DROP PARTITION ID 'all'; + SET throw_on_unsupported_query_inside_transaction=0; + SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=3) != ($count + 1, $sum + $i)) FORMAT Null; + COMMIT;" 2>&1) ||: + + echo "$out" | grep -Fv "SERIALIZATION_ERROR" | grep -F "Received from " && $CLICKHOUSE_CLIENT --multiquery --query " + begin transaction; + set transaction snapshot 3; + select $i, 'src', type, n, _part from src order by type, n; + select $i, 'dst', type, n, _part from dst order by type, n; + rollback" ||: + echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || count=$((count+1)) + echo "$out" | grep -Fa "SERIALIZATION_ERROR" >/dev/null || sum=$((sum+i)) + done +} + +function thread_partition_dst_to_src() +{ + set -e + for i in {1..20}; do + action="ROLLBACK" + if (( i % 2 )); then + action="COMMIT" + fi + $CLICKHOUSE_CLIENT --multiquery --query " + SYSTEM STOP MERGES dst; + ALTER TABLE dst DROP PARTITION ID 'nonexistent'; -- STOP MERGES doesn't wait for started merges to finish, so we use this trick + BEGIN TRANSACTION; + INSERT INTO dst VALUES /* ($i, 4) */ ($i, 4); + INSERT INTO src SELECT * FROM dst; + ALTER TABLE dst DROP PARTITION ID 'all'; + SET throw_on_unsupported_query_inside_transaction=0; + SYSTEM START MERGES dst; + SELECT throwIf((SELECT (count(), sum(n)) FROM merge(currentDatabase(), '') WHERE type=4) != (toUInt8($i/2 + 1), (select sum(number) from numbers(1, $i) where number % 2 or number=$i))) FORMAT Null; + $action;" || $CLICKHOUSE_CLIENT --multiquery --query " + begin transaction; + set transaction snapshot 3; + select $i, 'src', type, n, _part from src order by type, n; + select $i, 'dst', type, n, _part from dst order by type, n; + rollback" ||: + done +} + +function 
thread_select() +{ + set -e + trap "exit 0" INT + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + -- no duplicates + SELECT type, throwIf(count(n) != countDistinct(n)) FROM src GROUP BY type FORMAT Null; + SELECT type, throwIf(count(n) != countDistinct(n)) FROM dst GROUP BY type FORMAT Null; + -- rows inserted by thread_insert moved together + SET throw_on_unsupported_query_inside_transaction=0; + SELECT _table, throwIf(arraySort(groupArrayIf(n, type=1)) != arraySort(groupArrayIf(n, type=2))) FROM merge(currentDatabase(), '') GROUP BY _table FORMAT Null; + -- all rows are inserted in insert_thread + SELECT type, throwIf(count(n) != max(n)), throwIf(sum(n) != max(n)*(max(n)+1)/2) FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type FORMAT Null; + COMMIT;" || $CLICKHOUSE_CLIENT --multiquery --query " + begin transaction; + set transaction snapshot 3; + select $i, 'src', type, n, _part from src order by type, n; + select $i, 'dst', type, n, _part from dst order by type, n; + rollback" ||: + done +} + +thread_insert & PID_1=$! +thread_select & PID_2=$! + +thread_partition_src_to_dst & PID_3=$! +thread_partition_dst_to_src & PID_4=$! +wait $PID_3 && wait $PID_4 + +kill -INT $PID_1 +kill -INT $PID_2 +wait + +$CLICKHOUSE_CLIENT -q "SELECT type, count(n) = countDistinct(n) FROM merge(currentDatabase(), '') GROUP BY type ORDER BY type" +$CLICKHOUSE_CLIENT -q "SELECT DISTINCT arraySort(groupArrayIf(n, type=1)) = arraySort(groupArrayIf(n, type=2)) FROM merge(currentDatabase(), '') GROUP BY _table ORDER BY _table" +$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM merge(currentDatabase(), '') WHERE type=4" +$CLICKHOUSE_CLIENT -q "SELECT type, count(n) == max(n), sum(n) == max(n)*(max(n)+1)/2 FROM merge(currentDatabase(), '') WHERE type IN (1, 2) GROUP BY type ORDER BY type" + + +$CLICKHOUSE_CLIENT --query "DROP TABLE src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE dst"; diff --git a/tests/queries/0_stateless/01170_alter_partition_isolation.reference b/tests/queries/0_stateless/01170_alter_partition_isolation.reference new file mode 100644 index 00000000000..f384fc748d4 --- /dev/null +++ b/tests/queries/0_stateless/01170_alter_partition_isolation.reference @@ -0,0 +1,30 @@ +tx1 1 1 +tx1 2 3 +tx2 3 2 +tx2 3 4 +tx1 4 3 + +5 3 +5 5 + +tx4 6 3 +tx4 6 5 +tx4 6 6 +tx4 7 8 +tx3 8 3 +tx3 8 5 +tx3 8 7 +tx3 8 9 +SERIALIZATION_ERROR +INVALID_TRANSACTION +tx4 9 8 + +10 8 + +11 8 +11 11 +11 12 +12 8 +12 8 +12 11 +12 12 diff --git a/tests/queries/0_stateless/01170_alter_partition_isolation.sh b/tests/queries/0_stateless/01170_alter_partition_isolation.sh new file mode 100755 index 00000000000..2db178fb6d1 --- /dev/null +++ b/tests/queries/0_stateless/01170_alter_partition_isolation.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-replicated-database +# Looks like server does not listen https port in fasttest +# FIXME Replicated database executes ALTERs in separate context, so transaction info is lost + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./transactions.lib +. 
"$CURDIR"/transactions.lib + +$CLICKHOUSE_CLIENT -q "drop table if exists mt" +$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n" + +tx 1 "begin transaction" +tx 1 "insert into mt values (1)" +tx 2 "begin transaction" +tx 2 "insert into mt values (2)" +tx 1 "select 1, n from mt order by n" +tx 1 "alter table mt drop partition id 'all'" +tx 2 "insert into mt values (4)" +tx 1 "insert into mt values (3)" +tx 1 "select 2, n from mt order by n" +tx 2 "select 3, n from mt order by n" +tx 2 "alter table mt drop partition id 'all'" +tx 2 "insert into mt values (5)" +tx 1 "select 4, n from mt order by n" +tx 2 "commit" +tx 1 "commit" + +echo '' +$CLICKHOUSE_CLIENT -q "select 5, n from mt order by n" +echo '' + +tx 4 "begin transaction" +tx 4 "insert into mt values (6)" +tx 3 "begin transaction" +tx 3 "insert into mt values (7)" +tx 4 "select 6, n from mt order by n" +tx 4 "alter table mt drop partition id 'all'" +tx 3 "insert into mt values (9)" +tx 4 "insert into mt values (8)" +tx 4 "select 7, n from mt order by n" +tx 3 "select 8, n from mt order by n" +tx 3 "alter table mt drop partition id 'all'" | grep -Eo "SERIALIZATION_ERROR" | uniq +tx 3 "insert into mt values (10)" | grep -Eo "INVALID_TRANSACTION" | uniq +tx 4 "select 9, n from mt order by n" +tx 3 "rollback" +tx 4 "commit" + +echo '' +$CLICKHOUSE_CLIENT -q "select 10, n from mt order by n" +echo '' + +$CLICKHOUSE_CLIENT -q "drop table if exists another_mt" +$CLICKHOUSE_CLIENT -q "create table another_mt (n int) engine=MergeTree order by n" + +tx 5 "begin transaction" +tx 5 "insert into another_mt values (11)" +tx 6 "begin transaction" +tx 6 "insert into mt values (12)" +tx 6 "insert into another_mt values (13)" +tx 5 "alter table another_mt move partition id 'all' to table mt" +tx 6 "alter table another_mt replace partition id 'all' from mt" +tx 5 "alter table another_mt attach partition id 'all' from mt" +tx 5 "commit" +tx 6 "commit" + +$CLICKHOUSE_CLIENT -q "select 11, n from mt order by n" +$CLICKHOUSE_CLIENT -q "select 12, n from another_mt order by n" + +$CLICKHOUSE_CLIENT -q "drop table another_mt" +$CLICKHOUSE_CLIENT -q "drop table mt" diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference new file mode 100644 index 00000000000..d8bb9e310e6 --- /dev/null +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.reference @@ -0,0 +1,4 @@ +275 0 138 136 0 +275 0 +275 0 138 136 0 +275 0 diff --git a/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh new file mode 100755 index 00000000000..3de63615bc4 --- /dev/null +++ b/tests/queries/0_stateless/01171_mv_select_insert_isolation_long.sh @@ -0,0 +1,156 @@ +#!/usr/bin/env bash +# Tags: long, no-parallel +# Test is too heavy, avoid parallel run in Flaky Check + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS dst"; +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mv"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE src (n Int8, m Int8, CONSTRAINT c CHECK xxHash32(n+m) % 8 != 0) ENGINE=MergeTree ORDER BY n PARTITION BY 0 < n SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE dst (nm Int16, CONSTRAINT c CHECK xxHash32(nm) % 8 != 0) ENGINE=MergeTree ORDER BY nm SETTINGS old_parts_lifetime=0"; +$CLICKHOUSE_CLIENT --query "CREATE MATERIALIZED VIEW mv TO dst (nm Int16) AS SELECT n*m AS nm FROM src"; + +$CLICKHOUSE_CLIENT --query "CREATE TABLE tmp (x UInt8, nm Int16) ENGINE=MergeTree ORDER BY (x, nm) SETTINGS old_parts_lifetime=0" + +$CLICKHOUSE_CLIENT --query "INSERT INTO src VALUES (0, 0)" + +# some transactions will fail due to constraint +function thread_insert_commit() +{ + set -e + for i in {1..100}; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* ($i, $1) */ ($i, $1); + SELECT throwIf((SELECT sum(nm) FROM mv) != $(($i * $1))) FORMAT Null; + INSERT INTO src VALUES /* (-$i, $1) */ (-$i, $1); + COMMIT;" 2>&1| grep -Fv "is violated at row" | grep -Fv "Transaction is not in RUNNING state" | grep -F "Received from " ||: + done +} + +function thread_insert_rollback() +{ + set -e + for _ in {1..100}; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO src VALUES /* (42, $1) */ (42, $1); + SELECT throwIf((SELECT count() FROM src WHERE n=42 AND m=$1) != 1) FORMAT Null; + ROLLBACK;" + done +} + +# make merges more aggressive +function thread_optimize() +{ + set -e + trap "exit 0" INT + while true; do + optimize_query="OPTIMIZE TABLE src" + partition_id=$(( RANDOM % 2 )) + if (( RANDOM % 2 )); then + optimize_query="OPTIMIZE TABLE dst" + partition_id="all" + fi + if (( RANDOM % 2 )); then + optimize_query="$optimize_query PARTITION ID '$partition_id'" + fi + if (( RANDOM % 2 )); then + optimize_query="$optimize_query FINAL" + fi + action="COMMIT" + if (( RANDOM % 4 )); then + action="ROLLBACK" + fi + + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + $optimize_query; + $action; + " 2>&1| grep -Fv "already exists, but it will be deleted soon" | grep -F "Received from " ||: + sleep 0.$RANDOM; + done +} + +function thread_select() +{ + set -e + trap "exit 0" INT + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + SELECT throwIf((SELECT (sum(n), count() % 2) FROM src) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM mv) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT (sum(nm), count() % 2) FROM dst) != (0, 1)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(nm)) FROM dst)) FORMAT Null; + SELECT throwIf((SELECT arraySort(groupArray(nm)) FROM mv) != (SELECT arraySort(groupArray(n*m)) FROM src)) FORMAT Null; + COMMIT;" || $CLICKHOUSE_CLIENT --multiquery --query " + begin transaction; + set transaction snapshot 3; + select 'src', n, m, _part from src order by n, m; + select 'dst', nm, _part from dst order by nm; + rollback" ||: + done +} + +function thread_select_insert() +{ + set -e + trap "exit 0" INT + while true; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + SELECT throwIf((SELECT count() FROM tmp) != 0) FORMAT Null; + INSERT INTO tmp SELECT 1, n*m FROM src; + INSERT INTO tmp SELECT 2, nm FROM mv; + INSERT INTO tmp 
SELECT 3, nm FROM dst; + INSERT INTO tmp SELECT 4, (*,).1 FROM (SELECT n*m FROM src UNION ALL SELECT nm FROM mv UNION ALL SELECT nm FROM dst); + SELECT throwIf((SELECT countDistinct(x) FROM tmp) != 4) FORMAT Null; + + -- now check that all results are the same + SELECT throwIf(1 != (SELECT countDistinct(arr) FROM (SELECT x, arraySort(groupArray(nm)) AS arr FROM tmp WHERE x!=4 GROUP BY x))) FORMAT Null; + SELECT throwIf((SELECT count(), sum(nm) FROM tmp WHERE x=4) != (SELECT count(), sum(nm) FROM tmp WHERE x!=4)) FORMAT Null; + ROLLBACK;" || $CLICKHOUSE_CLIENT --multiquery --query " + begin transaction; + set transaction snapshot 3; + select 'src', n, m, _part from src order by n, m; + select 'dst', nm, _part from dst order by nm; + rollback" ||: + done +} + +thread_insert_commit 1 & PID_1=$! +thread_insert_commit 2 & PID_2=$! +thread_insert_rollback 3 & PID_3=$! + +thread_optimize & PID_4=$! +thread_select & PID_5=$! +thread_select_insert & PID_6=$! +sleep 0.$RANDOM; +thread_select & PID_7=$! +thread_select_insert & PID_8=$! + +wait $PID_1 && wait $PID_2 && wait $PID_3 +kill -INT $PID_4 +kill -INT $PID_5 +kill -INT $PID_6 +kill -INT $PID_7 +kill -INT $PID_8 +wait + +$CLICKHOUSE_CLIENT --multiquery --query " +BEGIN TRANSACTION; +SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM src; +SELECT count(), sum(nm) FROM mv"; + +$CLICKHOUSE_CLIENT --query "SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM src" +$CLICKHOUSE_CLIENT --query "SELECT count(), sum(nm) FROM mv" + +$CLICKHOUSE_CLIENT --query "DROP TABLE src"; +$CLICKHOUSE_CLIENT --query "DROP TABLE dst"; +$CLICKHOUSE_CLIENT --query "DROP TABLE mv"; diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference new file mode 100644 index 00000000000..1aabf8a2a38 --- /dev/null +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -0,0 +1,40 @@ +(0,0,'00000000-0000-0000-0000-000000000000') +1 all_1_1_0 0 +1 all_2_2_0 1 +2 all_1_1_0 1 (0,0,'00000000-0000-0000-0000-000000000000') 0 +2 all_2_2_0 0 (0,0,'00000000-0000-0000-0000-000000000000') 0 +3 all_1_1_0 0 +3 all_3_3_0 1 +4 all_1_1_0 1 (0,0,'00000000-0000-0000-0000-000000000000') 0 +4 all_2_2_0 18446744073709551615 (1,1,'00000000-0000-0000-0000-000000000000') 0 +4 all_3_3_0 0 (0,0,'00000000-0000-0000-0000-000000000000') 0 +5 1 +6 all_1_1_0 0 +6 all_3_3_0 1 +6 all_4_4_0 1 +7 all_1_1_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 +7 all_3_3_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 +7 all_4_4_0 (0,0,'00000000-0000-0000-0000-000000000000') 0 +8 1 +1 1 AddPart 1 1 1 1 all_1_1_0 +2 1 Begin 1 1 1 1 +2 1 AddPart 1 1 1 1 all_2_2_0 +1 1 LockPart 1 1 1 1 all_2_2_0 +2 1 Rollback 1 1 1 1 +3 1 Begin 1 1 1 1 +3 1 AddPart 1 1 1 1 all_3_3_0 +3 1 Commit 1 1 1 0 +4 1 Begin 1 1 1 1 +4 1 AddPart 1 1 1 1 all_4_4_0 +4 1 Commit 1 1 1 0 +5 1 Begin 1 1 1 1 +5 1 AddPart 1 1 1 1 all_5_5_0 +5 1 LockPart 1 1 1 1 all_1_1_0 +5 1 LockPart 1 1 1 1 all_3_3_0 +5 1 LockPart 1 1 1 1 all_4_4_0 +5 1 LockPart 1 1 1 1 all_5_5_0 +5 1 UnlockPart 1 1 1 1 all_1_1_0 +5 1 UnlockPart 1 1 1 1 all_3_3_0 +5 1 UnlockPart 1 1 1 1 all_4_4_0 +5 1 UnlockPart 1 1 1 1 all_5_5_0 +5 1 Rollback 1 1 1 1 diff --git a/tests/queries/0_stateless/01172_transaction_counters.sql b/tests/queries/0_stateless/01172_transaction_counters.sql new file mode 100644 index 00000000000..5431673fd62 --- /dev/null +++ b/tests/queries/0_stateless/01172_transaction_counters.sql @@ -0,0 +1,50 @@ +-- Tags: no-s3-storage +-- FIXME this test fails with S3 due to 
a bug in DiskCacheWrapper +drop table if exists txn_counters; + +create table txn_counters (n Int64, creation_tid DEFAULT transactionID()) engine=MergeTree order by n; + +insert into txn_counters(n) values (1); +select transactionID(); + +-- stop background cleanup +system stop merges txn_counters; + +set throw_on_unsupported_query_inside_transaction=0; + +begin transaction; +insert into txn_counters(n) values (2); +select 1, system.parts.name, txn_counters.creation_tid = system.parts.creation_tid from txn_counters join system.parts on txn_counters._part = system.parts.name where database=currentDatabase() and table='txn_counters' order by system.parts.name; +select 2, name, creation_csn, removal_tid, removal_csn from system.parts where database=currentDatabase() and table='txn_counters' order by system.parts.name; +rollback; + +begin transaction; +insert into txn_counters(n) values (3); +select 3, system.parts.name, txn_counters.creation_tid = system.parts.creation_tid from txn_counters join system.parts on txn_counters._part = system.parts.name where database=currentDatabase() and table='txn_counters' order by system.parts.name; +select 4, name, creation_csn, removal_tid, removal_csn from system.parts where database=currentDatabase() and table='txn_counters' order by system.parts.name; +select 5, transactionID().3 == serverUUID(); +commit; + +detach table txn_counters; +attach table txn_counters; + +begin transaction; +insert into txn_counters(n) values (4); +select 6, system.parts.name, txn_counters.creation_tid = system.parts.creation_tid from txn_counters join system.parts on txn_counters._part = system.parts.name where database=currentDatabase() and table='txn_counters' order by system.parts.name; +select 7, name, removal_tid, removal_csn from system.parts where database=currentDatabase() and table='txn_counters' order by system.parts.name; +select 8, transactionID().3 == serverUUID(); +commit; + +begin transaction; +insert into txn_counters(n) values (5); +alter table txn_counters drop partition id 'all'; +rollback; + +system flush logs; +select indexOf((select arraySort(groupUniqArray(tid)) from system.transactions_info_log where database=currentDatabase() and table='txn_counters'), tid), + (toDecimal64(now64(6), 6) - toDecimal64(event_time, 6)) < 100, type, thread_id!=0, length(query_id)=length(queryID()), tid_hash!=0, csn=0, part +from system.transactions_info_log +where tid in (select tid from system.transactions_info_log where database=currentDatabase() and table='txn_counters' and not (tid.1=1 and tid.2=1)) +or (database=currentDatabase() and table='txn_counters') order by event_time; + +drop table txn_counters; diff --git a/tests/queries/0_stateless/01173_transaction_control_queries.reference b/tests/queries/0_stateless/01173_transaction_control_queries.reference new file mode 100644 index 00000000000..01acdffc581 --- /dev/null +++ b/tests/queries/0_stateless/01173_transaction_control_queries.reference @@ -0,0 +1,12 @@ +commit [1,10] +rollback [1,2,10,20] +no nested [1,10] +on exception before start [1,3,10,30] +on exception while processing [1,4,10,40] +on session close [1,6,10,60] +commit [1,7,10,70] +readonly [1,7,10,70] +snapshot 2 8 +snapshot1 0 0 +snapshot3 1 +snapshot100500 2 8 diff --git a/tests/queries/0_stateless/01173_transaction_control_queries.sql b/tests/queries/0_stateless/01173_transaction_control_queries.sql new file mode 100644 index 00000000000..930a2909f7a --- /dev/null +++ b/tests/queries/0_stateless/01173_transaction_control_queries.sql @@ -0,0 +1,102 @@ 
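-- Illustrative aside (not part of any test file in this patch; it reuses only columns and
-- event names already present in 01172_transaction_counters above): after SYSTEM FLUSH LOGS,
-- the per-transaction events that test checks (Begin, AddPart, LockPart, UnlockPart, Commit,
-- Rollback) can be listed directly, e.g.
--   select type, part from system.transactions_info_log
--   where database = currentDatabase() and table = 'txn_counters' order by event_time;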
+drop table if exists mt1; +drop table if exists mt2; + +create table mt1 (n Int64) engine=MergeTree order by n; +create table mt2 (n Int64) engine=MergeTree order by n; + +commit; -- { serverError INVALID_TRANSACTION } +rollback; -- { serverError INVALID_TRANSACTION } + +begin transaction; +insert into mt1 values (1); +insert into mt2 values (10); +select 'commit', arraySort(groupArray(n)) from (select n from mt1 union all select * from mt2); +commit; + +begin transaction; +insert into mt1 values (2); +insert into mt2 values (20); +select 'rollback', arraySort(groupArray(n)) from (select n from mt1 union all select * from mt2); +rollback; + +begin transaction; +select 'no nested', arraySort(groupArray(n)) from (select n from mt1 union all select * from mt2); +begin transaction; -- { serverError INVALID_TRANSACTION } +rollback; + +begin transaction; +insert into mt1 values (3); +insert into mt2 values (30); +select 'on exception before start', arraySort(groupArray(n)) from (select n from mt1 union all select * from mt2); +-- rollback on exception before start +select functionThatDoesNotExist(); -- { serverError 46 } +-- cannot commit after exception +commit; -- { serverError INVALID_TRANSACTION } +begin transaction; -- { serverError INVALID_TRANSACTION } +rollback; + +begin transaction; +insert into mt1 values (4); +insert into mt2 values (40); +select 'on exception while processing', arraySort(groupArray(n)) from (select n from mt1 union all select * from mt2); +-- rollback on exception while processing +select throwIf(100 < number) from numbers(1000); -- { serverError 395 } +-- cannot commit after exception +commit; -- { serverError INVALID_TRANSACTION } +insert into mt1 values (5); -- { serverError INVALID_TRANSACTION } +insert into mt2 values (50); -- { serverError INVALID_TRANSACTION } +select 1; -- { serverError INVALID_TRANSACTION } +rollback; + +begin transaction; +insert into mt1 values (6); +insert into mt2 values (60); +select 'on session close', arraySort(groupArray(n)) from (select n from mt1 union all select * from mt2); +-- trigger reconnection by error on client, check rollback on session close +insert into mt1 values ([1]); -- { clientError 43 } +commit; -- { serverError INVALID_TRANSACTION } +rollback; -- { serverError INVALID_TRANSACTION } + +begin transaction; +insert into mt1 values (7); +insert into mt2 values (70); +select 'commit', arraySort(groupArray(n)) from (select n from mt1 union all select * from mt2); +commit; + +begin transaction; +select 'readonly', arraySort(groupArray(n)) from (select n from mt1 union all select * from mt2); +commit; + +begin transaction; +select 'snapshot', count(), sum(n) from mt1; +set transaction snapshot 1; +select 'snapshot1', count(), sum(n) from mt1; +set transaction snapshot 3; +set throw_on_unsupported_query_inside_transaction=0; +select 'snapshot3', count() = (select count() from system.parts where database=currentDatabase() and table='mt1' and _state in ('Active', 'Outdated')) from mt1; +set throw_on_unsupported_query_inside_transaction=1; +set transaction snapshot 1000000000000000; +select 'snapshot100500', count(), sum(n) from mt1; +set transaction snapshot 5; -- { serverError INVALID_TRANSACTION } +rollback; + +begin transaction; +create table m (n int) engine=Memory; -- { serverError 48 } +commit; -- { serverError INVALID_TRANSACTION } +rollback; + +create table m (n int) engine=Memory; +begin transaction; +insert into m values (1); -- { serverError 48 } +select * from m; -- { serverError INVALID_TRANSACTION } +commit; -- 
{ serverError INVALID_TRANSACTION } +rollback; + +begin transaction; +select * from m; -- { serverError 48 } +commit; -- { serverError INVALID_TRANSACTION } +rollback; + +drop table m; +drop table mt1; +drop table mt2; diff --git a/tests/queries/0_stateless/01174_select_insert_isolation.reference b/tests/queries/0_stateless/01174_select_insert_isolation.reference new file mode 100644 index 00000000000..ba5f4de36ac --- /dev/null +++ b/tests/queries/0_stateless/01174_select_insert_isolation.reference @@ -0,0 +1,2 @@ +200 0 100 100 0 +200 0 100 100 0 diff --git a/tests/queries/0_stateless/01174_select_insert_isolation.sh b/tests/queries/0_stateless/01174_select_insert_isolation.sh new file mode 100755 index 00000000000..8872ab82c03 --- /dev/null +++ b/tests/queries/0_stateless/01174_select_insert_isolation.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Tags: long + +# shellcheck disable=SC2015 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mt"; +$CLICKHOUSE_CLIENT --query "CREATE TABLE mt (n Int8, m Int8) ENGINE=MergeTree ORDER BY n PARTITION BY 0 < n SETTINGS old_parts_lifetime=0"; + +function thread_insert_commit() +{ + for i in {1..50}; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO mt VALUES /* ($i, $1) */ ($i, $1); + INSERT INTO mt VALUES /* (-$i, $1) */ (-$i, $1); + COMMIT;"; + done +} + +function thread_insert_rollback() +{ + for _ in {1..50}; do + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + INSERT INTO mt VALUES /* (42, $1) */ (42, $1); + ROLLBACK;"; + done +} + +function thread_select() +{ + trap "exit 0" INT + while true; do + # Result of `uniq | wc -l` must be 1 if the first and the last queries got the same result + $CLICKHOUSE_CLIENT --multiquery --query " + BEGIN TRANSACTION; + SELECT arraySort(groupArray(n)), arraySort(groupArray(m)), arraySort(groupArray(_part)) FROM mt; + SELECT throwIf((SELECT sum(n) FROM mt) != 0) FORMAT Null; + SELECT throwIf((SELECT count() FROM mt) % 2 != 0) FORMAT Null; + SELECT arraySort(groupArray(n)), arraySort(groupArray(m)), arraySort(groupArray(_part)) FROM mt; + COMMIT;" | uniq | wc -l | grep -v "^1$" && $CLICKHOUSE_CLIENT -q "SELECT * FROM system.parts + WHERE database='$CLICKHOUSE_DATABASE' AND table='mt'" ||:; + done +} + +thread_insert_commit 1 & PID_1=$! +thread_insert_commit 2 & PID_2=$! +thread_insert_rollback 3 & PID_3=$! +thread_select & PID_4=$! 
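# Illustrative aside, not part of the test script above: the "uniq | wc -l" check in
# thread_select prints 1 only when the first and the last SELECT of the multiquery batch
# returned identical output, i.e. both bracketing reads observed the same snapshot.
printf 'snapshot_a\nsnapshot_a\n' | uniq | wc -l   # prints 1: both reads saw the same state
printf 'snapshot_a\nsnapshot_b\n' | uniq | wc -l   # prints 2: the reader saw the table change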
+wait $PID_1 && wait $PID_2 && wait $PID_3 +kill -INT $PID_4 +wait + +$CLICKHOUSE_CLIENT --multiquery --query " +BEGIN TRANSACTION; +SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM mt;"; + +$CLICKHOUSE_CLIENT --query "SELECT count(), sum(n), sum(m=1), sum(m=2), sum(m=3) FROM mt;" + +$CLICKHOUSE_CLIENT --query "DROP TABLE mt"; diff --git a/tests/queries/0_stateless/01183_custom_separated_format_http.sh b/tests/queries/0_stateless/01183_custom_separated_format_http.sh index f981ef5b890..8eaa22f4ecc 100755 --- a/tests/queries/0_stateless/01183_custom_separated_format_http.sh +++ b/tests/queries/0_stateless/01183_custom_separated_format_http.sh @@ -6,9 +6,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo 'DROP TABLE IF EXISTS mydb' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- echo 'CREATE TABLE mydb (datetime String, d1 String, d2 String ) ENGINE=Memory' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- -echo "2021-Jan^d1^d2" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20FORMAT%20CustomSeparated%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27" --data-binary @- -echo -n "" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20FORMAT%20CustomSeparated%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27" --data-binary @- +echo "2021-Jan^d1^d2" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27%20FORMAT%20CustomSeparated" --data-binary @- +echo -n "" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27%20FORMAT%20CustomSeparated" --data-binary @- echo 'SELECT * FROM mydb' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- -printf "2021-Jan^d1^d2\n%.0s" {1..999999} | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20FORMAT%20CustomSeparated%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27" --data-binary @- +printf "2021-Jan^d1^d2\n%.0s" {1..999999} | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT%20INTO%20mydb%20SETTINGS%20format_custom_escaping_rule%3D%27CSV%27%2C%20format_custom_field_delimiter%20%3D%20%27%5E%27%20FORMAT%20CustomSeparated" --data-binary @- echo 'SELECT count(*), countDistinct(datetime, d1, d2) FROM mydb' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- echo 'DROP TABLE mydb' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index b27c0d10d3b..039e438dc0a 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -60,6 +60,7 @@ DROP [] \N ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL OPTIMIZE ['OPTIMIZE TABLE'] TABLE ALL KILL QUERY [] GLOBAL ALL +KILL TRANSACTION [] GLOBAL ALL MOVE PARTITION BETWEEN SHARDS [] GLOBAL ALL CREATE USER [] GLOBAL ACCESS MANAGEMENT ALTER USER [] GLOBAL ACCESS MANAGEMENT diff --git a/tests/queries/0_stateless/01593_insert_settings.sql b/tests/queries/0_stateless/01593_insert_settings.sql index 7ef49f54049..88a58b2152e 100644 --- a/tests/queries/0_stateless/01593_insert_settings.sql +++ 
b/tests/queries/0_stateless/01593_insert_settings.sql @@ -2,9 +2,8 @@ drop table if exists data_01593; create table data_01593 (key Int) engine=MergeTree() order by key partition by key; insert into data_01593 select * from numbers_mt(10); --- TOO_MANY_PARTS error -insert into data_01593 select * from numbers_mt(10) settings max_partitions_per_insert_block=1; -- { serverError 252 } +insert into data_01593 select * from numbers_mt(10) settings max_partitions_per_insert_block=1; -- { serverError TOO_MANY_PARTS } -- settings for INSERT is prefered -insert into data_01593 select * from numbers_mt(10) settings max_partitions_per_insert_block=1 settings max_partitions_per_insert_block=100; +insert into data_01593 settings max_partitions_per_insert_block=100 select * from numbers_mt(10) settings max_partitions_per_insert_block=1; drop table data_01593; diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 750809da338..7654be4eb29 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -22,7 +22,7 @@ OPTIMIZE TABLE adaptive_table FINAL; SELECT marks FROM system.parts WHERE table = 'adaptive_table' and database=currentDatabase() and active; -SET remote_fs_enable_cache = 0; +SET enable_filesystem_cache = 0; -- If we have computed granularity incorrectly than we will exceed this limit. SET max_memory_usage='30M'; diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql index 7ec3153886c..36b6c97460c 100644 --- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql @@ -3,7 +3,7 @@ drop table if exists data_01641; -- Disable cache for s3 storage tests because it increases memory usage. 
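-- Illustrative sketch (not part of any test file; hypothetical table t): two SETTINGS patterns
-- recur in this patch. In 01593_insert_settings above, the value attached to the INSERT is
-- preferred over the one attached to the inner SELECT:
--   insert into t settings max_partitions_per_insert_block = 100
--       select * from numbers(10) settings max_partitions_per_insert_block = 1;
-- and for INSERT ... FORMAT queries (01183 above, 02030/02117 below) the SETTINGS clause is
-- moved ahead of FORMAT, e.g.
--   INSERT INTO t SETTINGS format_custom_escaping_rule = 'CSV' FORMAT CustomSeparated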
-set remote_fs_enable_cache=0; +set enable_filesystem_cache=0; set remote_filesystem_read_method='read'; create table data_01641 (key Int, value String) engine=MergeTree order by (key, repeat(value, 40)) settings old_parts_lifetime=0, min_bytes_for_wide_part=0; diff --git a/tests/queries/0_stateless/01801_s3_cluster.reference b/tests/queries/0_stateless/01801_s3_cluster.reference index 31c97f14fa3..0448ff3933b 100644 --- a/tests/queries/0_stateless/01801_s3_cluster.reference +++ b/tests/queries/0_stateless/01801_s3_cluster.reference @@ -2,30 +2,6 @@ 0 0 0 0 0 0 1 2 3 -10 11 12 -13 14 15 -16 17 18 -20 21 22 -23 24 25 -26 27 28 -4 5 6 -7 8 9 -0 0 0 -0 0 0 -0 0 0 -1 2 3 -10 11 12 -13 14 15 -16 17 18 -20 21 22 -23 24 25 -26 27 28 -4 5 6 -7 8 9 -0 0 0 -0 0 0 -0 0 0 -1 2 3 4 5 6 7 8 9 10 11 12 @@ -38,14 +14,26 @@ 0 0 0 0 0 0 1 2 3 +4 5 6 +7 8 9 10 11 12 13 14 15 16 17 18 20 21 22 23 24 25 26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 4 5 6 7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 0 0 0 0 0 0 0 0 0 @@ -62,14 +50,26 @@ 0 0 0 0 0 0 1 2 3 +4 5 6 +7 8 9 10 11 12 13 14 15 16 17 18 20 21 22 23 24 25 26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 4 5 6 7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 0 0 0 0 0 0 0 0 0 diff --git a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh index 23df052a8d6..a29d0661621 100755 --- a/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh +++ b/tests/queries/0_stateless/01921_concurrent_ttl_and_normal_merges_zookeeper_long.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash # Tags: long, zookeeper, no-parallel +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=error + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 9f65cf73252..86468b4fcd6 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS order_by_desc; -SET remote_fs_enable_cache=0; +SET enable_filesystem_cache=0; CREATE TABLE order_by_desc (u UInt32, s String) ENGINE MergeTree ORDER BY u PARTITION BY u % 100 diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index 71965512e64..e6d6f9e1317 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -143,7 +143,7 @@ def sendQuery(s, query): writeStringBinary('', ba) # No interserver secret writeVarUInt(2, ba) # Stage - Complete ba.append(0) # No compression - writeStringBinary(query + ' settings input_format_defaults_for_omitted_fields=0', ba) # query, finally + writeStringBinary(query, ba) # query, finally s.sendall(ba) @@ -205,7 +205,7 @@ def insertValidLowCardinalityRow(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) # external tables sendEmptyBlock(s) @@ -241,7 +241,7 @@ def insertLowCardinalityRowWithIndexOverflow(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) # external tables sendEmptyBlock(s) @@ -275,7 +275,7 @@ def insertLowCardinalityRowWithIncorrectDictType(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) # external tables sendEmptyBlock(s) @@ -308,7 +308,7 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) # external tables sendEmptyBlock(s) diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh index aa2fe6c1b35..cdc1587bccd 100755 --- a/tests/queries/0_stateless/02030_capnp_format.sh +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -19,7 +19,8 @@ cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_simple_types"; $CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_simple_types (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixed FixedString(5), data String, date Date, datetime DateTime, datetime64 DateTime64(3)) ENGINE=Memory" $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types values (-1, 1, -1000, 1000, -10000000, 1000000, -1000000000, 1000000000, 123.123, 123123123.123123123, 'Some 
string', 'fixed', 'Some data', '2000-01-06', '2000-06-01 19:42:42', '2000-04-01 11:21:33.123')" -$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" | \ + $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message' FORMAT CapnProto" $CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types" $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_simple_types" @@ -27,7 +28,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_simple_types" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_tuples" $CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_tuples (value UInt64, tuple1 Tuple(one UInt64, two Tuple(three UInt64, four UInt64)), tuple2 Tuple(nested1 Tuple(nested2 Tuple(x UInt64)))) ENGINE=Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples VALUES (1, (2, (3, 4)), (((5))))" -$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" | \ + $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message' FORMAT CapnProto" $CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples" $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_tuples" @@ -35,7 +37,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_tuples" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_lists" $CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_lists (value UInt64, list1 Array(UInt64), list2 Array(Array(Array(UInt64)))) ENGINE=Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists VALUES (1, [1, 2, 3], [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], []], []])" -$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" | \ + $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message' FORMAT CapnProto" $CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists" $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_lists" @@ -43,7 +46,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_lists" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_lists_and_tuples" $CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_lists_and_tuples (value UInt64, nested Tuple(a Tuple(b UInt64, c Array(Array(UInt64))), d Array(Tuple(e Array(Array(Tuple(f UInt64, g UInt64))), h Array(Tuple(k Array(UInt64))))))) ENGINE=Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples VALUES (1, ((2, [[3, 4], [5, 6], []]), [([[(7, 8), 
(9, 10)], [(11, 12), (13, 14)], []], [([15, 16, 17]), ([])])]))" -$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" | \ + $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message' FORMAT CapnProto" $CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples" $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_lists_and_tuples" @@ -51,7 +55,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_lists_and_tuples" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_table" $CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_table (nested Nested(value UInt64, array Array(UInt64), tuple Tuple(one UInt64, two UInt64))) ENGINE=Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table VALUES ([1, 2, 3], [[4, 5, 6], [], [7, 8]], [(9, 10), (11, 12), (13, 14)])" -$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" | \ + $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message' FORMAT CapnProto" $CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table" $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_table" @@ -59,7 +64,8 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_table" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nullable" $CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nullable (nullable Nullable(UInt64), array Array(Nullable(UInt64)), tuple Tuple(nullable Nullable(UInt64))) ENGINE=Memory"; $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable VALUES (1, [1, Null, 2], (1)), (Null, [Null, Null, 42], (Null))" -$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" | \ + $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message' FORMAT CapnProto" $CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable" $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nullable" @@ -78,7 +84,8 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_low_cardinality" $CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_low_cardinality (lc1 LowCardinality(String), lc2 
LowCardinality(Nullable(String)), lc3 Array(LowCardinality(Nullable(String)))) ENGINE=Memory" $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality VALUES ('one', 'two', ['one', Null, 'two', Null]), ('two', Null, [Null])" -$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" | \ + $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message' FORMAT CapnProto" $CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality" $CLICKHOUSE_CLIENT --query="DROP TABLE capnp_low_cardinality" diff --git a/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh b/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh index 1285758866d..400bf2a56fa 100755 --- a/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh +++ b/tests/queries/0_stateless/02117_custom_separated_with_names_and_types.sh @@ -14,7 +14,8 @@ for format in CustomSeparated CustomSeparatedWithNames CustomSeparatedWithNamesA do echo $format $CLICKHOUSE_CLIENT -q "SELECT number AS x, number + 1 AS y, 'hello' AS s FROM numbers(5) FORMAT $format $CUSTOM_SETTINGS" - $CLICKHOUSE_CLIENT -q "SELECT number AS x, number + 1 AS y, 'hello' AS s FROM numbers(5) FORMAT $format $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT -q "INSERT INTO test_02117 FORMAT $format $CUSTOM_SETTINGS" + $CLICKHOUSE_CLIENT -q "SELECT number AS x, number + 1 AS y, 'hello' AS s FROM numbers(5) FORMAT $format $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" done @@ -23,66 +24,80 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE test_02117" $CLICKHOUSE_CLIENT -q "CREATE TABLE test_02117 (x UInt32, y String DEFAULT 'default', z Date) engine=Memory()" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNames" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 
$CUSTOM_SETTINGS FORMAT CustomSeparatedWithNamesAndTypes" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=0 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_with_names_use_header=0 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNames" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=0 --input_format_with_types_use_header=0 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' AS y, toDate('2020-01-01') AS z FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_with_names_use_header=0 --input_format_with_types_use_header=0 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNamesAndTypes" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT 'text' AS y, toDate('2020-01-01') AS z, toUInt32(1) AS x FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS y, toDate('2020-01-01') AS z, toUInt32(1) AS x FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNames" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT 'text' AS y, toDate('2020-01-01') AS z, toUInt32(1) AS x FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT 'text' AS y, toDate('2020-01-01') AS z, toUInt32(1) AS x FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNamesAndTypes" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNames" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE 
test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNamesAndTypes" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNames" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=0 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNamesAndTypes" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT CustomSeparatedWithNames $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNames" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" -$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" +$CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, [[1, 2, 3], [4, 5], []] as a FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" | \ + $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT 
CustomSeparatedWithNamesAndTypes" $CLICKHOUSE_CLIENT -q "SELECT * FROM test_02117" $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE test_02117" TMP_FILE=$CURDIR/test_02117 $CLICKHOUSE_CLIENT -q "SELECT 'text' AS x, toDate('2020-01-01') AS y, toUInt32(1) AS z FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" > $TMP_FILE -cat $TMP_FILE | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +cat $TMP_FILE | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNamesAndTypes" 2>&1 | \ + grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "SELECT toUInt32(1) AS x, 'text' as z, toDate('2020-01-01') AS y FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" > $TMP_FILE -cat $TMP_FILE | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 FORMAT CustomSeparatedWithNamesAndTypes $CUSTOM_SETTINGS" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' +cat $TMP_FILE | $CLICKHOUSE_CLIENT --input_format_with_names_use_header=1 --input_format_with_types_use_header=1 -q "INSERT INTO test_02117 $CUSTOM_SETTINGS FORMAT CustomSeparatedWithNamesAndTypes" 2>&1 | \ + grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "DROP TABLE test_02117" rm $TMP_FILE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 91566295997..246b8ef6d3b 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -12,7 +12,7 @@ CREATE TABLE system.data_type_families\n(\n `name` String,\n `case_insensi CREATE TABLE system.databases\n(\n `name` String,\n `engine` String,\n `data_path` String,\n `metadata_path` String,\n `uuid` UUID,\n `comment` String,\n `database` String\n)\nENGINE = SystemDatabases()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.detached_parts\n(\n `database` String,\n `table` String,\n `partition_id` Nullable(String),\n `name` String,\n `disk` String,\n `reason` Nullable(String),\n `min_block_number` Nullable(Int64),\n `max_block_number` Nullable(Int64),\n `level` Nullable(UInt32)\n)\nENGINE = SystemDetachedParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.dictionaries\n(\n `database` String,\n `name` String,\n `uuid` UUID,\n `status` Enum8(\'NOT_LOADED\' = 0, \'LOADED\' = 1, \'FAILED\' = 2, \'LOADING\' = 3, \'FAILED_AND_RELOADING\' = 4, \'LOADED_AND_RELOADING\' = 5, \'NOT_EXIST\' = 6),\n `origin` String,\n `type` String,\n `key.names` Array(String),\n `key.types` Array(String),\n `attribute.names` Array(String),\n `attribute.types` Array(String),\n `bytes_allocated` UInt64,\n `query_count` UInt64,\n `hit_rate` Float64,\n `found_rate` Float64,\n `element_count` UInt64,\n `load_factor` Float64,\n `source` String,\n `lifetime_min` UInt64,\n `lifetime_max` UInt64,\n `loading_start_time` DateTime,\n `last_successful_update_time` DateTime,\n `loading_duration` Float32,\n `last_exception` String,\n `comment` String\n)\nENGINE = SystemDictionaries()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.disks\n(\n `name` String,\n `path` 
String,\n `free_space` UInt64,\n `total_space` UInt64,\n `keep_free_space` UInt64,\n `type` String\n)\nENGINE = SystemDisks()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.disks\n(\n `name` String,\n `path` String,\n `free_space` UInt64,\n `total_space` UInt64,\n `keep_free_space` UInt64,\n `type` String,\n `cache_path` String\n)\nENGINE = SystemDisks()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.distributed_ddl_queue\n(\n `entry` String,\n `entry_version` Nullable(UInt8),\n `initiator_host` Nullable(String),\n `initiator_port` Nullable(UInt16),\n `cluster` String,\n `query` String,\n `settings` Map(String, String),\n `query_create_time` DateTime,\n `host` Nullable(String),\n `port` Nullable(UInt16),\n `status` Nullable(Enum8(\'Inactive\' = 0, \'Active\' = 1, \'Finished\' = 2, \'Removing\' = 3, \'Unknown\' = 4)),\n `exception_code` Nullable(UInt16),\n `exception_text` Nullable(String),\n `query_finish_time` Nullable(DateTime),\n `query_duration_ms` Nullable(UInt64)\n)\nENGINE = SystemDDLWorkerQueue()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.distribution_queue\n(\n `database` String,\n `table` String,\n `data_path` String,\n `is_blocked` UInt8,\n `error_count` UInt64,\n `data_files` UInt64,\n `data_compressed_bytes` UInt64,\n `broken_data_files` UInt64,\n `broken_data_compressed_bytes` UInt64,\n `last_exception` String\n)\nENGINE = SystemDistributionQueue()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.enabled_roles\n(\n `role_name` String,\n `with_admin_option` UInt8,\n `is_current` UInt8,\n `is_default` UInt8\n)\nENGINE = SystemEnabledRoles()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' @@ -20,7 +20,7 @@ CREATE TABLE system.errors\n(\n `name` String,\n `code` Int32,\n `value CREATE TABLE system.events\n(\n `event` String,\n `value` UInt64,\n `description` String\n)\nENGINE = SystemEvents()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.formats\n(\n `name` String,\n `is_input` UInt8,\n `is_output` UInt8\n)\nENGINE = SystemFormats()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.functions\n(\n `name` String,\n `is_aggregate` UInt8,\n `case_insensitive` UInt8,\n `alias_to` String,\n `create_query` String,\n `origin` Enum8(\'System\' = 0, \'SQLUserDefined\' = 1, \'ExecutableUserDefined\' = 2)\n)\nENGINE = SystemFunctions()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, 
\'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'HIVE\' = 135, \'SOURCES\' = 136, \'ALL\' = 137, \'NONE\' = 138),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER 
CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'KILL TRANSACTION\' = 62, \'MOVE PARTITION BETWEEN SHARDS\' = 63, \'CREATE USER\' = 64, \'ALTER USER\' = 65, \'DROP USER\' = 66, \'CREATE ROLE\' = 67, \'ALTER ROLE\' = 68, \'DROP ROLE\' = 69, \'ROLE ADMIN\' = 70, \'CREATE ROW POLICY\' = 71, \'ALTER ROW POLICY\' = 72, \'DROP ROW POLICY\' = 73, \'CREATE QUOTA\' = 74, \'ALTER QUOTA\' = 75, \'DROP QUOTA\' = 76, \'CREATE SETTINGS PROFILE\' = 77, \'ALTER SETTINGS PROFILE\' = 78, \'DROP SETTINGS PROFILE\' = 79, \'SHOW USERS\' = 80, \'SHOW ROLES\' = 81, \'SHOW ROW POLICIES\' = 82, \'SHOW QUOTAS\' = 83, \'SHOW SETTINGS PROFILES\' = 84, \'SHOW ACCESS\' = 85, \'ACCESS MANAGEMENT\' = 86, \'SYSTEM SHUTDOWN\' = 87, \'SYSTEM DROP DNS CACHE\' = 88, \'SYSTEM DROP MARK CACHE\' = 89, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 90, \'SYSTEM DROP MMAP CACHE\' = 91, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 92, \'SYSTEM DROP CACHE\' = 93, \'SYSTEM RELOAD CONFIG\' = 94, \'SYSTEM RELOAD SYMBOLS\' = 95, \'SYSTEM RELOAD DICTIONARY\' = 96, \'SYSTEM RELOAD MODEL\' = 97, \'SYSTEM RELOAD FUNCTION\' = 98, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 99, \'SYSTEM RELOAD\' = 100, \'SYSTEM RESTART DISK\' = 101, \'SYSTEM MERGES\' = 102, \'SYSTEM TTL MERGES\' = 103, \'SYSTEM FETCHES\' = 104, \'SYSTEM MOVES\' = 105, \'SYSTEM DISTRIBUTED SENDS\' = 106, \'SYSTEM REPLICATED SENDS\' = 107, \'SYSTEM SENDS\' = 108, \'SYSTEM REPLICATION QUEUES\' = 109, \'SYSTEM DROP REPLICA\' = 110, \'SYSTEM SYNC REPLICA\' = 111, \'SYSTEM RESTART REPLICA\' = 112, \'SYSTEM RESTORE REPLICA\' = 113, \'SYSTEM FLUSH DISTRIBUTED\' = 114, \'SYSTEM FLUSH LOGS\' = 115, \'SYSTEM FLUSH\' = 116, \'SYSTEM THREAD FUZZER\' = 117, \'SYSTEM\' = 118, \'dictGet\' = 119, \'addressToLine\' = 120, \'addressToLineWithInlines\' = 121, \'addressToSymbol\' = 122, \'demangle\' = 123, \'INTROSPECTION\' = 124, \'FILE\' = 125, \'URL\' = 126, \'REMOTE\' = 127, \'MONGO\' = 128, \'MYSQL\' = 129, \'POSTGRES\' = 130, \'SQLITE\' = 131, \'ODBC\' = 132, \'JDBC\' = 133, \'HDFS\' = 134, \'S3\' = 135, \'HIVE\' = 136, \'SOURCES\' = 137, \'ALL\' = 138, \'NONE\' = 139),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `rule_type` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` 
Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.licenses\n(\n `library_name` String,\n `license_type` String,\n `license_path` String,\n `license_text` String\n)\nENGINE = SystemLicenses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.macros\n(\n `macro` String,\n `substitution` String\n)\nENGINE = SystemMacros()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' @@ -33,9 +33,9 @@ CREATE TABLE system.numbers\n(\n `number` UInt64\n)\nENGINE = SystemNumbers() CREATE TABLE system.numbers_mt\n(\n `number` UInt64\n)\nENGINE = SystemNumbers()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.one\n(\n `dummy` UInt8\n)\nENGINE = SystemOne()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.part_moves_between_shards\n(\n `database` String,\n `table` String,\n `task_name` String,\n `task_uuid` UUID,\n `create_time` DateTime,\n `part_name` String,\n `part_uuid` UUID,\n `to_shard` String,\n `dst_part_name` String,\n `update_time` DateTime,\n `state` String,\n `rollback` UInt8,\n `num_tries` UInt32,\n `last_exception` String\n)\nENGINE = SystemShardMoves()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.parts\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `secondary_indices_compressed_bytes` UInt64,\n `secondary_indices_uncompressed_bytes` UInt64,\n `secondary_indices_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `projections` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.parts\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `secondary_indices_compressed_bytes` UInt64,\n `secondary_indices_uncompressed_bytes` UInt64,\n `secondary_indices_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n 
`min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `projections` Array(String),\n `visible` UInt8,\n `creation_tid` Tuple(UInt64, UInt64, UUID),\n `removal_tid` Tuple(UInt64, UInt64, UUID),\n `creation_csn` UInt64,\n `removal_csn` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts_columns\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `serialization_kind` String,\n `subcolumns.names` Array(String),\n `subcolumns.types` Array(String),\n `subcolumns.serializations` Array(String),\n `subcolumns.bytes_on_disk` Array(UInt64),\n `subcolumns.data_compressed_bytes` Array(UInt64),\n `subcolumns.data_uncompressed_bytes` Array(UInt64),\n `subcolumns.marks_bytes` Array(UInt64),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.privileges\n(\n `privilege` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 
20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'HIVE\' = 135, \'SOURCES\' = 136, \'ALL\' = 137, \'NONE\' = 138),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 
11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'HIVE\' = 135, \'SOURCES\' = 136, \'ALL\' = 137, \'NONE\' = 138))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.privileges\n(\n `privilege` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW 
COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'KILL TRANSACTION\' = 62, \'MOVE PARTITION BETWEEN SHARDS\' = 63, \'CREATE USER\' = 64, \'ALTER USER\' = 65, \'DROP USER\' = 66, \'CREATE ROLE\' = 67, \'ALTER ROLE\' = 68, \'DROP ROLE\' = 69, \'ROLE ADMIN\' = 70, \'CREATE ROW POLICY\' = 71, \'ALTER ROW POLICY\' = 72, \'DROP ROW POLICY\' = 73, \'CREATE QUOTA\' = 74, \'ALTER QUOTA\' = 75, \'DROP QUOTA\' = 76, \'CREATE SETTINGS PROFILE\' = 77, \'ALTER SETTINGS PROFILE\' = 78, \'DROP SETTINGS PROFILE\' = 79, \'SHOW USERS\' = 80, \'SHOW ROLES\' = 81, \'SHOW ROW POLICIES\' = 82, \'SHOW QUOTAS\' = 83, \'SHOW SETTINGS PROFILES\' = 84, \'SHOW ACCESS\' = 85, \'ACCESS MANAGEMENT\' = 86, \'SYSTEM SHUTDOWN\' = 87, \'SYSTEM DROP DNS CACHE\' = 88, \'SYSTEM DROP MARK CACHE\' = 89, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 90, \'SYSTEM DROP MMAP CACHE\' = 91, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 92, \'SYSTEM DROP CACHE\' = 93, \'SYSTEM RELOAD CONFIG\' = 94, \'SYSTEM RELOAD SYMBOLS\' = 95, \'SYSTEM RELOAD DICTIONARY\' = 96, \'SYSTEM RELOAD MODEL\' = 97, \'SYSTEM RELOAD FUNCTION\' = 98, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 99, \'SYSTEM RELOAD\' = 100, \'SYSTEM RESTART DISK\' = 101, \'SYSTEM MERGES\' = 102, \'SYSTEM TTL MERGES\' = 103, \'SYSTEM FETCHES\' = 104, \'SYSTEM MOVES\' = 105, \'SYSTEM DISTRIBUTED SENDS\' = 106, \'SYSTEM REPLICATED SENDS\' = 107, \'SYSTEM SENDS\' = 108, \'SYSTEM REPLICATION QUEUES\' = 109, \'SYSTEM DROP REPLICA\' = 110, \'SYSTEM SYNC REPLICA\' = 111, \'SYSTEM RESTART REPLICA\' = 112, \'SYSTEM RESTORE REPLICA\' = 113, \'SYSTEM FLUSH DISTRIBUTED\' = 114, \'SYSTEM FLUSH LOGS\' = 115, \'SYSTEM FLUSH\' = 116, \'SYSTEM THREAD FUZZER\' = 117, \'SYSTEM\' = 118, \'dictGet\' = 119, \'addressToLine\' = 120, \'addressToLineWithInlines\' = 121, \'addressToSymbol\' = 122, \'demangle\' = 123, \'INTROSPECTION\' = 124, \'FILE\' = 125, \'URL\' = 126, \'REMOTE\' = 127, \'MONGO\' = 128, \'MYSQL\' = 129, \'POSTGRES\' = 130, \'SQLITE\' = 131, \'ODBC\' = 132, \'JDBC\' = 133, \'HDFS\' = 134, \'S3\' = 135, \'HIVE\' = 136, 
\'SOURCES\' = 137, \'ALL\' = 138, \'NONE\' = 139),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'KILL TRANSACTION\' = 62, \'MOVE PARTITION BETWEEN SHARDS\' = 63, \'CREATE USER\' = 64, \'ALTER USER\' = 65, \'DROP USER\' = 66, \'CREATE ROLE\' = 67, \'ALTER ROLE\' = 68, \'DROP ROLE\' = 69, \'ROLE ADMIN\' = 70, \'CREATE ROW POLICY\' = 71, \'ALTER ROW POLICY\' = 72, \'DROP ROW POLICY\' = 73, \'CREATE QUOTA\' = 74, \'ALTER QUOTA\' = 75, \'DROP QUOTA\' = 76, \'CREATE SETTINGS PROFILE\' = 77, \'ALTER SETTINGS PROFILE\' = 78, \'DROP SETTINGS PROFILE\' = 79, \'SHOW USERS\' = 80, \'SHOW ROLES\' = 81, \'SHOW ROW POLICIES\' = 82, \'SHOW QUOTAS\' = 83, \'SHOW SETTINGS PROFILES\' = 84, \'SHOW ACCESS\' = 85, \'ACCESS MANAGEMENT\' = 86, \'SYSTEM SHUTDOWN\' = 87, \'SYSTEM DROP DNS CACHE\' = 88, \'SYSTEM DROP MARK CACHE\' = 89, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 90, \'SYSTEM DROP MMAP CACHE\' = 91, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 92, \'SYSTEM DROP CACHE\' = 93, \'SYSTEM RELOAD CONFIG\' = 94, \'SYSTEM RELOAD SYMBOLS\' = 95, \'SYSTEM RELOAD DICTIONARY\' = 96, \'SYSTEM RELOAD MODEL\' = 97, \'SYSTEM RELOAD FUNCTION\' = 98, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 99, \'SYSTEM RELOAD\' = 100, \'SYSTEM RESTART DISK\' = 101, \'SYSTEM MERGES\' = 102, \'SYSTEM TTL MERGES\' = 103, \'SYSTEM FETCHES\' = 104, \'SYSTEM MOVES\' = 105, \'SYSTEM DISTRIBUTED SENDS\' = 106, \'SYSTEM REPLICATED SENDS\' = 107, \'SYSTEM SENDS\' = 108, \'SYSTEM REPLICATION QUEUES\' = 109, \'SYSTEM DROP REPLICA\' = 110, \'SYSTEM SYNC REPLICA\' = 111, \'SYSTEM RESTART REPLICA\' = 112, \'SYSTEM RESTORE REPLICA\' = 113, \'SYSTEM FLUSH DISTRIBUTED\' = 114, \'SYSTEM FLUSH LOGS\' = 115, \'SYSTEM FLUSH\' = 116, \'SYSTEM THREAD FUZZER\' = 117, \'SYSTEM\' = 118, \'dictGet\' = 119, \'addressToLine\' = 120, 
\'addressToLineWithInlines\' = 121, \'addressToSymbol\' = 122, \'demangle\' = 123, \'INTROSPECTION\' = 124, \'FILE\' = 125, \'URL\' = 126, \'REMOTE\' = 127, \'MONGO\' = 128, \'MYSQL\' = 129, \'POSTGRES\' = 130, \'SQLITE\' = 131, \'ODBC\' = 132, \'JDBC\' = 133, \'HDFS\' = 134, \'S3\' = 135, \'HIVE\' = 136, \'SOURCES\' = 137, \'ALL\' = 138, \'NONE\' = 139))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `distributed_depth` UInt64,\n `elapsed` Float64,\n `is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionParts()\nCOMMENT \'SYSTEM 
TABLE is built on the fly.\' CREATE TABLE system.projection_parts_columns\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git a/tests/queries/0_stateless/02127_plus_before_float.sh b/tests/queries/0_stateless/02127_plus_before_float.sh index b464bedb837..2f0195410eb 100755 --- a/tests/queries/0_stateless/02127_plus_before_float.sh +++ b/tests/queries/0_stateless/02127_plus_before_float.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "create table test_02127 (x Float32, y Float64) engine=Mem for escaping_rule in Quoted JSON Escaped CSV Raw do -echo -e "+42.42\t+42.42" | $CLICKHOUSE_CLIENT -q "insert into test_02127 format CustomSeparated settings format_custom_escaping_rule='$escaping_rule'" +echo -e "+42.42\t+42.42" | $CLICKHOUSE_CLIENT -q "insert into test_02127 settings format_custom_escaping_rule='$escaping_rule' format CustomSeparated" done diff --git a/tests/queries/0_stateless/02129_skip_quoted_fields.sh b/tests/queries/0_stateless/02129_skip_quoted_fields.sh index c1baeb5b8f2..ac702d3c750 100755 --- a/tests/queries/0_stateless/02129_skip_quoted_fields.sh +++ b/tests/queries/0_stateless/02129_skip_quoted_fields.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "drop table if exists test_02129" $CLICKHOUSE_CLIENT -q "create table test_02129 (x UInt64, y UInt64) engine=Memory()" -QUERY="insert into test_02129 format CustomSeparatedWithNames settings input_format_skip_unknown_fields=1, format_custom_escaping_rule='Quoted'" +QUERY="insert into test_02129 settings input_format_skip_unknown_fields=1, format_custom_escaping_rule='Quoted' format CustomSeparatedWithNames" # Skip string echo -e "'x'\t'trash'\t'y'\n1\t'Some string'\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" diff --git a/tests/queries/0_stateless/02134_async_inserts_formats.sh b/tests/queries/0_stateless/02134_async_inserts_formats.sh index bd102fefe9f..631809e5dc2 100755 --- a/tests/queries/0_stateless/02134_async_inserts_formats.sh +++ b/tests/queries/0_stateless/02134_async_inserts_formats.sh @@ -9,23 +9,23 @@ url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE 
async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" -${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO async_inserts FORMAT CustomSeparated settings format_custom_escaping_rule='CSV', format_custom_field_delimiter=',' +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO async_inserts settings format_custom_escaping_rule='CSV', format_custom_field_delimiter=',' FORMAT CustomSeparated 1,\"a\" 2,\"b\" " & -${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO async_inserts FORMAT CustomSeparated settings format_custom_escaping_rule='CSV', format_custom_field_delimiter=',' +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO async_inserts settings format_custom_escaping_rule='CSV', format_custom_field_delimiter=',' FORMAT CustomSeparated 3,\"a\" 4,\"b\" " & -${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO async_inserts FORMAT CustomSeparatedWithNames settings format_custom_escaping_rule='CSV', format_custom_field_delimiter=',' +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO async_inserts settings format_custom_escaping_rule='CSV', format_custom_field_delimiter=',' FORMAT CustomSeparatedWithNames \"id\",\"s\" 5,\"a\" 6,\"b\" " & -${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO async_inserts FORMAT CustomSeparatedWithNames settings format_custom_escaping_rule='CSV', format_custom_field_delimiter=',' +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO async_inserts settings format_custom_escaping_rule='CSV', format_custom_field_delimiter=',' FORMAT CustomSeparatedWithNames \"id\",\"s\" 7,\"a\" 8,\"b\" diff --git a/tests/queries/0_stateless/02147_arrow_duplicate_columns.sh b/tests/queries/0_stateless/02147_arrow_duplicate_columns.sh index 938b45fee98..548b2ca868b 100755 --- a/tests/queries/0_stateless/02147_arrow_duplicate_columns.sh +++ b/tests/queries/0_stateless/02147_arrow_duplicate_columns.sh @@ -26,6 +26,6 @@ GZDATA="H4sIAHTzuWEAA9VTuw3CMBB9+RCsyIULhFIwAC0SJQWZACkNi1CAxCCMwCCMQMEIKdkgPJ8P ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t1" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE t1 ( x Int64, y Int64, z Int64 ) ENGINE = Memory" -echo ${GZDATA} | base64 --decode | gunzip | ${CLICKHOUSE_CLIENT} -q "INSERT INTO t1 FORMAT Arrow settings input_format_arrow_allow_missing_columns = true" 2>&1 | grep -qF "DUPLICATE_COLUMN" && echo 'OK' || echo 'FAIL' ||: +echo ${GZDATA} | base64 --decode | gunzip | ${CLICKHOUSE_CLIENT} -q "INSERT INTO t1 settings input_format_arrow_allow_missing_columns = true FORMAT Arrow" 2>&1 | grep -qF "DUPLICATE_COLUMN" && echo 'OK' || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t1" diff --git a/tests/queries/0_stateless/02149_schema_inference.reference b/tests/queries/0_stateless/02149_schema_inference.reference index e4a0c3c3602..2d7dd5caca7 100644 --- a/tests/queries/0_stateless/02149_schema_inference.reference +++ b/tests/queries/0_stateless/02149_schema_inference.reference @@ -1,17 +1,17 @@ TSV -c1 Nullable(String) +c1 Nullable(Float64) c2 Nullable(String) -c3 Nullable(String) -c4 Nullable(String) -42 Some string [1, 2, 3, 4] (1, 2, 3) -42 abcd [] (4, 5, 6) +c3 Array(Nullable(Float64)) +c4 Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)) +42 Some string [1,2,3,4] (1,2,3) +42 abcd [] (4,5,6) TSVWithNames -number Nullable(String) +number Nullable(Float64) string Nullable(String) -array Nullable(String) -tuple Nullable(String) -42 Some string [1, 2, 3, 4] (1, 2, 3) -42 abcd [] (4, 5, 6) +array Array(Nullable(Float64)) +tuple Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)) +42 Some string [1,2,3,4] (1,2,3) +42 abcd [] (4,5,6) CSV c1 
Nullable(Float64) c2 Nullable(String) @@ -73,13 +73,13 @@ c Array(Nullable(Float64)) \N \N [] \N \N [3] TSKV -a Nullable(String) +a Nullable(Float64) b Nullable(String) -c Nullable(String) -1 s1 \N +c Array(Nullable(Float64)) +1 s1 [] 2 } [2] -\N \N \N -\N \N \N +\N \N [] +\N \N [] \N \N [3] Values c1 Nullable(Float64) @@ -96,7 +96,7 @@ c5 Tuple(Array(Nullable(Float64)), Array(Tuple(Nullable(Float64), Nullable(Strin 42.42 \N [1,NULL,3] (1,NULL) ([1,2],[(3,'4'),(5,'6')]) \N Some string [10] (1,2) ([],[]) Regexp -c1 Nullable(String) +c1 Nullable(Float64) c2 Nullable(String) c3 Nullable(String) 42 Some string 1 [([1, 2, 3], String 1), ([], String 1)] diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference index d3d2d86d696..b0ec4bef499 100644 --- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference +++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference @@ -1,137 +1,137 @@ Arrow -int8 Int8 -uint8 UInt8 -int16 Int16 -uint16 UInt16 -int32 Int32 -uint32 UInt32 -int64 Int64 -uint64 UInt64 +int8 Nullable(Int8) +uint8 Nullable(UInt8) +int16 Nullable(Int16) +uint16 Nullable(UInt16) +int32 Nullable(Int32) +uint32 Nullable(UInt32) +int64 Nullable(Int64) +uint64 Nullable(UInt64) 0 0 0 0 0 0 0 0 -1 1 -1 1 -1 1 -1 1 -float32 Float32 -float64 Float64 -decimal32 Decimal(9, 5) -decimal64 Decimal(18, 5) +float32 Nullable(Float32) +float64 Nullable(Float64) +decimal32 Nullable(Decimal(9, 5)) +decimal64 Nullable(Decimal(18, 5)) 0 0 0 0 1.2 0.7692307692307692 3.33333 333.33333 -date UInt16 -date32 Date32 +date Nullable(UInt16) +date32 Nullable(Date32) 0 1970-01-01 1 1970-01-02 -str String -fixed_string String +str Nullable(String) +fixed_string Nullable(String) Str: 0 100 Str: 1 200 -array Array(UInt64) -tuple Tuple(`tuple.0` UInt64, `tuple.1` String) -map Map(String, UInt64) +array Array(Nullable(UInt64)) +tuple Tuple(Nullable(UInt64), Nullable(String)) +map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64))) -nested2 Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8) +nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) +nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) ArrowStream -int8 Int8 -uint8 UInt8 -int16 Int16 -uint16 UInt16 -int32 Int32 -uint32 UInt32 -int64 Int64 -uint64 UInt64 +int8 Nullable(Int8) +uint8 Nullable(UInt8) +int16 Nullable(Int16) +uint16 Nullable(UInt16) +int32 Nullable(Int32) +uint32 Nullable(UInt32) +int64 Nullable(Int64) +uint64 Nullable(UInt64) 0 0 0 0 0 0 0 0 -1 1 -1 1 -1 1 -1 1 -float32 Float32 -float64 Float64 -decimal32 Decimal(9, 5) -decimal64 Decimal(18, 5) +float32 Nullable(Float32) +float64 Nullable(Float64) +decimal32 Nullable(Decimal(9, 5)) +decimal64 Nullable(Decimal(18, 5)) 0 0 0 0 1.2 0.7692307692307692 3.33333 333.33333 -date UInt16 -date32 Date32 +date Nullable(UInt16) +date32 Nullable(Date32) 0 1970-01-01 1 1970-01-02 -str String -fixed_string String +str Nullable(String) +fixed_string 
Nullable(String) Str: 0 100 Str: 1 200 -array Array(UInt64) -tuple Tuple(`tuple.0` UInt64, `tuple.1` String) -map Map(String, UInt64) +array Array(Nullable(UInt64)) +tuple Tuple(Nullable(UInt64), Nullable(String)) +map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64))) -nested2 Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8) +nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) +nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) Parquet -int8 Int8 -uint8 UInt8 -int16 Int16 -uint16 UInt16 -int32 Int32 -uint32 Int64 -int64 Int64 -uint64 UInt64 +int8 Nullable(Int8) +uint8 Nullable(UInt8) +int16 Nullable(Int16) +uint16 Nullable(UInt16) +int32 Nullable(Int32) +uint32 Nullable(Int64) +int64 Nullable(Int64) +uint64 Nullable(UInt64) 0 0 0 0 0 0 0 0 -1 1 -1 1 -1 1 -1 1 -float32 Float32 -float64 Float64 -decimal32 Decimal(9, 5) -decimal64 Decimal(18, 5) +float32 Nullable(Float32) +float64 Nullable(Float64) +decimal32 Nullable(Decimal(9, 5)) +decimal64 Nullable(Decimal(18, 5)) 0 0 0 0 1.2 0.7692307692307692 3.33333 333.33333 -date UInt16 -date32 Date32 +date Nullable(UInt16) +date32 Nullable(Date32) 0 1970-01-01 1 1970-01-02 -str String -fixed_string String +str Nullable(String) +fixed_string Nullable(String) Str: 0 100 Str: 1 200 -array Array(UInt64) -tuple Tuple(`tuple.0` UInt64, `tuple.1` String) -map Map(String, UInt64) +array Array(Nullable(UInt64)) +tuple Tuple(Nullable(UInt64), Nullable(String)) +map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(`nested1.0` Array(UInt64), `nested1.1` Map(String, UInt64))) -nested2 Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(UInt64)), `nested2.0.1` Map(UInt64, Array(Tuple(`nested2.0.1.0` UInt64, `nested2.0.1.1` String)))), `nested2.1` UInt8) +nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) +nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) ORC -int8 Int8 -uint8 Int8 -int16 Int16 -uint16 Int16 -int32 Int32 -uint32 Int32 -int64 Int64 -uint64 Int64 +int8 Nullable(Int8) +uint8 Nullable(Int8) +int16 Nullable(Int16) +uint16 Nullable(Int16) +int32 Nullable(Int32) +uint32 Nullable(Int32) +int64 Nullable(Int64) +uint64 Nullable(Int64) 0 0 0 0 0 0 0 0 -1 1 -1 1 -1 1 -1 1 -float32 Float32 -float64 Float64 -decimal32 Decimal(9, 5) -decimal64 Decimal(18, 5) +float32 Nullable(Float32) +float64 Nullable(Float64) +decimal32 Nullable(Decimal(9, 5)) +decimal64 Nullable(Decimal(18, 5)) 0 0 0 0 1.2 0.7692307692307692 3.33333 333.33333 -date Date32 -date32 Date32 +date Nullable(Date32) +date32 Nullable(Date32) 1970-01-01 1970-01-01 1970-01-02 1970-01-02 -str String -fixed_string String +str Nullable(String) +fixed_string Nullable(String) Str: 0 100 Str: 1 200 -array Array(Int64) -tuple Tuple(`tuple.0` Int64, `tuple.1` String) -map Map(String, 
Int64) +array Array(Nullable(Int64)) +tuple Tuple(Nullable(Int64), Nullable(String)) +map Map(String, Nullable(Int64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(`nested1.0` Array(Int64), `nested1.1` Map(String, Int64))) -nested2 Tuple(`nested2.0` Tuple(`nested2.0.0` Array(Array(Int64)), `nested2.0.1` Map(Int64, Array(Tuple(`nested2.0.1.0` Int64, `nested2.0.1.1` String)))), `nested2.1` Int8) +nested1 Array(Tuple(Array(Nullable(Int64)), Map(String, Nullable(Int64)))) +nested2 Tuple(Tuple(Array(Array(Nullable(Int64))), Map(Int64, Array(Tuple(Nullable(Int64), Nullable(String))))), Nullable(Int8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) Native diff --git a/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh index ab2577e6138..08d380bf559 100755 --- a/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh +++ b/tests/queries/0_stateless/02155_csv_with_strings_with_slash.sh @@ -10,13 +10,13 @@ ${CLICKHOUSE_CLIENT} --query="create table test_02155_csv (A Int64, S String, D echo "input_format_null_as_default = 1" -cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv FORMAT CSV SETTINGS input_format_null_as_default = 1" +cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS input_format_null_as_default = 1 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test_02155_csv" ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test_02155_csv" echo "input_format_null_as_default = 0" -cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv FORMAT CSV SETTINGS input_format_null_as_default = 0" +cat $CUR_DIR/data_csv/csv_with_slash.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_02155_csv SETTINGS input_format_null_as_default = 0 FORMAT CSV" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test_02155_csv" diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference index 46f448cfba7..20f3368e446 100644 --- a/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.reference @@ -1 +1 @@ -x LowCardinality(UInt64) +x LowCardinality(Nullable(UInt64)) diff --git a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh index e560dc10d2c..7d313b571d9 100755 --- a/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh +++ b/tests/queries/0_stateless/02166_arrow_dictionary_inference.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1" +$CLICKHOUSE_CLIENT -q "insert into table function file('arrow.dict', 'Arrow', 'x LowCardinality(UInt64)') select number from numbers(10) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "desc file('arrow.dict', 'Arrow')" diff --git a/tests/queries/0_stateless/02180_insert_into_values_settings.sql b/tests/queries/0_stateless/02180_insert_into_values_settings.sql index 0a1468070c1..a499ab15f26 100644 --- a/tests/queries/0_stateless/02180_insert_into_values_settings.sql +++ b/tests/queries/0_stateless/02180_insert_into_values_settings.sql @@ -1,4 +1,4 @@ drop table if exists t; create table t (x Bool) engine=Memory(); -insert into t values settings bool_true_representation='да' ('да'); +insert into t settings bool_true_representation='да' values ('да'); drop table t; diff --git a/tests/queries/0_stateless/02193_async_insert_tcp_client_1.sql b/tests/queries/0_stateless/02193_async_insert_tcp_client_1.sql index 795a27883e6..7e68beb4b6f 100644 --- a/tests/queries/0_stateless/02193_async_insert_tcp_client_1.sql +++ b/tests/queries/0_stateless/02193_async_insert_tcp_client_1.sql @@ -1,12 +1,14 @@ +SET log_queries = 1; + DROP TABLE IF EXISTS t_async_insert_02193_1; CREATE TABLE t_async_insert_02193_1 (id UInt32, s String) ENGINE = Memory; -INSERT INTO t_async_insert_02193_1 FORMAT CSV SETTINGS async_insert = 1 +INSERT INTO t_async_insert_02193_1 SETTINGS async_insert = 1 FORMAT CSV 1,aaa ; -INSERT INTO t_async_insert_02193_1 FORMAT Values SETTINGS async_insert = 1 (2, 'bbb'); +INSERT INTO t_async_insert_02193_1 SETTINGS async_insert = 1 FORMAT Values (2, 'bbb'); SET async_insert = 1; diff --git a/tests/queries/0_stateless/02193_async_insert_tcp_client_2.sh b/tests/queries/0_stateless/02193_async_insert_tcp_client_2.sh index e620b21ac72..8aeb53d3b87 100755 --- a/tests/queries/0_stateless/02193_async_insert_tcp_client_2.sh +++ b/tests/queries/0_stateless/02193_async_insert_tcp_client_2.sh @@ -9,8 +9,8 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_async_insert_02193_2" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_async_insert_02193_2 (id UInt32, s String) ENGINE = Memory" -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_async_insert_02193_2 FORMAT CSV SETTINGS async_insert = 1 1,aaa" -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_async_insert_02193_2 FORMAT Values SETTINGS async_insert = 1 (2, 'bbb')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_async_insert_02193_2 SETTINGS async_insert = 1 FORMAT CSV 1,aaa" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_async_insert_02193_2 SETTINGS async_insert = 1 FORMAT Values (2, 'bbb')" ${CLICKHOUSE_CLIENT} -q "INSERT INTO t_async_insert_02193_2 VALUES (3, 'ccc')" --async_insert=1 ${CLICKHOUSE_CLIENT} -q 'INSERT INTO t_async_insert_02193_2 FORMAT JSONEachRow {"id": 4, "s": "ddd"}' --async_insert=1 diff --git a/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference b/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference index d176e0ee1ed..6920aa16198 100644 --- a/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference +++ b/tests/queries/0_stateless/02211_shcema_inference_from_stdin.reference @@ -9,7 +9,7 @@ x Nullable(Float64) 7 8 9 -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 
Nullable(Float64)
1 2 3
diff --git a/tests/queries/0_stateless/02226_s3_with_cache.reference b/tests/queries/0_stateless/02226_s3_with_cache.reference
index 214addac2d6..4041f51b3f9 100644
--- a/tests/queries/0_stateless/02226_s3_with_cache.reference
+++ b/tests/queries/0_stateless/02226_s3_with_cache.reference
@@ -1,2 +1,4 @@
SELECT 1, * FROM test LIMIT 10 FORMAT Null; 1 0 1
SELECT 2, * FROM test LIMIT 10 FORMAT Null; 0 1 0
+0
+SELECT 3, * FROM test LIMIT 10 FORMAT Null; 1 1 0
diff --git a/tests/queries/0_stateless/02226_s3_with_cache.sql b/tests/queries/0_stateless/02226_s3_with_cache.sql
index b3126a419df..d470f2ef140 100644
--- a/tests/queries/0_stateless/02226_s3_with_cache.sql
+++ b/tests/queries/0_stateless/02226_s3_with_cache.sql
@@ -1,7 +1,9 @@
-- Tags: no-parallel, no-fasttest, long
SET max_memory_usage='20G';
+SET enable_filesystem_cache_on_write_operations = 0;
+DROP TABLE IF EXISTS test;
CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache';
INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000;
@@ -41,4 +43,27 @@ SET remote_filesystem_read_method='threadpool';
SELECT * FROM test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null;
+SET enable_filesystem_cache_on_write_operations = 1;
+
+TRUNCATE TABLE test;
+SELECT count() FROM test;
+
+SYSTEM DROP FILESYSTEM CACHE;
+
+INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000;
+
+SELECT 3, * FROM test LIMIT 10 FORMAT Null;
+
+SYSTEM FLUSH LOGS;
+SELECT query,
+ ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read,
+ ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read,
+ ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download
+FROM system.query_log
+WHERE query LIKE 'SELECT 3, * FROM test LIMIT%'
+AND type = 'QueryFinish'
+AND current_database = currentDatabase()
+ORDER BY query_start_time DESC
+LIMIT 1;
+
DROP TABLE test;
diff --git a/tests/queries/0_stateless/02240_protobuflist_format_persons.sh b/tests/queries/0_stateless/02240_protobuflist_format_persons.sh
index dec14b54eb2..637e01b9e63 100755
--- a/tests/queries/0_stateless/02240_protobuflist_format_persons.sh
+++ b/tests/queries/0_stateless/02240_protobuflist_format_persons.sh
@@ -72,7 +72,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format
echo
echo "Roundtrip:"
$CLICKHOUSE_CLIENT --query "CREATE TABLE roundtrip_persons_02240 AS persons_02240"
-$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_persons_02240 FORMAT ProtobufList SETTINGS format_schema='$SCHEMADIR/02240_protobuflist1_format_persons:Person'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "INSERT INTO roundtrip_persons_02240 SETTINGS format_schema='$SCHEMADIR/02240_protobuflist1_format_persons:Person' FORMAT ProtobufList" < "$BINARY_FILE_PATH"
$CLICKHOUSE_CLIENT --query "SELECT * FROM roundtrip_persons_02240 ORDER BY name"
rm "$BINARY_FILE_PATH"
@@ -86,7 +86,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format
echo
echo "Roundtrip:"
$CLICKHOUSE_CLIENT --query "CREATE TABLE alt_persons_02240 AS persons_02240"
-$CLICKHOUSE_CLIENT --query "INSERT INTO alt_persons_02240 FORMAT ProtobufList SETTINGS format_schema='$SCHEMADIR/02240_protobuflist2_format_persons:AltPerson'" < "$BINARY_FILE_PATH"
+$CLICKHOUSE_CLIENT --query "INSERT INTO alt_persons_02240 SETTINGS format_schema='$SCHEMADIR/02240_protobuflist2_format_persons:AltPerson' FORMAT ProtobufList" < "$BINARY_FILE_PATH"
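# A minimal sketch of the clause ordering that these test hunks switch to: in an
# INSERT query the per-query SETTINGS clause has to precede FORMAT, since everything
# after the format name is read as raw input data. Assumes $CLICKHOUSE_CLIENT points
# at a running server, as in these scripts; the table t_example and the inline CSV
# row are hypothetical and only for illustration.
$CLICKHOUSE_CLIENT -q "CREATE TABLE IF NOT EXISTS t_example (id UInt32, s String) ENGINE = Memory"
echo '1,\N' | $CLICKHOUSE_CLIENT -q "INSERT INTO t_example SETTINGS input_format_null_as_default = 1 FORMAT CSV"
$CLICKHOUSE_CLIENT -q "SELECT * FROM t_example"
$CLICKHOUSE_CLIENT -q "DROP TABLE t_example"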
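# The cache tests in this patch (02226_s3_with_cache above, plus the new
# 02240_system_remote_filesystem_cache and 02241_remote_filesystem_cache_on_insert
# tests below) share one verification pattern: run a marker query, SYSTEM FLUSH LOGS,
# then read that query's ProfileEvents back from system.query_log to see whether the
# bytes came from the remote filesystem or from the local cache. A sketch of that
# check, reusing the 'SELECT 3, ...' marker from 02226_s3_with_cache.sql; which
# counters are asserted on is test-specific.
$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
$CLICKHOUSE_CLIENT -q "
    SELECT
        ProfileEvents['RemoteFSReadBytes'] > 0 AS remote_fs_read,
        ProfileEvents['RemoteFSCacheReadBytes'] > 0 AS remote_fs_cache_read,
        ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 AS remote_fs_read_and_download
    FROM system.query_log
    WHERE query LIKE 'SELECT 3, * FROM test LIMIT%'
      AND type = 'QueryFinish'
      AND current_database = currentDatabase()
    ORDER BY query_start_time DESC
    LIMIT 1"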
$CLICKHOUSE_CLIENT --query "SELECT * FROM alt_persons_02240 ORDER BY name" rm "$BINARY_FILE_PATH" @@ -100,7 +100,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format # echo echo "Roundtrip:" $CLICKHOUSE_CLIENT --query "CREATE TABLE str_persons_02240 AS persons_02240" -$CLICKHOUSE_CLIENT --query "INSERT INTO str_persons_02240 FORMAT ProtobufList SETTINGS format_schema='$SCHEMADIR/02240_protobuflist3_format_persons:StrPerson'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO str_persons_02240 SETTINGS format_schema='$SCHEMADIR/02240_protobuflist3_format_persons:StrPerson' FORMAT ProtobufList" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM str_persons_02240 ORDER BY name" rm "$BINARY_FILE_PATH" @@ -114,7 +114,7 @@ $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format echo echo "Roundtrip:" $CLICKHOUSE_CLIENT --query "CREATE TABLE syntax2_persons_02240 AS persons_02240" -$CLICKHOUSE_CLIENT --query "INSERT INTO syntax2_persons_02240 FORMAT ProtobufList SETTINGS format_schema='$SCHEMADIR/02240_protobuflist_format_persons_syntax2:Syntax2Person'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "INSERT INTO syntax2_persons_02240 SETTINGS format_schema='$SCHEMADIR/02240_protobuflist_format_persons_syntax2:Syntax2Person' FORMAT ProtobufList" < "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "SELECT * FROM syntax2_persons_02240 ORDER BY name" rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference new file mode 100644 index 00000000000..8bcb7e1dd42 --- /dev/null +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.reference @@ -0,0 +1,19 @@ +-- { echo } + +SYSTEM DROP FILESYSTEM CACHE; +SET enable_filesystem_cache_on_write_operations=0; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +0 0 1 +0 79 80 +0 745 746 +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +0 745 746 +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql new file mode 100644 index 00000000000..aa469779130 --- /dev/null +++ b/tests/queries/0_stateless/02240_system_remote_filesystem_cache.sql @@ -0,0 +1,18 @@ +-- Tags: no-parallel, no-fasttest, no-s3-storage + +-- { echo } + +SYSTEM DROP FILESYSTEM CACHE; +SET enable_filesystem_cache_on_write_operations=0; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY 
file_segment_range_end, size; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference index 69ed3536951..d0ced74f8f6 100644 --- a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference @@ -1,8 +1,8 @@ -a Nullable(String) +a Nullable(Float64) b Nullable(String) -c Nullable(String) -1 s1 \N +c Array(Nullable(Float64)) +1 s1 [] 2 } [2] -\N \N \N -\N \N \N +\N \N [] +\N \N [] \N \N [3] diff --git a/tests/queries/0_stateless/02241_parquet_bad_column.sh b/tests/queries/0_stateless/02241_parquet_bad_column.sh index 9efd11cbbe1..cfe8c2d0dbe 100755 --- a/tests/queries/0_stateless/02241_parquet_bad_column.sh +++ b/tests/queries/0_stateless/02241_parquet_bad_column.sh @@ -22,7 +22,7 @@ for case_insensitive in "true" "false"; do original_width Nullable(UInt32), original_height Nullable(UInt32)) engine=Memory" - cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=$case_insensitive" + cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 SETTINGS input_format_parquet_case_insensitive_column_matching=$case_insensitive format Parquet" $CLICKHOUSE_CLIENT -q "select count() from test_02241" $CLICKHOUSE_CLIENT -q "drop table test_02241" diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference new file mode 100644 index 00000000000..b2269c16264 --- /dev/null +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.reference @@ -0,0 +1,75 @@ +-- { echo } + +SET enable_filesystem_cache_on_write_operations=1; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +0 +SELECT count() FROM system.filesystem_cache; +0 +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN 
system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +Row 1: +────── +file_segment_range_begin: 0 +file_segment_range_end: 745 +size: 746 +state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +7 +SELECT count() FROM system.filesystem_cache; +7 +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; +0 +SELECT * FROM test FORMAT Null; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; +2 +SELECT * FROM test FORMAT Null; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; +2 +SELECT count() size FROM system.filesystem_cache; +7 +SYSTEM DROP FILESYSTEM CACHE; +INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +Row 1: +────── +file_segment_range_begin: 0 +file_segment_range_end: 1659 +size: 1660 +state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +7 +SELECT count() FROM system.filesystem_cache; +7 +SELECT count() FROM system.filesystem_cache; +7 +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; +SELECT count() FROM system.filesystem_cache; +7 +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); +SELECT count() FROM system.filesystem_cache; +21 +OPTIMIZE TABLE test FINAL; +SELECT count() FROM system.filesystem_cache; +27 +SET mutations_sync=2; +ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; +SELECT count() FROM system.filesystem_cache; +28 +INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); +SYSTEM FLUSH LOGS; +SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read +FROM system.query_log +WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; +SELECT count() FROM test; +5010500 +SELECT count() FROM test WHERE value LIKE '%010%'; +18816 diff --git a/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql new file mode 100644 index 00000000000..c3ab1de3693 --- /dev/null +++ b/tests/queries/0_stateless/02241_remote_filesystem_cache_on_insert.sql @@ -0,0 +1,64 @@ +-- Tags: no-parallel, no-fasttest, no-s3-storage + +-- { echo } + +SET enable_filesystem_cache_on_write_operations=1; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; + +SYSTEM DROP FILESYSTEM CACHE; + +SELECT file_segment_range_begin, 
file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.filesystem_cache; + +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.filesystem_cache; + +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; + +SELECT * FROM test FORMAT Null; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; + +SELECT * FROM test FORMAT Null; +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0; + +SELECT count() size FROM system.filesystem_cache; + +SYSTEM DROP FILESYSTEM CACHE; + +INSERT INTO test SELECT number, toString(number) FROM numbers(100, 200); + +SELECT file_segment_range_begin, file_segment_range_end, size, state FROM (SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path) WHERE endsWith(local_path, 'data.bin') FORMAT Vertical; +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path; +SELECT count() FROM system.filesystem_cache; + +SELECT count() FROM system.filesystem_cache; +INSERT INTO test SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0; +SELECT count() FROM system.filesystem_cache; + +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +INSERT INTO test SELECT number, toString(number) FROM numbers(300, 10000); +SELECT count() FROM system.filesystem_cache; +OPTIMIZE TABLE test FINAL; +SELECT count() FROM system.filesystem_cache; + +SET mutations_sync=2; +ALTER TABLE test UPDATE value = 'kek' WHERE key = 100; +SELECT count() FROM system.filesystem_cache; + +INSERT INTO test SELECT number, toString(number) FROM numbers(5000000); +SYSTEM FLUSH LOGS; +SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read +FROM system.query_log +WHERE query LIKE 'SELECT number, toString(number) FROM numbers(5000000)%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC 
+LIMIT 1; +SELECT count() FROM test; +SELECT count() FROM test WHERE value LIKE '%010%'; diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference new file mode 100644 index 00000000000..debc5c58936 --- /dev/null +++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference @@ -0,0 +1,40 @@ +Arrow +x Nullable(UInt64) +arr1 Array(Nullable(UInt64)) +arr2 Array(Array(Nullable(String))) +arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] +\N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] +2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] +\N [NULL,4] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,3)] +4 [4,5] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,4)] +ArrowStream +x Nullable(UInt64) +arr1 Array(Nullable(UInt64)) +arr2 Array(Array(Nullable(String))) +arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] +\N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] +2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] +\N [NULL,4] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,3)] +4 [4,5] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,4)] +Parquet +x Nullable(UInt64) +arr1 Array(Nullable(UInt64)) +arr2 Array(Array(Nullable(String))) +arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] +\N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] +2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] +\N [NULL,4] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,3)] +4 [4,5] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,4)] +ORC +x Nullable(Int64) +arr1 Array(Nullable(Int64)) +arr2 Array(Array(Nullable(String))) +arr3 Array(Tuple(Nullable(String), Nullable(Int64))) +0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] +\N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] +2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] +\N [NULL,4] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,3)] +4 [4,5] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,4)] diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh new file mode 100755 index 00000000000..1b6999e3f09 --- /dev/null +++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02242.data +DATA_FILE=$USER_FILES_PATH/$FILE_NAME + +for format in Arrow ArrowStream Parquet ORC +do + echo $format + $CLICKHOUSE_CLIENT -q "select number % 2 ? NULL : number as x, [number % 2 ? 
NULL : number, number + 1] as arr1, [[NULL, 'String'], [NULL], []] as arr2, [(NULL, NULL), ('String', NULL), (NULL, number)] as arr3 from numbers(5) format $format" > $DATA_FILE + $CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', '$format')" + $CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', '$format')" +done + +rm $DATA_FILE diff --git a/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh index 8ebf2952ab3..42652615d7d 100755 --- a/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh +++ b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh @@ -9,7 +9,7 @@ echo "Parquet" DATA_FILE=$CUR_DIR/data_parquet/case_insensitive_column_matching.parquet ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (iD String, scOre Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=true" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load SETTINGS input_format_parquet_case_insensitive_column_matching=true FORMAT Parquet" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" ${CLICKHOUSE_CLIENT} --query="drop table parquet_load" @@ -17,7 +17,7 @@ echo "ORC" DATA_FILE=$CUR_DIR/data_orc/case_insensitive_column_matching.orc ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (iD String, sCorE Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_orc_case_insensitive_column_matching=true" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load SETTINGS input_format_orc_case_insensitive_column_matching=true FORMAT ORC" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" ${CLICKHOUSE_CLIENT} --query="drop table orc_load" @@ -25,6 +25,6 @@ echo "Arrow" DATA_FILE=$CUR_DIR/data_arrow/case_insensitive_column_matching.arrow ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (iD String, sCorE Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_load FORMAT Arrow SETTINGS input_format_arrow_case_insensitive_column_matching=true" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_load SETTINGS input_format_arrow_case_insensitive_column_matching=true FORMAT Arrow" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_load" ${CLICKHOUSE_CLIENT} --query="drop table arrow_load" diff --git a/tests/queries/0_stateless/02242_case_insensitive_nested.sh b/tests/queries/0_stateless/02242_case_insensitive_nested.sh index c22f5695dc3..05d7bf4fc8e 100755 --- a/tests/queries/0_stateless/02242_case_insensitive_nested.sh +++ b/tests/queries/0_stateless/02242_case_insensitive_nested.sh @@ -17,7 +17,7 @@ for ((i = 0; i < 3; i++)) do echo ${formats[i]} ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE nested_table" - cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_table FORMAT ${formats[i]} SETTINGS input_format_${format_files[i]}_import_nested = 1, input_format_${format_files[i]}_case_insensitive_column_matching = true" + cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_table SETTINGS input_format_${format_files[i]}_import_nested = 
1, input_format_${format_files[i]}_case_insensitive_column_matching = true FORMAT ${formats[i]}" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM nested_table" diff --git a/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference new file mode 100644 index 00000000000..f599e28b8ab --- /dev/null +++ b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh new file mode 100755 index 00000000000..cc8db7fb316 --- /dev/null +++ b/tests/queries/0_stateless/02243_arrow_read_null_type_to_nullable_column.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_02243" +$CLICKHOUSE_CLIENT -q "create table test_02243 (image_path Nullable(String), + caption Nullable(String), + NSFW Nullable(String), + similarity Nullable(Float64), + LICENSE Nullable(String), + url Nullable(String), + key Nullable(UInt64), + shard_id Nullable(UInt64), + status Nullable(String), + error_message Nullable(String), + width Nullable(UInt32), + height Nullable(UInt32), + exif Nullable(String), + original_width Nullable(UInt32), + original_height Nullable(UInt32)) engine=Memory" + +cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT --stacktrace -q "insert into test_02243 format Parquet" + +$CLICKHOUSE_CLIENT -q "select count() from test_02243" +$CLICKHOUSE_CLIENT -q "drop table test_02243" diff --git a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference new file mode 100644 index 00000000000..d237caf630f --- /dev/null +++ b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.reference @@ -0,0 +1,8 @@ +x Nullable(String) +y Nullable(Float64) +x Nullable(String) +y Nullable(Float64) +x Nullable(String) +y Nullable(Float64) +x Nullable(String) +y Nullable(Float64) diff --git a/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql new file mode 100644 index 00000000000..af56856f0be --- /dev/null +++ b/tests/queries/0_stateless/02244_column_names_in_shcmea_inference.sql @@ -0,0 +1,14 @@ +-- Tags: no-fasttest, no-parallel + +insert into function file('test_02244', 'TSV', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; +desc file('test_02244', 'TSV') settings column_names_for_schema_inference='x,y'; + +insert into function file('test_02244', 'CSV', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; +desc file('test_02244', 'CSV') settings column_names_for_schema_inference='x,y'; + +insert into function file('test_02244', 'JSONCompactEachRow', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; +desc file('test_02244', 'JSONCompactEachRow') settings column_names_for_schema_inference='x,y'; + +insert into function file('test_02244', 'Values', 'x String, y UInt32') select 'Hello, world!', 42 settings engine_file_truncate_on_insert=1; +desc file('test_02244', 'Values') settings 
column_names_for_schema_inference='x,y'; + diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference new file mode 100644 index 00000000000..4f9cde534f0 --- /dev/null +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.reference @@ -0,0 +1,16 @@ +OK +image_path Nullable(String) +caption Nullable(String) +NSFW Nullable(String) +similarity Nullable(Float64) +LICENSE Nullable(String) +url Nullable(String) +key Nullable(Int64) +shard_id Nullable(Int64) +status Nullable(String) +width Nullable(Int64) +height Nullable(Int64) +exif Nullable(String) +original_width Nullable(Int64) +original_height Nullable(Int64) +10 diff --git a/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh new file mode 100755 index 00000000000..005c089e434 --- /dev/null +++ b/tests/queries/0_stateless/02245_parquet_skip_unknown_type.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02245.parquet +DATA_FILE=$USER_FILES_PATH/$FILE_NAME + +cp $CUR_DIR/data_parquet_bad_column/metadata_0.parquet $DATA_FILE + + +$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet)" 2>&1 | grep -qF "CANNOT_EXTRACT_TABLE_STRUCTURE" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "desc file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" +$CLICKHOUSE_CLIENT -q "select count(*) from file(test_02245.parquet) settings input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference=1" + diff --git a/tests/queries/0_stateless/02245_s3_schema_desc.reference b/tests/queries/0_stateless/02245_s3_schema_desc.reference index a5b0f81a2c7..e039680d933 100644 --- a/tests/queries/0_stateless/02245_s3_schema_desc.reference +++ b/tests/queries/0_stateless/02245_s3_schema_desc.reference @@ -1,21 +1,21 @@ -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) c1 UInt64 c2 UInt64 c3 UInt64 -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) c1 UInt64 c2 UInt64 c3 UInt64 -c1 Nullable(String) -c2 Nullable(String) -c3 Nullable(String) +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Nullable(Float64) c1 UInt64 c2 UInt64 c3 UInt64 diff --git a/tests/queries/0_stateless/02245_s3_schema_desc.sql b/tests/queries/0_stateless/02245_s3_schema_desc.sql index 4ab870e1379..2cd362ff233 100644 --- a/tests/queries/0_stateless/02245_s3_schema_desc.sql +++ b/tests/queries/0_stateless/02245_s3_schema_desc.sql @@ -10,4 +10,5 @@ desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); desc s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'test', 'testtest', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); + 
SELECT * FROM s3(decodeURLComponent(NULL), [NULL]); --{serverError 170} diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference new file mode 100644 index 00000000000..c245f13fdbe --- /dev/null +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.reference @@ -0,0 +1,107 @@ +TSV +c1 Nullable(Float64) +c2 Nullable(String) +c3 Array(Nullable(Float64)) +c4 Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64)) +42 Some string [1,2,3,4] (1,2,3) +42 abcd [] (4,5,6) +c1 Nullable(String) +[({\'key\' : 42.42}, [\'String\', \'String2\'], 42.42), ({}, [], -42), ({\'key2\' : NULL}, [NULL], NULL)] +[] +[({}, [], 0)] +[({}, [NULL], NULL)] +[({}, [\'String3\'], NULL)] +[({\'key3\': NULL}, []), NULL] +c1 Array(Tuple(Map(String, Nullable(Float64)), Array(Nullable(String)), Nullable(Float64))) +[({'key':42.42},['String','String2'],42.42),({},[],-42),({'key2':NULL},[NULL],NULL)] +[] +[({},[],0)] +[({},[NULL],NULL)] +[({},['String3'],NULL)] +[({'key3':NULL},[],NULL)] +c1 Nullable(Bool) +true +false +\N +c1 Array(Nullable(Bool)) +[true,NULL] +[] +[NULL] +[false] +c1 Nullable(String) +[] +c1 Nullable(String) +{} +c1 Nullable(String) +() +c1 Nullable(String) +[1, 2, 3 +c1 Nullable(String) +[(1, 2, 3 4)] +c1 Nullable(String) +[1, 2, 3 + 4] +c1 Nullable(String) +(1, 2, +c1 Nullable(String) +[1, Some trash, 42.2] +c1 Nullable(String) +[1, \'String\', {\'key\' : 2}] +c1 Nullable(String) +{\'key\' : 1, [1] : 10} +c1 Nullable(String) +{}{} +c1 Nullable(String) +[1, 2, 3 +c1 Nullable(String) +[abc, def] +c1 Array(Nullable(String)) +['abc','def'] +c1 Nullable(String) +[\'string] +c1 Nullable(String) +\'string +c1 Nullable(Float64) +42.42 +c1 Nullable(String) +42.42sometrash +c1 Nullable(String) +[42.42sometrash, 42.42] + +CSV +c1 Nullable(String) +c2 Nullable(String) +c3 Array(Nullable(Float64)) +c4 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))) +42 Some string [1,2,3,4] [(1,2,3)] +42\\ abcd [] [(4,5,6)] +c1 Nullable(String) +[({\'key\' : 42.42}, [\'String\', \'String2\'], 42.42), ({}, [], -42), ({\'key2\' : NULL}, [NULL], NULL)] +[] +[({}, [], 0)] +[({}, [NULL], NULL)] +[({}, [\'String3\'], NULL)] +[({\'key3\': NULL}, []), NULL] +c1 Array(Tuple(Map(String, Nullable(Float64)), Array(Nullable(String)), Nullable(Float64))) +[({'key':42.42},['String','String2'],42.42),({},[],-42),({'key2':NULL},[NULL],NULL)] +[] +[({},[],0)] +[({},[NULL],NULL)] +[({},['String3'],NULL)] +[({'key3':NULL},[],NULL)] +c1 Nullable(Bool) +true +false +\N +c1 Array(Nullable(Bool)) +[true,NULL] +[] +[NULL] +[false] +c1 Nullable(String) +(1, 2, 3) +c1 Nullable(String) +123.123 +c1 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))) +[(1,2,3)] +c1 Array(Tuple(Nullable(Float64), Nullable(Float64), Nullable(Float64))) +[(1,2,3)] diff --git a/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh new file mode 100755 index 00000000000..6589765f739 --- /dev/null +++ b/tests/queries/0_stateless/02246_tsv_csv_best_effort_schema_inference.sh @@ -0,0 +1,220 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02149.data +DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME + +touch $DATA_FILE + +echo "TSV" + +echo -e "42\tSome string\t[1, 2, 3, 4]\t(1, 2, 3) +42\tabcd\t[]\t(4, 5, 6)" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)] +[] +[({}, [], 0)] +[({}, [NULL], NULL)] +[({}, ['String3'], NULL)] +[({'key3': NULL}, []), NULL]"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV') settings input_format_tsv_use_best_effort_in_schema_inference=false" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV') settings input_format_tsv_use_best_effort_in_schema_inference=false" + + +echo -e "[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)] +[] +[({}, [], 0)] +[({}, [NULL], NULL)] +[({}, ['String3'], NULL)] +[({'key3': NULL}, [], NULL)]"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "true +false +\N" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[true, NULL] +[] +[NULL] +[false]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "{}" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "()" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, 2, 3" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[(1, 2, 3 4)]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, 2, 3 + 4]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "(1, 2," > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, Some trash, 42.2]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, 'String', {'key' : 2}]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "{'key' : 1, [1] : 10}" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "{}{}" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[1, 2, 3" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc 
file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[abc, def]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "['abc', 'def']" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "['string]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "'string" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "42.42" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "42.42sometrash" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + +echo -e "[42.42sometrash, 42.42]" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'TSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'TSV')" + + +echo +echo "CSV" + +echo -e "42,Some string,'[1, 2, 3, 4]','[(1, 2, 3)]' +42\,abcd,'[]','[(4, 5, 6)]'" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\" +'[]' +'[({}, [], 0)]' +'[({}, [NULL], NULL)]' +\"[({}, ['String3'], NULL)]\" +\"[({'key3': NULL}, []), NULL]\""> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV') settings input_format_csv_use_best_effort_in_schema_inference=false" + +echo -e "\"[({'key' : 42.42}, ['String', 'String2'], 42.42), ({}, [], -42), ({'key2' : NULL}, [NULL], NULL)]\" +'[]' +'[({}, [], 0)]' +'[({}, [NULL], NULL)]' +\"[({}, ['String3'], NULL)]\" +\"[({'key3': NULL}, [], NULL)]\""> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "true +false +\N" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "'[true, NULL]' +'[]' +'[NULL]' +'[false]'" > $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + + +echo -e "'(1, 2, 3)'"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "'123.123'"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "'[(1, 2, 3)]'"> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + +echo -e "\"[(1, 2, 3)]\""> $DATA_FILE + +$CLICKHOUSE_CLIENT -q "desc file('$FILE_NAME', 'CSV')" +$CLICKHOUSE_CLIENT -q "select * from file('$FILE_NAME', 'CSV')" + + diff --git a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference index 49a285dc11a..300846c17a0 100644 --- 
a/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference +++ b/tests/queries/0_stateless/02247_names_order_in_json_and_tskv.reference @@ -1,15 +1,15 @@ -a Nullable(String) +a Nullable(Float64) b Nullable(String) -c Nullable(String) -1 s1 \N +c Array(Nullable(Float64)) +1 s1 [] 2 } [2] -\N \N \N -\N \N \N +\N \N [] +\N \N [] \N \N [3] -b Nullable(String) -a Nullable(String) -c Nullable(String) -e Nullable(String) +b Nullable(Float64) +a Nullable(Float64) +c Nullable(Float64) +e Nullable(Float64) 1 \N \N \N \N 2 3 \N \N \N \N \N diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference new file mode 100644 index 00000000000..721e7960875 --- /dev/null +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -0,0 +1,69 @@ +insert into foo settings max_threads=1 +Syntax error (query): failed at position 40 (end of query): +insert into foo format tsv settings max_threads=1 +Can't format ASTInsertQuery with data, since data will be lost. +[multi] insert into foo format tsv settings max_threads=1 +INSERT INTO foo +SETTINGS max_threads = 1 +FORMAT tsv +[oneline] insert into foo format tsv settings max_threads=1 +INSERT INTO foo SETTINGS max_threads = 1 FORMAT tsv +insert into foo settings max_threads=1 format tsv settings max_threads=1 +You have SETTINGS before and after FORMAT +Cannot parse input: expected '\n' before: 'settings max_threads=1 1' +1 +You have SETTINGS before and after FORMAT +[multi] insert into foo values +INSERT INTO foo FORMAT Values +[oneline] insert into foo values +INSERT INTO foo FORMAT Values +[multi] insert into foo select 1 +INSERT INTO foo SELECT 1 +[oneline] insert into foo select 1 +INSERT INTO foo SELECT 1 +[multi] insert into foo watch bar +INSERT INTO foo WATCH bar +[oneline] insert into foo watch bar +INSERT INTO foo WATCH bar +[multi] insert into foo format tsv +INSERT INTO foo FORMAT tsv +[oneline] insert into foo format tsv +INSERT INTO foo FORMAT tsv +[multi] insert into foo settings max_threads=1 values +INSERT INTO foo +SETTINGS max_threads = 1 +FORMAT Values +[oneline] insert into foo settings max_threads=1 values +INSERT INTO foo SETTINGS max_threads = 1 FORMAT Values +[multi] insert into foo settings max_threads=1 select 1 +INSERT INTO foo +SETTINGS max_threads = 1 +SELECT 1 +[oneline] insert into foo settings max_threads=1 select 1 +INSERT INTO foo SETTINGS max_threads = 1 SELECT 1 +[multi] insert into foo settings max_threads=1 watch bar +INSERT INTO foo +SETTINGS max_threads = 1 +WATCH bar +[oneline] insert into foo settings max_threads=1 watch bar +INSERT INTO foo SETTINGS max_threads = 1 WATCH bar +[multi] insert into foo settings max_threads=1 format tsv +INSERT INTO foo +SETTINGS max_threads = 1 +FORMAT tsv +[oneline] insert into foo settings max_threads=1 format tsv +INSERT INTO foo SETTINGS max_threads = 1 FORMAT tsv +[multi] insert into foo select 1 settings max_threads=1 +INSERT INTO foo +SETTINGS max_threads = 1 +SELECT 1 +SETTINGS max_threads = 1 +[oneline] insert into foo select 1 settings max_threads=1 +INSERT INTO foo SETTINGS max_threads = 1 SELECT 1 SETTINGS max_threads = 1 +[multi] insert into foo settings max_threads=1 select 1 settings max_threads=1 +INSERT INTO foo +SETTINGS max_threads = 1 +SELECT 1 +SETTINGS max_threads = 1 +[oneline] insert into foo settings max_threads=1 select 1 settings max_threads=1 +INSERT INTO foo SETTINGS max_threads = 1 SELECT 1 SETTINGS max_threads = 1 diff --git 
a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh new file mode 100755 index 00000000000..3d5f780a38c --- /dev/null +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function run_format() +{ + local q="$1" && shift + + echo "$q" + $CLICKHOUSE_FORMAT "$@" <<<"$q" +} +function run_format_both() +{ + local q="$1" && shift + + echo "[multi] $q" + $CLICKHOUSE_FORMAT "$@" <<<"$q" + echo "[oneline] $q" + $CLICKHOUSE_FORMAT --oneline "$@" <<<"$q" +} + +# NOTE: that those queries may work slow, due to stack trace obtaining +run_format 'insert into foo settings max_threads=1' 2> >(grep -m1 -o "Syntax error (query): failed at position .* (end of query):") +# compatibility +run_format 'insert into foo format tsv settings max_threads=1' 2> >(grep -m1 -F -o "Can't format ASTInsertQuery with data, since data will be lost.") +run_format_both 'insert into foo format tsv settings max_threads=1' --allow_settings_after_format_in_insert +run_format 'insert into foo settings max_threads=1 format tsv settings max_threads=1' --allow_settings_after_format_in_insert 2> >(grep -m1 -F -o "You have SETTINGS before and after FORMAT") +# and via server (since this is a separate code path) +$CLICKHOUSE_CLIENT -q 'drop table if exists data_02263' +$CLICKHOUSE_CLIENT -q 'create table data_02263 (key Int) engine=Memory()' +$CLICKHOUSE_CLIENT -q 'insert into data_02263 format TSV settings max_threads=1 1' 2> >(grep -m1 -F -o "Cannot parse input: expected '\n' before: 'settings max_threads=1 1'") +$CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 format TSV settings max_threads=1 1' +$CLICKHOUSE_CLIENT -q 'select * from data_02263' +$CLICKHOUSE_CLIENT --allow_settings_after_format_in_insert=1 -q 'insert into data_02263 settings max_threads=1 format tsv settings max_threads=1' 2> >(grep -m1 -F -o "You have SETTINGS before and after FORMAT") +$CLICKHOUSE_CLIENT -q 'drop table data_02263' + +run_format_both 'insert into foo values' +run_format_both 'insert into foo select 1' +run_format_both 'insert into foo watch bar' +run_format_both 'insert into foo format tsv' + +run_format_both 'insert into foo settings max_threads=1 values' +run_format_both 'insert into foo settings max_threads=1 select 1' +run_format_both 'insert into foo settings max_threads=1 watch bar' +run_format_both 'insert into foo settings max_threads=1 format tsv' +run_format_both 'insert into foo select 1 settings max_threads=1' +run_format_both 'insert into foo settings max_threads=1 select 1 settings max_threads=1' diff --git a/tests/queries/0_stateless/02265_cross_join_empty_list.reference b/tests/queries/0_stateless/02265_cross_join_empty_list.reference new file mode 100644 index 00000000000..fef5e889a1e --- /dev/null +++ b/tests/queries/0_stateless/02265_cross_join_empty_list.reference @@ -0,0 +1,52 @@ +24 +24 +24 +24 24 24 +0 0 0 +0 0 1 +0 0 2 +0 0 3 +0 1 0 +0 1 1 +0 1 2 +0 1 3 +0 2 0 +0 2 1 +0 2 2 +0 2 3 +1 0 0 +1 0 1 +1 0 2 +1 0 3 +1 1 0 +1 1 1 +1 1 2 +1 1 3 +1 2 0 +1 2 1 +1 2 2 +1 2 3 +0 0 0 +0 0 1 +0 0 2 +0 0 3 +0 1 0 +0 1 1 +0 1 2 +0 1 3 +0 2 0 +0 2 1 +0 2 2 +0 2 3 +1 0 0 +1 0 1 +1 0 2 +1 0 3 +1 1 0 +1 1 1 +1 1 2 +1 1 3 +1 2 0 +1 2 1 +1 2 2 +1 2 3 diff --git a/tests/queries/0_stateless/02265_cross_join_empty_list.sql 
b/tests/queries/0_stateless/02265_cross_join_empty_list.sql new file mode 100644 index 00000000000..346a047351d --- /dev/null +++ b/tests/queries/0_stateless/02265_cross_join_empty_list.sql @@ -0,0 +1,6 @@ +SELECT count(1) FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3; +SELECT count(*) FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3; +SELECT count() FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3; +SELECT count(n1.number), count(n2.number), count(n3.number) FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3; +SELECT * FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3 ORDER BY n1.number, n2.number, n3.number; +SELECT n1.number, n2.number, n3.number FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3 ORDER BY n1.number, n2.number, n3.number; diff --git a/tests/queries/0_stateless/transactions.lib b/tests/queries/0_stateless/transactions.lib new file mode 100755 index 00000000000..521c56754bc --- /dev/null +++ b/tests/queries/0_stateless/transactions.lib @@ -0,0 +1,78 @@ +#!/usr/bin/env bash + +# shellcheck disable=SC2015 + +# Useful to run queries in parallel sessions +function tx() +{ + tx_num=$1 + query=$2 + + session="${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}_tx$tx_num" + query_id="${session}_${RANDOM}" + url_without_session="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/?" + url="${url_without_session}session_id=$session&query_id=$query_id&database=$CLICKHOUSE_DATABASE" + + ${CLICKHOUSE_CURL} -m 60 -sSk "$url" --data "$query" | sed "s/^/tx$tx_num\t/" +} + +# Waits for the last query in session to finish +function tx_wait() { + tx_num=$1 + + session="${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}_tx$tx_num" + + # try get pid of previous query + query_pid="" + tmp_file_name="${CLICKHOUSE_TMP}/tmp_tx_${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}" + query_id_and_pid=$(grep -F "$session" "$tmp_file_name" 2>/dev/null | tail -1) ||: + read -r query_id query_pid <<< "$query_id_and_pid" ||: + + # wait for previous query in transaction + if [ -n "$query_pid" ]; then + timeout 5 tail --pid=$query_pid -f /dev/null && return ||: + fi + + # there is no pid (or maybe we got wrong one), so wait using system.processes (it's less reliable) + count=0 + while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '$session%'") -gt 0 ]]; do + sleep 0.5 + count=$((count+1)) + if [ "$count" -gt 120 ]; then + echo "timeout while waiting for $tx_num" + break + fi + done; +} + +# Wait for previous query in session to finish, starts new one asynchronously +function tx_async() +{ + tx_num=$1 + query=$2 + + tx_wait "$tx_num" + + session="${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}_tx$tx_num" + query_id="${session}_${RANDOM}" + url_without_session="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/?" + url="${url_without_session}session_id=$session&query_id=$query_id&database=$CLICKHOUSE_DATABASE" + + # We cannot be sure that query will actually start execution and appear in system.processes before the next call to tx_wait + # Also we cannot use global map in bash to store last query_id for each tx_num, so we use tmp file... + tmp_file_name="${CLICKHOUSE_TMP}/tmp_tx_${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}" + + # run query asynchronously + ${CLICKHOUSE_CURL} -m 60 -sSk "$url" --data "$query" | sed "s/^/tx$tx_num\t/" & + query_pid=$! 
+ echo -e "$query_id\t$query_pid" >> "$tmp_file_name" +} + +# Wait for previous query in session to finish, execute the next one synchronously +function tx_sync() +{ + tx_num=$1 + query=$2 + tx_wait "$tx_num" + tx "$tx_num" "$query" +} diff --git a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh index 33562918f67..433d51a3036 100755 --- a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh +++ b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh @@ -15,7 +15,7 @@ do echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ - $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format SETTINGS input_format_null_as_default=0" + $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names SETTINGS input_format_null_as_default=0 FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" @@ -25,7 +25,7 @@ do echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ - $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format SETTINGS input_format_null_as_default=0" + $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names SETTINGS input_format_null_as_default=0 FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" diff --git a/utils/check-style/codespell-ignore-words.list b/utils/check-style/codespell-ignore-words.list index d3a7586647c..7aabaff17c5 100644 --- a/utils/check-style/codespell-ignore-words.list +++ b/utils/check-style/codespell-ignore-words.list @@ -10,3 +10,4 @@ ths offsett numer ue +alse
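The common thread in the test updates above is the INSERT clause order: SETTINGS is moved before FORMAT, and the 02263_format_insert_settings test suggests the old order is only parsed when allow_settings_after_format_in_insert is enabled. A minimal sketch of both forms, outside the patch itself and assuming a local clickhouse-client plus a hypothetical Memory table t (id UInt32, s String):

# Hypothetical table used only for this sketch.
clickhouse-client -q "CREATE TABLE IF NOT EXISTS t (id UInt32, s String) ENGINE = Memory"

# Clause order exercised by the updated tests: SETTINGS before FORMAT, inline data after the format name.
clickhouse-client -q "INSERT INTO t SETTINGS async_insert = 1 FORMAT CSV 1,aaa"

# Old order (FORMAT before SETTINGS), which the 02263 test indicates needs the compatibility setting.
clickhouse-client --allow_settings_after_format_in_insert=1 -q "INSERT INTO t FORMAT Values SETTINGS async_insert = 1 (2, 'bbb')"

clickhouse-client -q "SELECT * FROM t ORDER BY id"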